Commit 197a9d976d50c92f867dbfa750207c856f6f3cd2

Authored by gibran
1 parent 2c1cf7d9

Código copiado da classe WebAgent.java para garantir o acesso

aos links da página por meio do cliente da API Jakarta Commons versão
3.1 e retirada do método para formatar a "url" sem arquivo da página,
ficando uma chamada para um método externo.
src/main/java/br/com/checker/emag/core/ContentEvaluation.java
1 1 package br.com.checker.emag.core;
2 2  
3 3 import java.io.IOException;
4   -import java.net.HttpURLConnection;
  4 +import java.io.UnsupportedEncodingException;
5 5 import java.net.MalformedURLException;
6 6 import java.net.URL;
  7 +import java.net.URLEncoder;
7 8 import java.util.ArrayList;
8 9 import java.util.Collections;
9 10 import java.util.Comparator;
... ... @@ -15,14 +16,19 @@ import java.util.regex.Pattern;
15 16  
16 17 import net.htmlparser.jericho.Attribute;
17 18 import net.htmlparser.jericho.Element;
18   -import net.htmlparser.jericho.Segment;
19 19 import net.htmlparser.jericho.Source;
20 20  
  21 +import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
  22 +import org.apache.commons.httpclient.HttpClient;
  23 +import org.apache.commons.httpclient.HttpMethod;
  24 +import org.apache.commons.httpclient.methods.GetMethod;
  25 +import org.apache.commons.httpclient.params.HttpMethodParams;
21 26 import org.apache.commons.lang3.StringUtils;
22 27  
23 28 import br.com.checker.emag.Occurrence;
24 29 import br.com.checker.emag.OccurrenceClassification;
25 30 import br.com.checker.emag.core.SpecificRecommendation.ContentRecommendation;
  31 +import br.com.checker.emag.util.UrlSemArquiNoFinal;
26 32  
27 33 public class ContentEvaluation extends Evaluation {
28 34  
... ... @@ -49,7 +55,6 @@ public class ContentEvaluation extends Evaluation {
49 55 public SpecificRecommendation recommendation17() {
50 56 return new EvaluationRecommendation17();
51 57 }
52   -
53 58 public SpecificRecommendation recommendation18() {
54 59 return new EvaluationRecommendation18();
55 60 }
... ... @@ -300,13 +305,12 @@ public class ContentEvaluation extends Evaluation {
300 305  
301 306 String titulo_site = "";
302 307  
303   - if (titulo != null) {
304   - titulo_site = titulo.getContent().getTextExtractor().toString();
  308 + if (titulo != null) {
  309 +
  310 + titulo_site = titulo.getContent().getTextExtractor().toString();
305 311 }
306   -
  312 +
307 313 return titulo_site;
308   -
309   -
310 314 }
311 315  
312 316 private List<Occurrence> checkRecommendation19() {
... ... @@ -321,12 +325,7 @@ public class ContentEvaluation extends Evaluation {
321 325 // if(head != null) {
322 326 Element title = getDocument().getFirstElement("title");
323 327 if (title == null) {
324   - occurrences
325   - .add(this
326   - .buildOccurrence(
327   - "3.3",
328   - true,
329   - " Observa&ccedil;&atilde;o – Sem Fonte (N&atilde;o existe t&iacute;tulo na p&aacute;gina)",
  328 + occurrences.add(this.buildOccurrence("3.3",true," Observa&ccedil;&atilde;o – Sem Fonte (N&atilde;o existe t&iacute;tulo na p&aacute;gina)",
330 329 getDocument().getFirstElement(), "1"));
331 330 // occurrences.add(new Occurrence("3.3", true,
332 331 // "Sem fonte (não existe título na página)",OccurrenceClassification.CONTENT_INFORMATION,"1"));
... ... @@ -349,9 +348,13 @@ public class ContentEvaluation extends Evaluation {
349 348  
350 349 private List<Occurrence> checkRecommendation21() {
351 350 List<Occurrence> occurrences = new ArrayList<Occurrence>();
  351 + UrlSemArquiNoFinal objetoUrlSemArquiNoFinal = new UrlSemArquiNoFinal();
  352 +
  353 + String urlSemArquiNoFinal = objetoUrlSemArquiNoFinal.urlSemArquivoNoFinal(getUrl());
352 354  
353 355 Element LinkComImg;
354 356  
  357 +
355 358 for (Element link : getDocument().getAllElements("a")) {
356 359 String href = link.getAttributeValue("href");
357 360 String title = link.getAttributeValue("title");
... ... @@ -431,32 +434,47 @@ public class ContentEvaluation extends Evaluation {
431 434 if (link != null && hasLongContent(link))
432 435 occurrences.add(this.buildOccurrence("3.5", false,
433 436 link.toString(), link, "13"));//"9"));
434   -
435   - if (link != null && isLinkUnavailable(link, getUrl()))
436   - occurrences.add(this.buildOccurrence("3.5", true,
437   - link.toString(), link, "14"));//"10"));
438   -
  437 +
  438 + String retorno = "";
  439 + if (link != null)
  440 + retorno = isLinkUnavailable(link, urlSemArquiNoFinal);
  441 + if(retorno.equalsIgnoreCase("erro"))
  442 + {
  443 + occurrences.add(this.buildOccurrence("3.5", true,
  444 + link.toString(), link, "14"));//"10"));
  445 + }else if(retorno.equalsIgnoreCase("aviso"))
  446 + {
  447 + occurrences.add(this.buildOccurrence("3.5", false,
  448 + link.toString(), link, "15"));//"10"));
  449 + }
  450 +
439 451 }
440 452 return occurrences;
441 453 }
442   -
443   - private boolean isLinkUnavailable(Element link, String url) {
  454 +
  455 + private String isLinkUnavailable(Element link, String url) {
444 456  
445 457 String href = link.getAttributeValue("href");
446   -
  458 +
447 459 if (href != null && href.startsWith("www"))
  460 + {
448 461 href = "http://" + href;
  462 + }
449 463  
450 464 if (href != null && !href.startsWith("http") && url != null)
  465 + {
451 466 href = url + "/" + link.getAttributeValue("href");
452   -
  467 +
  468 + }
  469 +
453 470 if (link.getAttributeValue("href") != null
454 471 && !link.getAttributeValue("href").toString().trim().equalsIgnoreCase("")
455 472 && !link.getAttributeValue("href").substring(0, 1).equals("#")
456 473 && !link.getAttributeValue("href").substring(0, 1).equals("/")
457 474 && !link.getAttributeValue("href").contains("javascript")
458 475 && !link.getAttributeValue("href").contains("@")) {
459   -
  476 +
  477 +
460 478  
461 479 /*
462 480 * int[] codErro = { 400, 401, 402, 403, 404, 405, 406, 407, 408,
... ... @@ -472,6 +490,9 @@ public class ContentEvaluation extends Evaluation {
472 490 // if(!IsMatch(href,regex))
473 491 // return true;
474 492  
  493 + HttpMethod metodoRequisicaoGET = null;
  494 + HttpClient clienteHTTPJakartaCommons;
  495 + URL UrlConvertida;
475 496 try {
476 497 String[] test = href.split("\\../");
477 498 String newurl = "";
... ... @@ -479,19 +500,41 @@ public class ContentEvaluation extends Evaluation {
479 500 newurl = newurl + tes.trim();
480 501  
481 502 newurl = newurl.replace(" ", "%20");
482   -
483   - URL u = new URL(newurl);
484   -
485   - HttpURLConnection huc = (HttpURLConnection) u.openConnection();
486   - huc.setRequestMethod("GET");
487   - // huc.setRequestMethod("HEAD");
488   - huc.connect();
489   - codResponse = huc.getResponseCode();
490   - huc.disconnect();
491   - } catch (MalformedURLException e) {
492   - return true;
  503 +
  504 + //UrlConvertida = new URL(newurl);
  505 +
  506 +
  507 + System.out.println(newurl);
  508 + //Código copiado da classe WebAgent.java para garantir o acesso
  509 +//aos links da página por meio do cliente da API Jakarta Commons versão 3.1
  510 + clienteHTTPJakartaCommons = new HttpClient();
  511 + clienteHTTPJakartaCommons.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3,false));
  512 + clienteHTTPJakartaCommons.getParams().setParameter("http.protocol.allow-circular-redirects", true);
  513 +
  514 + metodoRequisicaoGET = new GetMethod(URLEncoder.encode(newurl, "UTF-8"));
  515 + metodoRequisicaoGET.setRequestHeader("http.agent", "Jakarta Commons-HttpClient/3.1");
  516 + metodoRequisicaoGET.setFollowRedirects(true);
  517 +
  518 + codResponse = clienteHTTPJakartaCommons.executeMethod(metodoRequisicaoGET);
  519 +
  520 + } catch (MalformedURLException e) {
  521 + e.printStackTrace();
  522 + return "aviso";
  523 +
493 524 } catch (IOException e) {
494   - return true;
  525 + e.printStackTrace();
  526 + return "aviso";
  527 +
  528 + } catch (IllegalArgumentException e) {
  529 + e.printStackTrace();
  530 + return "aviso";
  531 +
  532 + } finally {
  533 +
  534 + if (metodoRequisicaoGET != null){
  535 + metodoRequisicaoGET.releaseConnection();
  536 + }
  537 +
495 538 }
496 539  
497 540 /*
... ... @@ -500,13 +543,13 @@ public class ContentEvaluation extends Evaluation {
500 543 */
501 544 for (int cod : codErro) {
502 545 if (codResponse == cod) {
503   - return true;
  546 + return "erro";
504 547 }
505 548  
506 549 }
507 550 }
508 551  
509   - return false;
  552 + return "false";
510 553 }
511 554  
512 555 private static boolean IsMatch(String s, String pattern) {
... ... @@ -551,7 +594,7 @@ public class ContentEvaluation extends Evaluation {
551 594 return temConteudo;
552 595  
553 596 /* quantidadeElementosDentroLink = link.getAllElements().size();
554   - //Verifica se há elementos como img, span e outros dentro do link. o valor 1 já está atribuído a tag <a> (link avaliado)
  597 + //Verifica se há elementos como img, span e outros dentro do link. o valor 1 já está atribuído a tag <a> (link avaliado)
555 598 if(quantidadeElementosDentroLink > 1)
556 599 {
557 600 temConteudo = true;
... ... @@ -934,89 +977,47 @@ public class ContentEvaluation extends Evaluation {
934 977  
935 978 private List<Occurrence> checkRecommendation26() {
936 979 List<Occurrence> occurrences = new ArrayList<Occurrence>();
937   -
938   - /*
939   - * for (Element table : getDocument().getAllElements("table")) {
940   - * occurrences.add(buildOccurrence("3.10", false, table.toString(),
941   - * table, "1")); }
942   - */
943   -
944   - /*
945   - * for (Element table : getDocument().getAllElements("table")) { for
946   - * (Element caption : table.getAllElements("caption")) { if (caption ==
947   - * null || caption.isEmpty()) occurrences.add(buildOccurrence("3.10",
948   - * true, table .getStartTag().toString(), table, "1")); } }
949   - */
950   -
  980 + boolean temAssociacao = false;
  981 +
951 982 for (Element table : getDocument().getAllElements("table")) {
952 983 // Attribute summary = table.getAttributes().get("summary");
953 984  
954   - boolean THusaScope = false;
955   - boolean THusaId = false;
956   - boolean THusaHeaders = false;
957   - boolean TDusaScope = false;
958   - boolean TDusaId = false;
959   - boolean TDusaHeaders = false;
960   - boolean usaThead = false;
961   - boolean usaTfoot = false;
962   - boolean usaTbody = false;
963   -
964   - /*
965   - * if (summary == null || summary.getValue().equals(""))
966   - * occurrences.add(buildOccurrence("3.10", true, table
967   - * .getStartTag().toString(), table, "1"));
968   - */
969   -
970   - for (Element thead : table.getAllElements("thead")) {
971   - if (thead != null)
972   - usaThead = true;
  985 + temAssociacao = false;
  986 +
  987 + if(table.getAllElements("thead").size() > 0 && table.getAllElements("tbody").size() > 0)
  988 + {
  989 + temAssociacao = true;
973 990 }
974   -
975   - for (Element tfoot : table.getAllElements("tfoot")) {
976   - if (tfoot != null)
977   - usaTfoot = true;
  991 + else
  992 + {
  993 + for (Element coluna : table.getAllElements("td")) {
  994 + if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null ||
  995 + coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null)
  996 + {
  997 + temAssociacao = true;
  998 + }
  999 +
  1000 + }
  1001 + if(!temAssociacao)
  1002 + {
  1003 + for (Element coluna : table.getAllElements("th")) {
  1004 + if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null ||
  1005 + coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null)
  1006 + {
  1007 + temAssociacao = true;
  1008 + }
  1009 +
  1010 + }
  1011 + }
978 1012 }
979   -
980   - for (Element tbody : table.getAllElements("tbody")) {
981   - if (tbody != null)
982   - usaTbody = true;
  1013 +
  1014 + if(!temAssociacao)
  1015 + {
  1016 + occurrences.add(this.buildOccurrence("3.10", true, table.getStartTag().toString(), table, "1"));
983 1017 }
984   -
985   - /*
986   - * if(!usaThead && !usaTbody && !usaTfoot){
987   - *
988   - * //occurrences.add(this.buildOccurrence("3.10", true,
989   - * table.getAllStartTags("table").get(0).toString(), table, "1"));
990   - *
991   - * for (Element th : table.getAllElements("th")) { Attribute scope =
992   - * th.getAttributes().get("scope"); Attribute headers =
993   - * th.getAttributes().get("headers"); Attribute id =
994   - * th.getAttributes().get("id"); if (scope != null &&
995   - * !scope.getValue().equals("")) { THusaScope = true; } else if
996   - * (headers != null && !headers.getValue().equals("")) {
997   - * THusaHeaders = true; } else if (id != null &&
998   - * !id.getValue().equals("")) { THusaId = true; }
999   - *
1000   - * if(!THusaScope && !THusaHeaders && !THusaId){
1001   - * occurrences.add(this.buildOccurrence("3.10", true, th.toString(),
1002   - * th, "1")); } }
1003   - *
1004   - * for (Element td : table.getAllElements("td")) { Attribute tdscope
1005   - * = td.getAttributes().get("scope"); Attribute tdheaders =
1006   - * td.getAttributes().get("headers"); Attribute tdid =
1007   - * td.getAttributes().get("id"); if (tdscope != null &&
1008   - * !tdscope.getValue().equals("")) { TDusaScope = true; } else if
1009   - * (tdheaders != null && !tdheaders.getValue().equals("")) {
1010   - * TDusaHeaders = true; } else if (tdid != null &&
1011   - * !tdid.getValue().equals("")) { TDusaId = true; }
1012   - *
1013   - * if(!TDusaScope && !TDusaHeaders && !TDusaId){
1014   - * occurrences.add(this.buildOccurrence("3.10", true, td.toString(),
1015   - * td, "1")); } }
1016   - *
1017   - * }
1018   - */
  1018 +
1019 1019 }
  1020 +
1020 1021 // Sorting
1021 1022 Collections.sort(occurrences, new Comparator<Occurrence>() {
1022 1023 public int compare(Occurrence occurrence1, Occurrence occurrence2) {
... ...