Commit 197a9d976d50c92f867dbfa750207c856f6f3cd2
1 parent
2c1cf7d9
Exists in
master
and in
1 other branch
Código copiado da classe WebAgent.java para garantir o acesso
aos links da página por meio do cliente da API Jakarta Commons HttpClient versão 3.1. Também foi retirado o método que formatava a "url" sem o arquivo no final, substituído por uma chamada a um método externo (classe UrlSemArquiNoFinal).
Showing
1 changed file
with
116 additions
and
115 deletions
Show diff stats
src/main/java/br/com/checker/emag/core/ContentEvaluation.java
1 | 1 | package br.com.checker.emag.core; |
2 | 2 | |
3 | 3 | import java.io.IOException; |
4 | -import java.net.HttpURLConnection; | |
4 | +import java.io.UnsupportedEncodingException; | |
5 | 5 | import java.net.MalformedURLException; |
6 | 6 | import java.net.URL; |
7 | +import java.net.URLEncoder; | |
7 | 8 | import java.util.ArrayList; |
8 | 9 | import java.util.Collections; |
9 | 10 | import java.util.Comparator; |
... | ... | @@ -15,14 +16,19 @@ import java.util.regex.Pattern; |
15 | 16 | |
16 | 17 | import net.htmlparser.jericho.Attribute; |
17 | 18 | import net.htmlparser.jericho.Element; |
18 | -import net.htmlparser.jericho.Segment; | |
19 | 19 | import net.htmlparser.jericho.Source; |
20 | 20 | |
21 | +import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; | |
22 | +import org.apache.commons.httpclient.HttpClient; | |
23 | +import org.apache.commons.httpclient.HttpMethod; | |
24 | +import org.apache.commons.httpclient.methods.GetMethod; | |
25 | +import org.apache.commons.httpclient.params.HttpMethodParams; | |
21 | 26 | import org.apache.commons.lang3.StringUtils; |
22 | 27 | |
23 | 28 | import br.com.checker.emag.Occurrence; |
24 | 29 | import br.com.checker.emag.OccurrenceClassification; |
25 | 30 | import br.com.checker.emag.core.SpecificRecommendation.ContentRecommendation; |
31 | +import br.com.checker.emag.util.UrlSemArquiNoFinal; | |
26 | 32 | |
27 | 33 | public class ContentEvaluation extends Evaluation { |
28 | 34 | |
... | ... | @@ -49,7 +55,6 @@ public class ContentEvaluation extends Evaluation { |
49 | 55 | public SpecificRecommendation recommendation17() { |
50 | 56 | return new EvaluationRecommendation17(); |
51 | 57 | } |
52 | - | |
53 | 58 | public SpecificRecommendation recommendation18() { |
54 | 59 | return new EvaluationRecommendation18(); |
55 | 60 | } |
... | ... | @@ -300,13 +305,12 @@ public class ContentEvaluation extends Evaluation { |
300 | 305 | |
301 | 306 | String titulo_site = ""; |
302 | 307 | |
303 | - if (titulo != null) { | |
304 | - titulo_site = titulo.getContent().getTextExtractor().toString(); | |
308 | + if (titulo != null) { | |
309 | + | |
310 | + titulo_site = titulo.getContent().getTextExtractor().toString(); | |
305 | 311 | } |
306 | - | |
312 | + | |
307 | 313 | return titulo_site; |
308 | - | |
309 | - | |
310 | 314 | } |
311 | 315 | |
312 | 316 | private List<Occurrence> checkRecommendation19() { |
... | ... | @@ -321,12 +325,7 @@ public class ContentEvaluation extends Evaluation { |
321 | 325 | // if(head != null) { |
322 | 326 | Element title = getDocument().getFirstElement("title"); |
323 | 327 | if (title == null) { |
324 | - occurrences | |
325 | - .add(this | |
326 | - .buildOccurrence( | |
327 | - "3.3", | |
328 | - true, | |
329 | - " Observação – Sem Fonte (Não existe título na página)", | |
328 | + occurrences.add(this.buildOccurrence("3.3",true," Observação – Sem Fonte (Não existe título na página)", | |
330 | 329 | getDocument().getFirstElement(), "1")); |
331 | 330 | // occurrences.add(new Occurrence("3.3", true, |
332 | 331 | // "Sem fonte (n�o existe t�tulo na p�gina)",OccurrenceClassification.CONTENT_INFORMATION,"1")); |
... | ... | @@ -349,9 +348,13 @@ public class ContentEvaluation extends Evaluation { |
349 | 348 | |
350 | 349 | private List<Occurrence> checkRecommendation21() { |
351 | 350 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); |
351 | + UrlSemArquiNoFinal objetoUrlSemArquiNoFinal = new UrlSemArquiNoFinal(); | |
352 | + | |
353 | + String urlSemArquiNoFinal = objetoUrlSemArquiNoFinal.urlSemArquivoNoFinal(getUrl()); | |
352 | 354 | |
353 | 355 | Element LinkComImg; |
354 | 356 | |
357 | + | |
355 | 358 | for (Element link : getDocument().getAllElements("a")) { |
356 | 359 | String href = link.getAttributeValue("href"); |
357 | 360 | String title = link.getAttributeValue("title"); |
... | ... | @@ -431,32 +434,47 @@ public class ContentEvaluation extends Evaluation { |
431 | 434 | if (link != null && hasLongContent(link)) |
432 | 435 | occurrences.add(this.buildOccurrence("3.5", false, |
433 | 436 | link.toString(), link, "13"));//"9")); |
434 | - | |
435 | - if (link != null && isLinkUnavailable(link, getUrl())) | |
436 | - occurrences.add(this.buildOccurrence("3.5", true, | |
437 | - link.toString(), link, "14"));//"10")); | |
438 | - | |
437 | + | |
438 | + String retorno = ""; | |
439 | + if (link != null) | |
440 | + retorno = isLinkUnavailable(link, urlSemArquiNoFinal); | |
441 | + if(retorno.equalsIgnoreCase("erro")) | |
442 | + { | |
443 | + occurrences.add(this.buildOccurrence("3.5", true, | |
444 | + link.toString(), link, "14"));//"10")); | |
445 | + }else if(retorno.equalsIgnoreCase("aviso")) | |
446 | + { | |
447 | + occurrences.add(this.buildOccurrence("3.5", false, | |
448 | + link.toString(), link, "15"));//"10")); | |
449 | + } | |
450 | + | |
439 | 451 | } |
440 | 452 | return occurrences; |
441 | 453 | } |
442 | - | |
443 | - private boolean isLinkUnavailable(Element link, String url) { | |
454 | + | |
455 | + private String isLinkUnavailable(Element link, String url) { | |
444 | 456 | |
445 | 457 | String href = link.getAttributeValue("href"); |
446 | - | |
458 | + | |
447 | 459 | if (href != null && href.startsWith("www")) |
460 | + { | |
448 | 461 | href = "http://" + href; |
462 | + } | |
449 | 463 | |
450 | 464 | if (href != null && !href.startsWith("http") && url != null) |
465 | + { | |
451 | 466 | href = url + "/" + link.getAttributeValue("href"); |
452 | - | |
467 | + | |
468 | + } | |
469 | + | |
453 | 470 | if (link.getAttributeValue("href") != null |
454 | 471 | && !link.getAttributeValue("href").toString().trim().equalsIgnoreCase("") |
455 | 472 | && !link.getAttributeValue("href").substring(0, 1).equals("#") |
456 | 473 | && !link.getAttributeValue("href").substring(0, 1).equals("/") |
457 | 474 | && !link.getAttributeValue("href").contains("javascript") |
458 | 475 | && !link.getAttributeValue("href").contains("@")) { |
459 | - | |
476 | + | |
477 | + | |
460 | 478 | |
461 | 479 | /* |
462 | 480 | * int[] codErro = { 400, 401, 402, 403, 404, 405, 406, 407, 408, |
... | ... | @@ -472,6 +490,9 @@ public class ContentEvaluation extends Evaluation { |
472 | 490 | // if(!IsMatch(href,regex)) |
473 | 491 | // return true; |
474 | 492 | |
493 | + HttpMethod metodoRequisicaoGET = null; | |
494 | + HttpClient clienteHTTPJakartaCommons; | |
495 | + URL UrlConvertida; | |
475 | 496 | try { |
476 | 497 | String[] test = href.split("\\../"); |
477 | 498 | String newurl = ""; |
... | ... | @@ -479,19 +500,41 @@ public class ContentEvaluation extends Evaluation { |
479 | 500 | newurl = newurl + tes.trim(); |
480 | 501 | |
481 | 502 | newurl = newurl.replace(" ", "%20"); |
482 | - | |
483 | - URL u = new URL(newurl); | |
484 | - | |
485 | - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); | |
486 | - huc.setRequestMethod("GET"); | |
487 | - // huc.setRequestMethod("HEAD"); | |
488 | - huc.connect(); | |
489 | - codResponse = huc.getResponseCode(); | |
490 | - huc.disconnect(); | |
491 | - } catch (MalformedURLException e) { | |
492 | - return true; | |
503 | + | |
504 | + //UrlConvertida = new URL(newurl); | |
505 | + | |
506 | + | |
507 | + System.out.println(newurl); | |
508 | + //Código copiado da classe WebAgent.java para garantir o acesso | |
509 | + //aos links da página por meio do cliente da API Jakarta Commons VErsão 3.1 | |
510 | + clienteHTTPJakartaCommons = new HttpClient(); | |
511 | + clienteHTTPJakartaCommons.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3,false)); | |
512 | + clienteHTTPJakartaCommons.getParams().setParameter("http.protocol.allow-circular-redirects", true); | |
513 | + | |
514 | + metodoRequisicaoGET = new GetMethod(URLEncoder.encode(newurl, "UTF-8")); | |
515 | + metodoRequisicaoGET.setRequestHeader("http.agent", "Jakarta Commons-HttpClient/3.1"); | |
516 | + metodoRequisicaoGET.setFollowRedirects(true); | |
517 | + | |
518 | + codResponse = clienteHTTPJakartaCommons.executeMethod(metodoRequisicaoGET); | |
519 | + | |
520 | + } catch (MalformedURLException e) { | |
521 | + e.printStackTrace(); | |
522 | + return "aviso"; | |
523 | + | |
493 | 524 | } catch (IOException e) { |
494 | - return true; | |
525 | + e.printStackTrace(); | |
526 | + return "aviso"; | |
527 | + | |
528 | + } catch (IllegalArgumentException e) { | |
529 | + e.printStackTrace(); | |
530 | + return "aviso"; | |
531 | + | |
532 | + } finally { | |
533 | + | |
534 | + if (metodoRequisicaoGET != null){ | |
535 | + metodoRequisicaoGET.releaseConnection(); | |
536 | + } | |
537 | + | |
495 | 538 | } |
496 | 539 | |
497 | 540 | /* |
... | ... | @@ -500,13 +543,13 @@ public class ContentEvaluation extends Evaluation { |
500 | 543 | */ |
501 | 544 | for (int cod : codErro) { |
502 | 545 | if (codResponse == cod) { |
503 | - return true; | |
546 | + return "erro"; | |
504 | 547 | } |
505 | 548 | |
506 | 549 | } |
507 | 550 | } |
508 | 551 | |
509 | - return false; | |
552 | + return "false"; | |
510 | 553 | } |
511 | 554 | |
512 | 555 | private static boolean IsMatch(String s, String pattern) { |
... | ... | @@ -551,7 +594,7 @@ public class ContentEvaluation extends Evaluation { |
551 | 594 | return temConteudo; |
552 | 595 | |
553 | 596 | /* quantidadeElementosDentroLink = link.getAllElements().size(); |
554 | - //Verifica se há elementos como img, span e outros dentro do link. o valor 1 já está atribuído a tag <a> (link avaliado) | |
597 | + //Verifica se h� elementos como img, span e outros dentro do link. o valor 1 j� est� atribu�do a tag <a> (link avaliado) | |
555 | 598 | if(quantidadeElementosDentroLink > 1) |
556 | 599 | { |
557 | 600 | temConteudo = true; |
... | ... | @@ -934,89 +977,47 @@ public class ContentEvaluation extends Evaluation { |
934 | 977 | |
935 | 978 | private List<Occurrence> checkRecommendation26() { |
936 | 979 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); |
937 | - | |
938 | - /* | |
939 | - * for (Element table : getDocument().getAllElements("table")) { | |
940 | - * occurrences.add(buildOccurrence("3.10", false, table.toString(), | |
941 | - * table, "1")); } | |
942 | - */ | |
943 | - | |
944 | - /* | |
945 | - * for (Element table : getDocument().getAllElements("table")) { for | |
946 | - * (Element caption : table.getAllElements("caption")) { if (caption == | |
947 | - * null || caption.isEmpty()) occurrences.add(buildOccurrence("3.10", | |
948 | - * true, table .getStartTag().toString(), table, "1")); } } | |
949 | - */ | |
950 | - | |
980 | + boolean temAssociacao = false; | |
981 | + | |
951 | 982 | for (Element table : getDocument().getAllElements("table")) { |
952 | 983 | // Attribute summary = table.getAttributes().get("summary"); |
953 | 984 | |
954 | - boolean THusaScope = false; | |
955 | - boolean THusaId = false; | |
956 | - boolean THusaHeaders = false; | |
957 | - boolean TDusaScope = false; | |
958 | - boolean TDusaId = false; | |
959 | - boolean TDusaHeaders = false; | |
960 | - boolean usaThead = false; | |
961 | - boolean usaTfoot = false; | |
962 | - boolean usaTbody = false; | |
963 | - | |
964 | - /* | |
965 | - * if (summary == null || summary.getValue().equals("")) | |
966 | - * occurrences.add(buildOccurrence("3.10", true, table | |
967 | - * .getStartTag().toString(), table, "1")); | |
968 | - */ | |
969 | - | |
970 | - for (Element thead : table.getAllElements("thead")) { | |
971 | - if (thead != null) | |
972 | - usaThead = true; | |
985 | + temAssociacao = false; | |
986 | + | |
987 | + if(table.getAllElements("thead").size() > 0 && table.getAllElements("tbody").size() > 0) | |
988 | + { | |
989 | + temAssociacao = true; | |
973 | 990 | } |
974 | - | |
975 | - for (Element tfoot : table.getAllElements("tfoot")) { | |
976 | - if (tfoot != null) | |
977 | - usaTfoot = true; | |
991 | + else | |
992 | + { | |
993 | + for (Element coluna : table.getAllElements("td")) { | |
994 | + if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null || | |
995 | + coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null) | |
996 | + { | |
997 | + temAssociacao = true; | |
998 | + } | |
999 | + | |
1000 | + } | |
1001 | + if(!temAssociacao) | |
1002 | + { | |
1003 | + for (Element coluna : table.getAllElements("th")) { | |
1004 | + if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null || | |
1005 | + coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null) | |
1006 | + { | |
1007 | + temAssociacao = true; | |
1008 | + } | |
1009 | + | |
1010 | + } | |
1011 | + } | |
978 | 1012 | } |
979 | - | |
980 | - for (Element tbody : table.getAllElements("tbody")) { | |
981 | - if (tbody != null) | |
982 | - usaTbody = true; | |
1013 | + | |
1014 | + if(!temAssociacao) | |
1015 | + { | |
1016 | + occurrences.add(this.buildOccurrence("3.10", true, table.getStartTag().toString(), table, "1")); | |
983 | 1017 | } |
984 | - | |
985 | - /* | |
986 | - * if(!usaThead && !usaTbody && !usaTfoot){ | |
987 | - * | |
988 | - * //occurrences.add(this.buildOccurrence("3.10", true, | |
989 | - * table.getAllStartTags("table").get(0).toString(), table, "1")); | |
990 | - * | |
991 | - * for (Element th : table.getAllElements("th")) { Attribute scope = | |
992 | - * th.getAttributes().get("scope"); Attribute headers = | |
993 | - * th.getAttributes().get("headers"); Attribute id = | |
994 | - * th.getAttributes().get("id"); if (scope != null && | |
995 | - * !scope.getValue().equals("")) { THusaScope = true; } else if | |
996 | - * (headers != null && !headers.getValue().equals("")) { | |
997 | - * THusaHeaders = true; } else if (id != null && | |
998 | - * !id.getValue().equals("")) { THusaId = true; } | |
999 | - * | |
1000 | - * if(!THusaScope && !THusaHeaders && !THusaId){ | |
1001 | - * occurrences.add(this.buildOccurrence("3.10", true, th.toString(), | |
1002 | - * th, "1")); } } | |
1003 | - * | |
1004 | - * for (Element td : table.getAllElements("td")) { Attribute tdscope | |
1005 | - * = td.getAttributes().get("scope"); Attribute tdheaders = | |
1006 | - * td.getAttributes().get("headers"); Attribute tdid = | |
1007 | - * td.getAttributes().get("id"); if (tdscope != null && | |
1008 | - * !tdscope.getValue().equals("")) { TDusaScope = true; } else if | |
1009 | - * (tdheaders != null && !tdheaders.getValue().equals("")) { | |
1010 | - * TDusaHeaders = true; } else if (tdid != null && | |
1011 | - * !tdid.getValue().equals("")) { TDusaId = true; } | |
1012 | - * | |
1013 | - * if(!TDusaScope && !TDusaHeaders && !TDusaId){ | |
1014 | - * occurrences.add(this.buildOccurrence("3.10", true, td.toString(), | |
1015 | - * td, "1")); } } | |
1016 | - * | |
1017 | - * } | |
1018 | - */ | |
1018 | + | |
1019 | 1019 | } |
1020 | + | |
1020 | 1021 | // Sorting |
1021 | 1022 | Collections.sort(occurrences, new Comparator<Occurrence>() { |
1022 | 1023 | public int compare(Occurrence occurrence1, Occurrence occurrence2) { | ... | ... |