Commit df26209056692b85564a56d380c6fadf24b72232
1 parent
8692771a
Exists in
master
Criação de método para verificar a conexão dos links; adicionado timeout
ao método.
Showing
1 changed file
with
239 additions
and
195 deletions
Show diff stats
src/main/java/br/com/checker/emag/core/ContentEvaluation.java
@@ -22,9 +22,11 @@ import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; | @@ -22,9 +22,11 @@ import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; | ||
22 | import org.apache.commons.httpclient.HttpClient; | 22 | import org.apache.commons.httpclient.HttpClient; |
23 | import org.apache.commons.httpclient.HttpMethod; | 23 | import org.apache.commons.httpclient.HttpMethod; |
24 | import org.apache.commons.httpclient.methods.GetMethod; | 24 | import org.apache.commons.httpclient.methods.GetMethod; |
25 | +import org.apache.commons.httpclient.params.HttpConnectionParams; | ||
25 | import org.apache.commons.httpclient.params.HttpMethodParams; | 26 | import org.apache.commons.httpclient.params.HttpMethodParams; |
26 | import org.apache.commons.lang3.StringUtils; | 27 | import org.apache.commons.lang3.StringUtils; |
27 | 28 | ||
29 | + | ||
28 | import br.com.checker.emag.Occurrence; | 30 | import br.com.checker.emag.Occurrence; |
29 | import br.com.checker.emag.OccurrenceClassification; | 31 | import br.com.checker.emag.OccurrenceClassification; |
30 | import br.com.checker.emag.core.SpecificRecommendation.ContentRecommendation; | 32 | import br.com.checker.emag.core.SpecificRecommendation.ContentRecommendation; |
@@ -55,6 +57,7 @@ public class ContentEvaluation extends Evaluation { | @@ -55,6 +57,7 @@ public class ContentEvaluation extends Evaluation { | ||
55 | public SpecificRecommendation recommendation17() { | 57 | public SpecificRecommendation recommendation17() { |
56 | return new EvaluationRecommendation17(); | 58 | return new EvaluationRecommendation17(); |
57 | } | 59 | } |
60 | + | ||
58 | public SpecificRecommendation recommendation18() { | 61 | public SpecificRecommendation recommendation18() { |
59 | return new EvaluationRecommendation18(); | 62 | return new EvaluationRecommendation18(); |
60 | } | 63 | } |
@@ -190,11 +193,12 @@ public class ContentEvaluation extends Evaluation { | @@ -190,11 +193,12 @@ public class ContentEvaluation extends Evaluation { | ||
190 | getOccurrences().addAll(checkRecommendation17()); | 193 | getOccurrences().addAll(checkRecommendation17()); |
191 | getOccurrences().addAll(checkRecommendation18()); | 194 | getOccurrences().addAll(checkRecommendation18()); |
192 | getOccurrences().addAll(checkRecommendation19()); | 195 | getOccurrences().addAll(checkRecommendation19()); |
193 | - //getOccurrences().addAll(checkRecommendation20()); comentado por Gibran | 196 | + // getOccurrences().addAll(checkRecommendation20()); comentado por |
197 | + // Gibran | ||
194 | getOccurrences().addAll(checkRecommendation21()); | 198 | getOccurrences().addAll(checkRecommendation21()); |
195 | getOccurrences().addAll(checkRecommendation22()); | 199 | getOccurrences().addAll(checkRecommendation22()); |
196 | getOccurrences().addAll(checkRecommendation23()); | 200 | getOccurrences().addAll(checkRecommendation23()); |
197 | - //getOccurrences().addAll(checkRecommendation24()); | 201 | + // getOccurrences().addAll(checkRecommendation24()); |
198 | getOccurrences().addAll(checkRecommendation25()); | 202 | getOccurrences().addAll(checkRecommendation25()); |
199 | getOccurrences().addAll(checkRecommendation26()); | 203 | getOccurrences().addAll(checkRecommendation26()); |
200 | getOccurrences().addAll(checkRecommendation27()); | 204 | getOccurrences().addAll(checkRecommendation27()); |
@@ -250,16 +254,16 @@ public class ContentEvaluation extends Evaluation { | @@ -250,16 +254,16 @@ public class ContentEvaluation extends Evaluation { | ||
250 | 254 | ||
251 | if (lang == null && (xmlLang != null || xmlns != null)) { | 255 | if (lang == null && (xmlLang != null || xmlns != null)) { |
252 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, | 256 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, |
253 | - html, "3"));//"2")); | 257 | + html, "3"));// "2")); |
254 | } else if (lang != null && lang.getValue().isEmpty()) { | 258 | } else if (lang != null && lang.getValue().isEmpty()) { |
255 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, | 259 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, |
256 | - html, "3"));//"2")); | 260 | + html, "3"));// "2")); |
257 | } else if (xmlLang != null && xmlLang.getValue().isEmpty()) { | 261 | } else if (xmlLang != null && xmlLang.getValue().isEmpty()) { |
258 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, | 262 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, |
259 | - html, "3"));//"2")); | 263 | + html, "3"));// "2")); |
260 | } else if (xmlns != null && xmlns.getValue().isEmpty()) { | 264 | } else if (xmlns != null && xmlns.getValue().isEmpty()) { |
261 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, | 265 | occurrences.add(this.buildOccurrence("3.1", false, tagHtml, |
262 | - html, "3"));//"2")); | 266 | + html, "3"));// "2")); |
263 | } | 267 | } |
264 | 268 | ||
265 | /* | 269 | /* |
@@ -300,17 +304,16 @@ public class ContentEvaluation extends Evaluation { | @@ -300,17 +304,16 @@ public class ContentEvaluation extends Evaluation { | ||
300 | } | 304 | } |
301 | 305 | ||
302 | public String retornarTituloSiteAvaliado() { | 306 | public String retornarTituloSiteAvaliado() { |
303 | - | 307 | + |
304 | Element titulo = getDocument().getFirstElement("title"); | 308 | Element titulo = getDocument().getFirstElement("title"); |
305 | - | 309 | + |
306 | String titulo_site = ""; | 310 | String titulo_site = ""; |
307 | - | ||
308 | - | ||
309 | - if (titulo != null) { | ||
310 | - | ||
311 | - titulo_site = titulo.getContent().getTextExtractor().toString(); | ||
312 | - } | ||
313 | - | 311 | + |
312 | + if (titulo != null) { | ||
313 | + | ||
314 | + titulo_site = titulo.getContent().getTextExtractor().toString(); | ||
315 | + } | ||
316 | + | ||
314 | return titulo_site; | 317 | return titulo_site; |
315 | } | 318 | } |
316 | 319 | ||
@@ -326,7 +329,12 @@ public class ContentEvaluation extends Evaluation { | @@ -326,7 +329,12 @@ public class ContentEvaluation extends Evaluation { | ||
326 | // if(head != null) { | 329 | // if(head != null) { |
327 | Element title = getDocument().getFirstElement("title"); | 330 | Element title = getDocument().getFirstElement("title"); |
328 | if (title == null) { | 331 | if (title == null) { |
329 | - occurrences.add(this.buildOccurrence("3.3",true," Observação – Sem Fonte (Não existe título na página)", | 332 | + occurrences |
333 | + .add(this | ||
334 | + .buildOccurrence( | ||
335 | + "3.3", | ||
336 | + true, | ||
337 | + " Observação – Sem Fonte (Não existe título na página)", | ||
330 | getDocument().getFirstElement(), "1")); | 338 | getDocument().getFirstElement(), "1")); |
331 | // occurrences.add(new Occurrence("3.3", true, | 339 | // occurrences.add(new Occurrence("3.3", true, |
332 | // "Sem fonte (n�o existe t�tulo na p�gina)",OccurrenceClassification.CONTENT_INFORMATION,"1")); | 340 | // "Sem fonte (n�o existe t�tulo na p�gina)",OccurrenceClassification.CONTENT_INFORMATION,"1")); |
@@ -350,60 +358,46 @@ public class ContentEvaluation extends Evaluation { | @@ -350,60 +358,46 @@ public class ContentEvaluation extends Evaluation { | ||
350 | private List<Occurrence> checkRecommendation21() { | 358 | private List<Occurrence> checkRecommendation21() { |
351 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); | 359 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); |
352 | UrlSemArquiNoFinal objetoUrlSemArquiNoFinal = new UrlSemArquiNoFinal(); | 360 | UrlSemArquiNoFinal objetoUrlSemArquiNoFinal = new UrlSemArquiNoFinal(); |
353 | - | 361 | + |
354 | String urlSemArquiNoFinal = objetoUrlSemArquiNoFinal.urlSemArquivoNoFinal(getUrl()); | 362 | String urlSemArquiNoFinal = objetoUrlSemArquiNoFinal.urlSemArquivoNoFinal(getUrl()); |
355 | - | 363 | + |
356 | Element LinkComImg; | 364 | Element LinkComImg; |
357 | - | ||
358 | - | 365 | + |
359 | for (Element link : getDocument().getAllElements("a")) { | 366 | for (Element link : getDocument().getAllElements("a")) { |
360 | String href = link.getAttributeValue("href"); | 367 | String href = link.getAttributeValue("href"); |
361 | String title = link.getAttributeValue("title"); | 368 | String title = link.getAttributeValue("title"); |
362 | String content = link.getContent().toString(); | 369 | String content = link.getContent().toString(); |
363 | - | 370 | + |
364 | if (hasEqualsContentHref(link) && isRegistroBr(content)) | 371 | if (hasEqualsContentHref(link) && isRegistroBr(content)) |
365 | occurrences.add(this.buildOccurrence("3.5", false, | 372 | occurrences.add(this.buildOccurrence("3.5", false, |
366 | link.toString(), link, "1")); | 373 | link.toString(), link, "1")); |
367 | - | ||
368 | - | 374 | + |
369 | LinkComImg = link.getFirstElement("img"); | 375 | LinkComImg = link.getFirstElement("img"); |
370 | - | ||
371 | - if(LinkComImg == null) | ||
372 | - { | ||
373 | - if (!hasContent(link)) | ||
374 | - { | 376 | + |
377 | + if (LinkComImg == null) { | ||
378 | + if (!hasContent(link)) { | ||
375 | occurrences.add(this.buildOccurrence("3.5", true, | 379 | occurrences.add(this.buildOccurrence("3.5", true, |
376 | - link.toString(), link, "3"));//"2")); | 380 | + link.toString(), link, "3"));// "2")); |
377 | } | 381 | } |
378 | - } | ||
379 | - else | ||
380 | - { | ||
381 | - if (hasLinkComImgWithoutAlt(link)) | ||
382 | - { | 382 | + } else { |
383 | + if (hasLinkComImgWithoutAlt(link)) { | ||
383 | occurrences.add(this.buildOccurrence("3.5", true, | 384 | occurrences.add(this.buildOccurrence("3.5", true, |
384 | - link.toString(), link, "3"));//"2")); | 385 | + link.toString(), link, "3"));// "2")); |
385 | } | 386 | } |
386 | } | 387 | } |
387 | - | ||
388 | - | ||
389 | - if(LinkComImg == null) | ||
390 | - { | 388 | + |
389 | + if (LinkComImg == null) { | ||
391 | // if(hasTitle(link) && isNotAlt(link)) | 390 | // if(hasTitle(link) && isNotAlt(link)) |
392 | - if (hasTitle(link) | ||
393 | - && !hasContent(link)) | 391 | + if (hasTitle(link) && !hasContent(link)) |
394 | occurrences.add(this.buildOccurrence("3.5", true, | 392 | occurrences.add(this.buildOccurrence("3.5", true, |
395 | - link.toString(), link, "4"));//"3")); | ||
396 | - } | ||
397 | - else | ||
398 | - { | 393 | + link.toString(), link, "4"));// "3")); |
394 | + } else { | ||
399 | // if(hasTitle(link) && isNotAlt(link)) | 395 | // if(hasTitle(link) && isNotAlt(link)) |
400 | - if (hasTitle(link) | ||
401 | - && !hasContent(link) | 396 | + if (hasTitle(link) && !hasContent(link) |
402 | && hasLinkComImgWithoutAlt(link)) | 397 | && hasLinkComImgWithoutAlt(link)) |
403 | occurrences.add(this.buildOccurrence("3.5", true, | 398 | occurrences.add(this.buildOccurrence("3.5", true, |
404 | - link.toString(), link, "4"));//"3")); | 399 | + link.toString(), link, "4"));// "3")); |
405 | } | 400 | } |
406 | - | ||
407 | 401 | ||
408 | /* | 402 | /* |
409 | * if(!hasTitle(link) && !hasContent(link) && | 403 | * if(!hasTitle(link) && !hasContent(link) && |
@@ -414,69 +408,64 @@ public class ContentEvaluation extends Evaluation { | @@ -414,69 +408,64 @@ public class ContentEvaluation extends Evaluation { | ||
414 | 408 | ||
415 | if (hasImgWithoutAlt(link)) | 409 | if (hasImgWithoutAlt(link)) |
416 | occurrences.add(this.buildOccurrence("3.5", true, | 410 | occurrences.add(this.buildOccurrence("3.5", true, |
417 | - link.toString(), link, "5"));//"4")); | 411 | + link.toString(), link, "5"));// "4")); |
418 | 412 | ||
419 | if (hasLeiaMaisDescription(link)) | 413 | if (hasLeiaMaisDescription(link)) |
420 | occurrences.add(this.buildOccurrence("3.5", true, | 414 | occurrences.add(this.buildOccurrence("3.5", true, |
421 | - link.toString(), link, "6"));//"5")); | 415 | + link.toString(), link, "6"));// "5")); |
422 | 416 | ||
423 | if (hasDiferenteContentSameLink(link)) | 417 | if (hasDiferenteContentSameLink(link)) |
424 | occurrences.add(this.buildOccurrence("3.5", false, | 418 | occurrences.add(this.buildOccurrence("3.5", false, |
425 | - link.toString(), link, "10"));//"6")); | 419 | + link.toString(), link, "10"));// "6")); |
426 | 420 | ||
427 | if (isTitleEqualsContent(link)) | 421 | if (isTitleEqualsContent(link)) |
428 | occurrences.add(this.buildOccurrence("3.5", true, | 422 | occurrences.add(this.buildOccurrence("3.5", true, |
429 | - link.toString(), link, "12"));//"8")); | 423 | + link.toString(), link, "12"));// "8")); |
430 | 424 | ||
431 | if (hasSameContentDiferentLink(link)) | 425 | if (hasSameContentDiferentLink(link)) |
432 | occurrences.add(this.buildOccurrence("3.5", true, | 426 | occurrences.add(this.buildOccurrence("3.5", true, |
433 | - link.toString(), link, "11"));//"7")); | 427 | + link.toString(), link, "11"));// "7")); |
434 | 428 | ||
435 | if (link != null && hasLongContent(link)) | 429 | if (link != null && hasLongContent(link)) |
436 | occurrences.add(this.buildOccurrence("3.5", false, | 430 | occurrences.add(this.buildOccurrence("3.5", false, |
437 | - link.toString(), link, "13"));//"9")); | ||
438 | - | 431 | + link.toString(), link, "13"));// "9")); |
432 | + | ||
439 | String retorno = ""; | 433 | String retorno = ""; |
440 | if (link != null) | 434 | if (link != null) |
441 | retorno = isLinkUnavailable(link, urlSemArquiNoFinal); | 435 | retorno = isLinkUnavailable(link, urlSemArquiNoFinal); |
442 | - if(retorno.equalsIgnoreCase("erro")) | ||
443 | - { | ||
444 | - occurrences.add(this.buildOccurrence("3.5", true, | ||
445 | - link.toString(), link, "14"));//"10")); | ||
446 | - }else if(retorno.equalsIgnoreCase("aviso")) | ||
447 | - { | ||
448 | - occurrences.add(this.buildOccurrence("3.5", false, | ||
449 | - link.toString(), link, "15"));//"10")); | ||
450 | - } | ||
451 | - | 436 | + if (retorno.equalsIgnoreCase("erro")) { |
437 | + occurrences.add(this.buildOccurrence("3.5", true, | ||
438 | + link.toString(), link, "14"));// "10")); | ||
439 | + } else if (retorno.equalsIgnoreCase("aviso")) { | ||
440 | + occurrences.add(this.buildOccurrence("3.5", false, | ||
441 | + link.toString(), link, "15"));// "10")); | ||
442 | + } | ||
443 | + | ||
452 | } | 444 | } |
453 | return occurrences; | 445 | return occurrences; |
454 | } | 446 | } |
455 | - | 447 | + |
456 | private String isLinkUnavailable(Element link, String url) { | 448 | private String isLinkUnavailable(Element link, String url) { |
457 | 449 | ||
458 | String href = link.getAttributeValue("href"); | 450 | String href = link.getAttributeValue("href"); |
459 | - | ||
460 | - if (href != null && href.startsWith("www")) | ||
461 | - { | 451 | + |
452 | + if (href != null && href.startsWith("www")) { | ||
462 | href = "http://" + href; | 453 | href = "http://" + href; |
463 | } | 454 | } |
464 | 455 | ||
465 | - if (href != null && !href.startsWith("http") && url != null) | ||
466 | - { | 456 | + if (href != null && !href.startsWith("http") && url != null) { |
467 | href = url + "/" + link.getAttributeValue("href"); | 457 | href = url + "/" + link.getAttributeValue("href"); |
468 | - | 458 | + |
469 | } | 459 | } |
470 | - | ||
471 | - if (link.getAttributeValue("href") != null | ||
472 | - && !link.getAttributeValue("href").toString().trim().equalsIgnoreCase("") | 460 | + |
461 | + if (link.getAttributeValue("href") != null | ||
462 | + && !link.getAttributeValue("href").toString().trim() | ||
463 | + .equalsIgnoreCase("") | ||
473 | && !link.getAttributeValue("href").substring(0, 1).equals("#") | 464 | && !link.getAttributeValue("href").substring(0, 1).equals("#") |
474 | && !link.getAttributeValue("href").substring(0, 1).equals("/") | 465 | && !link.getAttributeValue("href").substring(0, 1).equals("/") |
475 | && !link.getAttributeValue("href").contains("javascript") | 466 | && !link.getAttributeValue("href").contains("javascript") |
476 | && !link.getAttributeValue("href").contains("@")) { | 467 | && !link.getAttributeValue("href").contains("@")) { |
477 | - | ||
478 | - | ||
479 | - | 468 | + |
480 | /* | 469 | /* |
481 | * int[] codErro = { 400, 401, 402, 403, 404, 405, 406, 407, 408, | 470 | * int[] codErro = { 400, 401, 402, 403, 404, 405, 406, 407, 408, |
482 | * 409, 410, 411, 412, 414, 415, 416, 417, 418, 422, 423, 424, 425, | 471 | * 409, 410, 411, 412, 414, 415, 416, 417, 418, 422, 423, 424, 425, |
@@ -494,52 +483,59 @@ public class ContentEvaluation extends Evaluation { | @@ -494,52 +483,59 @@ public class ContentEvaluation extends Evaluation { | ||
494 | HttpMethod metodoRequisicaoGET = null; | 483 | HttpMethod metodoRequisicaoGET = null; |
495 | HttpClient clienteHTTPJakartaCommons; | 484 | HttpClient clienteHTTPJakartaCommons; |
496 | URL UrlConvertida; | 485 | URL UrlConvertida; |
497 | - try { | ||
498 | - String[] test = href.split("\\../"); | ||
499 | - String newurl = ""; | ||
500 | - for (String tes : test) | ||
501 | - newurl = newurl + tes.trim(); | ||
502 | - | ||
503 | - newurl = newurl.replace(" ", "%20"); | ||
504 | - | ||
505 | - UrlConvertida = new URL(newurl); | ||
506 | - | ||
507 | - | ||
508 | - //Código copiado da classe WebAgent.java para garantir o acesso | ||
509 | - //aos links da página por meio do cliente da API Jakarta Commons VErsão 3.1 | ||
510 | - clienteHTTPJakartaCommons = new HttpClient(); | ||
511 | - clienteHTTPJakartaCommons.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3,false)); | ||
512 | - clienteHTTPJakartaCommons.getParams().setParameter("http.protocol.allow-circular-redirects", true); | ||
513 | - | ||
514 | - metodoRequisicaoGET = new GetMethod(UrlConvertida.toExternalForm());//URLEncoder.encode(UrlConvertida.toExternalForm(), "UTF-8")); | 486 | + // try { |
515 | 487 | ||
516 | - metodoRequisicaoGET.setRequestHeader("user-agent", "NewUseAgent/1.0"); | ||
517 | - metodoRequisicaoGET.setRequestHeader("http.agent", "Jakarta Commons-HttpClient/3.1"); | ||
518 | - metodoRequisicaoGET.setFollowRedirects(true); | ||
519 | - | ||
520 | - codResponse = clienteHTTPJakartaCommons.executeMethod(metodoRequisicaoGET); | ||
521 | - | ||
522 | - } catch (MalformedURLException e) { | ||
523 | - e.printStackTrace(); | ||
524 | - return "aviso"; | ||
525 | - | ||
526 | - } catch (IOException e) { | ||
527 | - e.printStackTrace(); | ||
528 | - return "aviso"; | ||
529 | - | ||
530 | - } catch (IllegalArgumentException e) { | ||
531 | - e.printStackTrace(); | ||
532 | - return "aviso"; | ||
533 | - | ||
534 | - } finally { | ||
535 | - | ||
536 | - if (metodoRequisicaoGET != null){ | ||
537 | - metodoRequisicaoGET.releaseConnection(); | 488 | + |
489 | + // Código copiado da classe WebAgent.java para garantir o acesso | ||
490 | + // aos links da página por meio do cliente da API Jakarta Commons | ||
491 | + // VErsão 3.1 | ||
492 | + /* | ||
493 | + * clienteHTTPJakartaCommons = new HttpClient(); | ||
494 | + * clienteHTTPJakartaCommons | ||
495 | + * .getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new | ||
496 | + * DefaultHttpMethodRetryHandler(3,false)); | ||
497 | + * clienteHTTPJakartaCommons | ||
498 | + * .getParams().setParameter("http.protocol.allow-circular-redirects" | ||
499 | + * , true); clienteHTTPJakartaCommons.getParams().setParameter( | ||
500 | + * HttpConnectionParams.CONNECTION_TIMEOUT, new Integer(2000)); | ||
501 | + * | ||
502 | + * metodoRequisicaoGET = new | ||
503 | + * GetMethod(UrlConvertida.toExternalForm() | ||
504 | + * );//URLEncoder.encode(UrlConvertida.toExternalForm(), "UTF-8")); | ||
505 | + * | ||
506 | + * metodoRequisicaoGET.setRequestHeader("user-agent", | ||
507 | + * "NewUseAgent/1.0"); | ||
508 | + * metodoRequisicaoGET.setRequestHeader("http.agent", | ||
509 | + * "Jakarta Commons-HttpClient/3.1"); | ||
510 | + * metodoRequisicaoGET.setFollowRedirects(true); | ||
511 | + */ | ||
512 | + | ||
513 | + codResponse = verificarConexao(href, "user-agent","NewUseAgent/1.0"); | ||
514 | + if (codResponse != 200) { | ||
515 | + codResponse = verificarConexao(href, "http.agent", "Jakarta Commons-HttpClient/3.1"); | ||
516 | + if (codResponse == -1) { | ||
517 | + return "aviso"; | ||
538 | } | 518 | } |
539 | - | ||
540 | } | 519 | } |
541 | 520 | ||
542 | /* | 521 | /* |
522 | + * } catch (MalformedURLException e) { e.printStackTrace(); return | ||
523 | + * "aviso"; | ||
524 | + * | ||
525 | + * } catch (IOException e) { e.printStackTrace(); return "aviso"; | ||
526 | + * | ||
527 | + * } catch (IllegalArgumentException e) { e.printStackTrace(); | ||
528 | + * return "aviso"; | ||
529 | + * | ||
530 | + * } finally { | ||
531 | + * | ||
532 | + * if (metodoRequisicaoGET != null){ | ||
533 | + * metodoRequisicaoGET.releaseConnection(); } | ||
534 | + * | ||
535 | + * } | ||
536 | + */ | ||
537 | + | ||
538 | + /* | ||
543 | * if(huc.getResponseCode() != HttpURLConnection.HTTP_OK) | 539 | * if(huc.getResponseCode() != HttpURLConnection.HTTP_OK) |
544 | * System.out.println(link.toString()); | 540 | * System.out.println(link.toString()); |
545 | */ | 541 | */ |
@@ -548,12 +544,68 @@ public class ContentEvaluation extends Evaluation { | @@ -548,12 +544,68 @@ public class ContentEvaluation extends Evaluation { | ||
548 | return "erro"; | 544 | return "erro"; |
549 | } | 545 | } |
550 | 546 | ||
547 | + // } | ||
548 | + | ||
549 | + | ||
551 | } | 550 | } |
552 | } | 551 | } |
553 | - | ||
554 | return "false"; | 552 | return "false"; |
555 | } | 553 | } |
556 | 554 | ||
555 | + private Integer verificarConexao(String href, String usuario, String agente) { | ||
556 | + | ||
557 | + HttpMethod metodoRequisicaoGET = null; | ||
558 | + HttpClient clienteHTTPJakartaCommons; | ||
559 | + URL UrlConvertida; | ||
560 | + try { | ||
561 | + | ||
562 | + String[] test = href.split("\\../"); | ||
563 | + String newurl = ""; | ||
564 | + for (String tes : test) | ||
565 | + newurl = newurl + tes.trim(); | ||
566 | + | ||
567 | + newurl = newurl.replace(" ", "%20"); | ||
568 | + | ||
569 | + UrlConvertida = new URL(newurl); | ||
570 | + | ||
571 | + // Código copiado da classe WebAgent.java para garantir o acesso | ||
572 | + // aos links da página por meio do cliente da API Jakarta Commons | ||
573 | + // VErsão 3.1 | ||
574 | + clienteHTTPJakartaCommons = new HttpClient(); | ||
575 | + clienteHTTPJakartaCommons.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,new DefaultHttpMethodRetryHandler(3, false)); | ||
576 | + clienteHTTPJakartaCommons.getParams().setParameter("http.protocol.allow-circular-redirects", true); | ||
577 | + clienteHTTPJakartaCommons.getParams().setParameter(HttpConnectionParams.CONNECTION_TIMEOUT, new Integer(2000)); | ||
578 | + | ||
579 | + metodoRequisicaoGET = new GetMethod(UrlConvertida.toExternalForm());// URLEncoder.encode(UrlConvertida.toExternalForm(), | ||
580 | + // "UTF-8")); | ||
581 | + | ||
582 | + metodoRequisicaoGET.setRequestHeader(usuario, agente); | ||
583 | + metodoRequisicaoGET.setFollowRedirects(true); | ||
584 | + | ||
585 | + return clienteHTTPJakartaCommons.executeMethod(metodoRequisicaoGET); | ||
586 | + | ||
587 | + } catch (MalformedURLException e) { | ||
588 | + e.printStackTrace(); | ||
589 | + return -1; | ||
590 | + | ||
591 | + } catch (IOException e) { | ||
592 | + e.printStackTrace(); | ||
593 | + return -1; | ||
594 | + | ||
595 | + } catch (IllegalArgumentException e) { | ||
596 | + e.printStackTrace(); | ||
597 | + return -1; | ||
598 | + | ||
599 | + } finally { | ||
600 | + | ||
601 | + if (metodoRequisicaoGET != null) { | ||
602 | + metodoRequisicaoGET.releaseConnection(); | ||
603 | + } | ||
604 | + | ||
605 | + } | ||
606 | + | ||
607 | + } | ||
608 | + | ||
557 | private static boolean IsMatch(String s, String pattern) { | 609 | private static boolean IsMatch(String s, String pattern) { |
558 | try { | 610 | try { |
559 | Pattern patt = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); | 611 | Pattern patt = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); |
@@ -573,40 +625,36 @@ public class ContentEvaluation extends Evaluation { | @@ -573,40 +625,36 @@ public class ContentEvaluation extends Evaluation { | ||
573 | return link.getContent().getTextExtractor().toString().length() > 500 ? true | 625 | return link.getContent().getTextExtractor().toString().length() > 500 ? true |
574 | : false; | 626 | : false; |
575 | } | 627 | } |
576 | - | ||
577 | - | 628 | + |
578 | private boolean hasContent(Element link) { | 629 | private boolean hasContent(Element link) { |
579 | - | 630 | + |
580 | boolean temConteudo; | 631 | boolean temConteudo; |
581 | - | ||
582 | - | ||
583 | - temConteudo = StringUtils.isNotBlank(link.getContent().getTextExtractor().toString()); | ||
584 | - | ||
585 | - if(!temConteudo) | ||
586 | - { | 632 | + |
633 | + temConteudo = StringUtils.isNotBlank(link.getContent() | ||
634 | + .getTextExtractor().toString()); | ||
635 | + | ||
636 | + if (!temConteudo) { | ||
587 | for (Element elemento : link.getAllElements()) { | 637 | for (Element elemento : link.getAllElements()) { |
588 | - temConteudo = StringUtils.isNotBlank(elemento.getContent().getTextExtractor().toString()); | ||
589 | - | ||
590 | - if(temConteudo) | ||
591 | - { | 638 | + temConteudo = StringUtils.isNotBlank(elemento.getContent() |
639 | + .getTextExtractor().toString()); | ||
640 | + | ||
641 | + if (temConteudo) { | ||
592 | break; | 642 | break; |
593 | } | 643 | } |
594 | - } | 644 | + } |
595 | } | 645 | } |
596 | return temConteudo; | 646 | return temConteudo; |
597 | - | ||
598 | -/* quantidadeElementosDentroLink = link.getAllElements().size(); | ||
599 | - //Verifica se há elementos como img, span e outros dentro do link. o valor 1 já está atribuído a tag <a> (link avaliado) | ||
600 | - if(quantidadeElementosDentroLink > 1) | ||
601 | - { | ||
602 | - temConteudo = true; | ||
603 | - } | ||
604 | - //avalia apenas o texto dentro do link ex: <a>texto entre o link</a> | ||
605 | - else | ||
606 | - { | ||
607 | - temConteudo = StringUtils.isNotBlank(link.getContent().getTextExtractor().toString()); | ||
608 | - } | ||
609 | - return temConteudo;*/ | 647 | + |
648 | + /* | ||
649 | + * quantidadeElementosDentroLink = link.getAllElements().size(); | ||
650 | + * //Verifica se há elementos como img, span e outros dentro do link. o | ||
651 | + * valor 1 já está atribuído a tag <a> (link avaliado) | ||
652 | + * if(quantidadeElementosDentroLink > 1) { temConteudo = true; } | ||
653 | + * //avalia apenas o texto dentro do link ex: <a>texto entre o link</a> | ||
654 | + * else { temConteudo = | ||
655 | + * StringUtils.isNotBlank(link.getContent().getTextExtractor | ||
656 | + * ().toString()); } return temConteudo; | ||
657 | + */ | ||
610 | } | 658 | } |
611 | 659 | ||
612 | private boolean hasTitle(Element link) { | 660 | private boolean hasTitle(Element link) { |
@@ -614,25 +662,21 @@ public class ContentEvaluation extends Evaluation { | @@ -614,25 +662,21 @@ public class ContentEvaluation extends Evaluation { | ||
614 | return title != null && StringUtils.isNotBlank(title); | 662 | return title != null && StringUtils.isNotBlank(title); |
615 | } | 663 | } |
616 | 664 | ||
617 | - | ||
618 | - | ||
619 | private boolean hasLinkComImgWithoutAlt(Element link) { | 665 | private boolean hasLinkComImgWithoutAlt(Element link) { |
620 | - | 666 | + |
621 | boolean temImgSemAlt = false; | 667 | boolean temImgSemAlt = false; |
622 | - | ||
623 | - String alt; | ||
624 | - | ||
625 | - for (Element elementoImagem : link.getAllElements("img")) { | ||
626 | - | ||
627 | - alt = elementoImagem.getAttributeValue("alt"); | ||
628 | - temImgSemAlt = (alt == null || StringUtils.isBlank(alt)); | ||
629 | - if(!temImgSemAlt) | ||
630 | - { | 668 | + |
669 | + String alt; | ||
670 | + | ||
671 | + for (Element elementoImagem : link.getAllElements("img")) { | ||
672 | + | ||
673 | + alt = elementoImagem.getAttributeValue("alt"); | ||
674 | + temImgSemAlt = (alt == null || StringUtils.isBlank(alt)); | ||
675 | + if (!temImgSemAlt) { | ||
631 | break; | 676 | break; |
632 | - } | ||
633 | } | 677 | } |
634 | - | ||
635 | - | 678 | + } |
679 | + | ||
636 | return temImgSemAlt; | 680 | return temImgSemAlt; |
637 | } | 681 | } |
638 | 682 | ||
@@ -643,7 +687,7 @@ public class ContentEvaluation extends Evaluation { | @@ -643,7 +687,7 @@ public class ContentEvaluation extends Evaluation { | ||
643 | String alt = img.getAttributeValue("alt"); | 687 | String alt = img.getAttributeValue("alt"); |
644 | return alt == null || StringUtils.isBlank(alt); | 688 | return alt == null || StringUtils.isBlank(alt); |
645 | } | 689 | } |
646 | - | 690 | + |
647 | private boolean hasLeiaMaisDescription(Element link) { | 691 | private boolean hasLeiaMaisDescription(Element link) { |
648 | String title = link.getAttributeValue("title"); | 692 | String title = link.getAttributeValue("title"); |
649 | String content = link.getContent().getTextExtractor().toString(); | 693 | String content = link.getContent().getTextExtractor().toString(); |
@@ -789,8 +833,7 @@ public class ContentEvaluation extends Evaluation { | @@ -789,8 +833,7 @@ public class ContentEvaluation extends Evaluation { | ||
789 | 833 | ||
790 | String[] parts = null; | 834 | String[] parts = null; |
791 | 835 | ||
792 | - String[] descricoes = { "figura", "imagem", "alt", "descrição", | ||
793 | - "foto" }; | 836 | + String[] descricoes = { "figura", "imagem", "alt", "descrição", "foto" }; |
794 | 837 | ||
795 | for (Element img : getDocument().getAllElements("img")) { | 838 | for (Element img : getDocument().getAllElements("img")) { |
796 | Attribute alt = img.getAttributes().get("alt"); | 839 | Attribute alt = img.getAttributes().get("alt"); |
@@ -863,7 +906,7 @@ public class ContentEvaluation extends Evaluation { | @@ -863,7 +906,7 @@ public class ContentEvaluation extends Evaluation { | ||
863 | "3.6", | 906 | "3.6", |
864 | false, | 907 | false, |
865 | imgA.toString(), | 908 | imgA.toString(), |
866 | - imgA, "7"));//"5")); | 909 | + imgA, "7"));// "5")); |
867 | aMap.put(srcAtt.getValue(), img); | 910 | aMap.put(srcAtt.getValue(), img); |
868 | isVerificado = true; | 911 | isVerificado = true; |
869 | } | 912 | } |
@@ -879,7 +922,7 @@ public class ContentEvaluation extends Evaluation { | @@ -879,7 +922,7 @@ public class ContentEvaluation extends Evaluation { | ||
879 | 922 | ||
880 | if (isVerificado) | 923 | if (isVerificado) |
881 | occurrences.add(buildOccurrence("3.6", false, | 924 | occurrences.add(buildOccurrence("3.6", false, |
882 | - img.toString(), img, "7"));//"5")); | 925 | + img.toString(), img, "7"));// "5")); |
883 | } | 926 | } |
884 | } | 927 | } |
885 | 928 | ||
@@ -905,7 +948,7 @@ public class ContentEvaluation extends Evaluation { | @@ -905,7 +948,7 @@ public class ContentEvaluation extends Evaluation { | ||
905 | if (alt != null && title != null) { | 948 | if (alt != null && title != null) { |
906 | if (title.getValue().equals(alt.getValue())) | 949 | if (title.getValue().equals(alt.getValue())) |
907 | occurrences.add(buildOccurrence("3.6", true, | 950 | occurrences.add(buildOccurrence("3.6", true, |
908 | - img.toString(), img, "8"));//"6")); | 951 | + img.toString(), img, "8"));// "6")); |
909 | } | 952 | } |
910 | } | 953 | } |
911 | 954 | ||
@@ -980,46 +1023,45 @@ public class ContentEvaluation extends Evaluation { | @@ -980,46 +1023,45 @@ public class ContentEvaluation extends Evaluation { | ||
980 | private List<Occurrence> checkRecommendation26() { | 1023 | private List<Occurrence> checkRecommendation26() { |
981 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); | 1024 | List<Occurrence> occurrences = new ArrayList<Occurrence>(); |
982 | boolean temAssociacao = false; | 1025 | boolean temAssociacao = false; |
983 | - | 1026 | + |
984 | for (Element table : getDocument().getAllElements("table")) { | 1027 | for (Element table : getDocument().getAllElements("table")) { |
985 | // Attribute summary = table.getAttributes().get("summary"); | 1028 | // Attribute summary = table.getAttributes().get("summary"); |
986 | 1029 | ||
987 | temAssociacao = false; | 1030 | temAssociacao = false; |
988 | - | ||
989 | - if(table.getAllElements("thead").size() > 0 && table.getAllElements("tbody").size() > 0) | ||
990 | - { | 1031 | + |
1032 | + if (table.getAllElements("thead").size() > 0 | ||
1033 | + && table.getAllElements("tbody").size() > 0) { | ||
991 | temAssociacao = true; | 1034 | temAssociacao = true; |
992 | - } | ||
993 | - else | ||
994 | - { | 1035 | + } else { |
995 | for (Element coluna : table.getAllElements("td")) { | 1036 | for (Element coluna : table.getAllElements("td")) { |
996 | - if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null || | ||
997 | - coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null) | ||
998 | - { | 1037 | + if (coluna.getAttributes().get("id") != null |
1038 | + || coluna.getAttributes().get("headers") != null | ||
1039 | + || coluna.getAttributes().get("scope") != null | ||
1040 | + || coluna.getAttributes().get("axis") != null) { | ||
999 | temAssociacao = true; | 1041 | temAssociacao = true; |
1000 | } | 1042 | } |
1001 | - | 1043 | + |
1002 | } | 1044 | } |
1003 | - if(!temAssociacao) | ||
1004 | - { | 1045 | + if (!temAssociacao) { |
1005 | for (Element coluna : table.getAllElements("th")) { | 1046 | for (Element coluna : table.getAllElements("th")) { |
1006 | - if(coluna.getAttributes().get("id") != null || coluna.getAttributes().get("headers") != null || | ||
1007 | - coluna.getAttributes().get("scope") != null || coluna.getAttributes().get("axis") != null) | ||
1008 | - { | 1047 | + if (coluna.getAttributes().get("id") != null |
1048 | + || coluna.getAttributes().get("headers") != null | ||
1049 | + || coluna.getAttributes().get("scope") != null | ||
1050 | + || coluna.getAttributes().get("axis") != null) { | ||
1009 | temAssociacao = true; | 1051 | temAssociacao = true; |
1010 | } | 1052 | } |
1011 | - | 1053 | + |
1012 | } | 1054 | } |
1013 | } | 1055 | } |
1014 | } | 1056 | } |
1015 | - | ||
1016 | - if(!temAssociacao) | ||
1017 | - { | ||
1018 | - occurrences.add(this.buildOccurrence("3.10", true, table.getStartTag().toString(), table, "1")); | 1057 | + |
1058 | + if (!temAssociacao) { | ||
1059 | + occurrences.add(this.buildOccurrence("3.10", true, table | ||
1060 | + .getStartTag().toString(), table, "1")); | ||
1019 | } | 1061 | } |
1020 | - | 1062 | + |
1021 | } | 1063 | } |
1022 | - | 1064 | + |
1023 | // Sorting | 1065 | // Sorting |
1024 | Collections.sort(occurrences, new Comparator<Occurrence>() { | 1066 | Collections.sort(occurrences, new Comparator<Occurrence>() { |
1025 | public int compare(Occurrence occurrence1, Occurrence occurrence2) { | 1067 | public int compare(Occurrence occurrence1, Occurrence occurrence2) { |
@@ -1049,9 +1091,11 @@ public class ContentEvaluation extends Evaluation { | @@ -1049,9 +1091,11 @@ public class ContentEvaluation extends Evaluation { | ||
1049 | * paragrafo.toString(), paragrafo, "1")); } | 1091 | * paragrafo.toString(), paragrafo, "1")); } |
1050 | */ | 1092 | */ |
1051 | 1093 | ||
1052 | - /*if (paragrafo.getContent().length() > 1000) | ||
1053 | - occurrences.add(this.buildOccurrence("3.11", false, | ||
1054 | - paragrafo.toString(), paragrafo, "1"));*/ //comentado por Gibran | 1094 | + /* |
1095 | + * if (paragrafo.getContent().length() > 1000) | ||
1096 | + * occurrences.add(this.buildOccurrence("3.11", false, | ||
1097 | + * paragrafo.toString(), paragrafo, "1")); | ||
1098 | + */// comentado por Gibran | ||
1055 | 1099 | ||
1056 | String align = paragrafo.getAttributeValue("align"); | 1100 | String align = paragrafo.getAttributeValue("align"); |
1057 | if ("justify".equals(align)) | 1101 | if ("justify".equals(align)) |