/* * GoogleTranslater.java * * Created on March 23, 2007, 10:01 PM * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * Baseado no language-translate: * * http://code.google.com/p/language-translate/ * * 09/12/2009 * */ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.Iterator; import java.util.List; /* * 3rd party Apache Libs */ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.methods.GetMethod; /* * 3rd party Jericho libs */ import au.id.jericho.lib.html.*; /** * A module for taking an input string along with country codes (source/target) * and converts it to the target language. It uses Google's Language Tool * site to do the actual translating. * * This program was done to simplify and speed up my ability to translate * quick message. * * @author uuklanger */ public class GoogleDictionary { String paginaHtml = null; // ============================================================= // Constructors and Init // ============================================================= /** * Creates a new instance of GoogleTranslater */ public GoogleDictionary() { } // ============================================================= // Public Methods // ============================================================= public void setDictionary(String word, String sourcelang, String targetlang) { String s = word; try { s = URLEncoder.encode(word, "UTF-8"); } catch (UnsupportedEncodingException ex) { Logger.getLogger(GoogleDictionary.class.getName()).log(Level.SEVERE, null, ex); } paginaHtml = getUrl("http://www.google.com/dictionary?source=translation&hl=pt&q="+ s + "&langpair=" + sourcelang + "|" + targetlang); } public void setTranslate(String frase, String sourcelang, String targetlang) { String s = frase; try { s = URLEncoder.encode(frase, "UTF-8"); } catch (UnsupportedEncodingException ex) { Logger.getLogger(GoogleDictionary.class.getName()).log(Level.SEVERE, null, ex); } paginaHtml = getUrl("http://www.google.com/translate_t?hl=en&ie=UTF8&text=" + s + "&langpair=" + sourcelang + "%7C" + targetlang); } /** * translates source string (sin) into a response using the source/target * languages. * * @param sin String in * @param sourcelang source language (ISO3166 code) * @param targetlang target language (ISO3166 code) * @return returns a text string in the target language */ public String getUrl(String url) { HttpClient client = new HttpClient(); HttpMethod method = null; String responsebody = null; try { //System.out.println(url); client.getHttpConnectionManager().getParams().setConnectionTimeout(5000); method = new GetMethod(url); method.setFollowRedirects(true); client.executeMethod(method); responsebody = method.getResponseBodyAsString(); method.releaseConnection(); } catch (HttpException he) { System.err.println("Http error connecting to '" + url + "'"); System.err.println(he.getMessage()); } catch (IOException ioe){ System.err.println("Unable to connect to '" + url + "'"); } catch(Exception ex) { System.err.println("EXCEPTION: " + ex.getMessage()); } // end-try-catch return responsebody; } // ============================================================= // Private Methods // ============================================================= /** * Using jericho libraries, this will extract the response * text from the HTML blob returned from google Language Tools. * * @param html HTML blob * @return Response text */ public int getClassesGramaticais() { //if (true) return html; String attribute = null; Element linkElement = null; List linkElements = null; String traducao; int classes = ClassesGramaticais.UNKNOWN; try { Source source = new Source(paginaHtml); source.setLogWriter(new OutputStreamWriter(System.err)); // send log messages to stderr source.fullSequentialParse(); linkElements=source.findAllElements(HTMLElementName.SPAN); for (Iterator i=linkElements.iterator(); i.hasNext();) { linkElement=(Element)i.next(); attribute=linkElement.getAttributeValue("title"); if (attribute != null) { if (attribute.equals("Part-of-speech")) { traducao = linkElement.getContent().extractText(); if (traducao.compareToIgnoreCase("article")==0) classes |= ClassesGramaticais.ARTIGO; if (traducao.compareToIgnoreCase("adjective")==0) classes |= ClassesGramaticais.ADJETIVO; if (traducao.compareToIgnoreCase("adverb")==0) classes |= ClassesGramaticais.ADVERBIO; if (traducao.compareToIgnoreCase("conjunction")==0) classes |= ClassesGramaticais.CONJUNCAO; if (traducao.compareToIgnoreCase("interjection")==0) classes |= ClassesGramaticais.INTERJEICAO; if (traducao.compareToIgnoreCase("numeral")==0) classes |= ClassesGramaticais.NUMERAL; if (traducao.compareToIgnoreCase("preposition")==0) classes |= ClassesGramaticais.PREPOSICAO; if (traducao.compareToIgnoreCase("pronoun")==0) classes |= ClassesGramaticais.PRONOME; if (traducao.compareToIgnoreCase("noun")==0) classes |= ClassesGramaticais.SUBSTANTIVO; if (traducao.compareToIgnoreCase("auxiliary")==0) classes |= ClassesGramaticais.VERBO; if (traducao.compareToIgnoreCase("verb")==0) classes |= ClassesGramaticais.VERBO; /* Esses aqui sao unknown if (traducao.compareToIgnoreCase("prefix")==0) traducao = "unk"; if (traducao.compareToIgnoreCase("abbreviation")==0) traducao = "unk"; if (traducao.compareToIgnoreCase("phrase")==0) traducao = "unk"; if (traducao.compareToIgnoreCase("particle")==0) traducao = "unk"; */ } // end-if } // end-if } // end-for } catch(Exception ex) { System.err.println("EXCEPTION: " + ex.getMessage()); } // end-try-catch return classes; } public String getSignificado() { //if (true) return html; String attribute = null; Element linkElement = null; List linkElements = null; String significado=""; try { Source source = new Source(paginaHtml); source.setLogWriter(new OutputStreamWriter(System.err)); // send log messages to stderr source.fullSequentialParse(); linkElements=source.findAllElements(HTMLElementName.SPAN); for (Iterator i=linkElements.iterator(); i.hasNext();) { linkElement=(Element)i.next(); attribute=linkElement.getAttributeValue("title"); if (attribute != null) { if (attribute.equals("Part-of-speech")) { significado += "\n" + linkElement.getContent().extractText() + "\n"; } // end-if } // end-if attribute=linkElement.getAttributeValue("class"); if (attribute != null) { if (attribute.equals("dct-tt")) { significado += "\t" + linkElement.getContent().extractText() + "\n"; } // end-if } // end-if } // end-for } catch(Exception ex) { System.err.println("EXCEPTION: " + ex.getMessage()); } // end-try-catch return significado; } // ============================================================= // TEST MAIN // ============================================================= /** * Test main for development * @param args the command line arguments */ public static void main(String[] args) { GoogleDictionary gt = new GoogleDictionary(); String palavra = "casa"; int classes = ClassesGramaticais.UNKNOWN; String significado; gt.setDictionary(palavra,"pt","en"); classes = gt.getClassesGramaticais(); significado = gt.getSignificado(); System.out.print(palavra + " [" + ClassesGramaticais.getClassesGramaticaisStr(classes)+"]\n"); System.out.println("significado:\n" + significado); System.exit(0); } } // end-class