// GoogleDictionary.java
/*
 * GoogleDictionary.java (derived from GoogleTranslater.java)
 *
 * Created on March 23, 2007, 10:01 PM
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/*
 * Based on language-translate:
 * 
 * http://code.google.com/p/language-translate/
 *
 * 09/12/2009
 *
 */


import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.List;

/*
 * JDK logging, followed by 3rd party Apache libs
 */
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;

/*
 * 3rd party Jericho libs
 */
import au.id.jericho.lib.html.*;


/**
 * Fetches a Google dictionary or translation page for a word/phrase and
 * extracts information from the returned HTML.
 *
 * Usage is two-step: call {@link #setDictionary} or {@link #setTranslate} to
 * download a page (kept in {@link #paginaHtml}), then call
 * {@link #getClassesGramaticais()} and/or {@link #getSignificado()} to read
 * data out of that page.
 *
 * Instances keep the last fetched page in a mutable field and perform no
 * synchronization.
 *
 * @author uuklanger
 */
public class GoogleDictionary {

    private static final Logger LOGGER = Logger.getLogger(GoogleDictionary.class.getName());

    /** Connection timeout, in milliseconds, applied to every request. */
    private static final int CONNECTION_TIMEOUT_MS = 5000;

    /** Value of the {@code title} attribute on spans holding a part-of-speech label. */
    private static final String PART_OF_SPEECH_TITLE = "Part-of-speech";

    /** Value of the {@code class} attribute on spans whose text is collected as a meaning. */
    private static final String MEANING_CLASS = "dct-tt";

    /**
     * HTML of the most recently fetched page, or {@code null} when nothing has
     * been fetched yet or the last fetch failed.
     */
    String paginaHtml = null;

    // =============================================================
    //  Constructors and Init
    // =============================================================


    /**
     * Creates a new instance with no page loaded.
     */
    public GoogleDictionary() {
    }

    // =============================================================
    //  Public Methods
    // =============================================================


    /**
     * Downloads the Google dictionary page for a word and stores it for the
     * getter methods.
     *
     * @param word word to look up; it is URL-encoded as UTF-8
     * @param sourcelang source language code, inserted into the URL as given
     * @param targetlang target language code, inserted into the URL as given
     */
    public void setDictionary(String word, String sourcelang, String targetlang) {
        // '|' is not a legal URI character, so the language pair separator is
        // sent percent-encoded (%7C), exactly as setTranslate does.
        paginaHtml = getUrl("http://www.google.com/dictionary?source=translation&hl=pt&q="
                + encode(word) + "&langpair=" + sourcelang + "%7C" + targetlang);
    }


    /**
     * Downloads the Google translation page for a phrase and stores it for the
     * getter methods.
     *
     * @param frase phrase to translate; it is URL-encoded as UTF-8
     * @param sourcelang source language code, inserted into the URL as given
     * @param targetlang target language code, inserted into the URL as given
     */
    public void setTranslate(String frase, String sourcelang, String targetlang) {
        paginaHtml = getUrl("http://www.google.com/translate_t?hl=en&ie=UTF8&text="
                + encode(frase) + "&langpair=" + sourcelang + "%7C" + targetlang);
    }

    /**
     * Performs an HTTP GET on the given URL and returns the response body.
     *
     * Failures are logged rather than thrown, so callers only need to check
     * for {@code null}.
     *
     * @param url complete, already-escaped URL to fetch
     * @return the response body as a string, or {@code null} if the request
     *         could not be completed
     */
    public String getUrl(String url) {
        HttpClient client = new HttpClient();
        HttpMethod method = null;
        String responsebody = null;

        try {
            client.getHttpConnectionManager().getParams().setConnectionTimeout(CONNECTION_TIMEOUT_MS);

            method = new GetMethod(url);
            method.setFollowRedirects(true);

            client.executeMethod(method);
            responsebody = method.getResponseBodyAsString();
        } catch (HttpException he) {
            LOGGER.log(Level.WARNING, "Http error connecting to '" + url + "'", he);
        } catch (IOException ioe) {
            LOGGER.log(Level.WARNING, "Unable to connect to '" + url + "'", ioe);
        } catch (RuntimeException ex) {
            // e.g. a URL the HTTP client refuses to accept
            LOGGER.log(Level.WARNING, "EXCEPTION: " + ex.getMessage(), ex);
        } finally {
            // Release on every path, not only on success, so a failed
            // request does not leave the connection checked out.
            if (method != null) {
                method.releaseConnection();
            }
        }

        return responsebody;
    }

    /**
     * Collects the grammatical classes listed on the fetched page.
     *
     * Every {@code span} whose {@code title} attribute is "Part-of-speech" is
     * read, and the flag matching its text is OR-ed into the result. Labels
     * that are not recognised (e.g. "prefix", "abbreviation", "phrase",
     * "particle") contribute nothing.
     *
     * @return bitwise OR of the matching {@code ClassesGramaticais} flags,
     *         starting from {@code ClassesGramaticais.UNKNOWN}; just
     *         {@code UNKNOWN} when no page is loaded or parsing fails
     */
    public int getClassesGramaticais() {
        int classes = ClassesGramaticais.UNKNOWN;
        if (paginaHtml == null) {
            return classes;
        }

        try {
            for (Iterator i = findSpans().iterator(); i.hasNext();) {
                Element span = (Element) i.next();
                if (PART_OF_SPEECH_TITLE.equals(span.getAttributeValue("title"))) {
                    classes |= flagFor(span.getContent().extractText());
                }
            }
        } catch (Exception ex) {
            // Best effort: a page that cannot be parsed yields what was found so far.
            LOGGER.log(Level.WARNING, "EXCEPTION: " + ex.getMessage(), ex);
        }

        return classes;
    }


    /**
     * Builds a plain-text summary of the meanings listed on the fetched page.
     *
     * Spans are visited in document order. A span titled "Part-of-speech" is
     * appended as a heading ({@code "\n" + text + "\n"}); a span with class
     * "dct-tt" is appended as an indented entry ({@code "\t" + text + "\n"}).
     *
     * @return the collected text; the empty string when no page is loaded,
     *         nothing matches, or parsing fails before anything is collected
     */
    public String getSignificado() {
        StringBuilder significado = new StringBuilder();
        if (paginaHtml == null) {
            return significado.toString();
        }

        try {
            for (Iterator i = findSpans().iterator(); i.hasNext();) {
                Element span = (Element) i.next();

                if (PART_OF_SPEECH_TITLE.equals(span.getAttributeValue("title"))) {
                    significado.append("\n").append(span.getContent().extractText()).append("\n");
                }

                if (MEANING_CLASS.equals(span.getAttributeValue("class"))) {
                    significado.append("\t").append(span.getContent().extractText()).append("\n");
                }
            }
        } catch (Exception ex) {
            // Best effort: return whatever was collected before the failure.
            LOGGER.log(Level.WARNING, "EXCEPTION: " + ex.getMessage(), ex);
        }

        return significado.toString();
    }

    // =============================================================
    //  Private Methods
    // =============================================================


    /**
     * URL-encodes text as UTF-8, falling back to the raw text (and logging)
     * if the encoding is reported as unsupported.
     */
    private static String encode(String text) {
        try {
            return URLEncoder.encode(text, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return text;
        }
    }

    /**
     * Parses the stored page and returns all of its {@code span} elements.
     * Callers must ensure {@link #paginaHtml} is not null.
     */
    private List findSpans() {
        Source source = new Source(paginaHtml);
        source.setLogWriter(new OutputStreamWriter(System.err)); // send parser log messages to stderr
        source.fullSequentialParse();
        return source.findAllElements(HTMLElementName.SPAN);
    }

    /**
     * Maps a part-of-speech label (case-insensitive) to its
     * {@code ClassesGramaticais} flag, or 0 when the label is not recognised.
     */
    private static int flagFor(String label) {
        if (label.equalsIgnoreCase("article")) {
            return ClassesGramaticais.ARTIGO;
        }
        if (label.equalsIgnoreCase("adjective")) {
            return ClassesGramaticais.ADJETIVO;
        }
        if (label.equalsIgnoreCase("adverb")) {
            return ClassesGramaticais.ADVERBIO;
        }
        if (label.equalsIgnoreCase("conjunction")) {
            return ClassesGramaticais.CONJUNCAO;
        }
        if (label.equalsIgnoreCase("interjection")) {
            return ClassesGramaticais.INTERJEICAO;
        }
        if (label.equalsIgnoreCase("numeral")) {
            return ClassesGramaticais.NUMERAL;
        }
        if (label.equalsIgnoreCase("preposition")) {
            return ClassesGramaticais.PREPOSICAO;
        }
        if (label.equalsIgnoreCase("pronoun")) {
            return ClassesGramaticais.PRONOME;
        }
        if (label.equalsIgnoreCase("noun")) {
            return ClassesGramaticais.SUBSTANTIVO;
        }
        // Auxiliaries are counted as verbs.
        if (label.equalsIgnoreCase("auxiliary") || label.equalsIgnoreCase("verb")) {
            return ClassesGramaticais.VERBO;
        }
        return 0;
    }


    // =============================================================
    //  TEST MAIN
    // =============================================================


    /**
     * Test main for development: looks up one Portuguese word and prints its
     * grammatical classes and meanings.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        GoogleDictionary gt = new GoogleDictionary();

        String palavra = "casa";

        gt.setDictionary(palavra, "pt", "en");
        int classes = gt.getClassesGramaticais();
        String significado = gt.getSignificado();

        System.out.print(palavra + " [" + ClassesGramaticais.getClassesGramaticaisStr(classes) + "]\n");
        System.out.println("significado:\n" + significado);

        System.exit(0);
    }

} // end-class