diff --git a/Makefile b/Makefile index 9e38f47..ea98f6f 100644 --- a/Makefile +++ b/Makefile @@ -4,17 +4,18 @@ REVISION=`svn info |grep Rev | sed 2d | cut -d" " -f2` CC=g++ FLAGS=-g -#LIBS= -L libs/jthread/lib -L libs/jcommon/lib -L libs/jsocket/lib -ljthread -lpthread -ljcommon -ljsocket - LIBS= \ - `pkg-config --cflags jlibcpp` \ `pkg-config --libs jlibcpp` \ - `pkg-config --cflags jsoncpp` \ `pkg-config --libs jsoncpp` \ - -lpython2.7 -llavid_base -llavid_io -llavid_net + -lpython2.7 -llavid_base -llavid_io -llavid_net \ + -ljulius -ldl -lsent INCLUDES= \ + `pkg-config --cflags jlibcpp` \ + `pkg-config --cflags jsoncpp` \ -I /usr/include/python2.7 \ + -I /usr/include/julius \ + -I /usr/include/sent \ -I util/src/include \ -I tradutor/src/include \ -I extrator/src/include \ @@ -23,8 +24,7 @@ INCLUDES= \ -I synchronizer/src/include \ -I renderer/src/include \ -I mixer/src/include \ - -I recognize/src/include -#-I libs/jthread/include -I libs/jcommon/include -I libs/jsocket/include -Wall + -I recognize/src/include utilObjs= \ logger.o @@ -58,7 +58,10 @@ mixerObjs= \ recognizeObjs = \ recognize.o \ - recognizeException.o + recognizeException.o \ + wavcut.o \ + audiofile.o \ + recognizer.o utilObjsPre = $(addprefix util/src/,$(utilObjs) ) tradutorObjsPre = $(addprefix tradutor/src/,$(tradutorObjs) ) @@ -79,6 +82,7 @@ OBJECTS = \ $(rendererObjsPre) \ $(mixerObjsPre) \ $(recognizeObjsPre) \ + $(wavcutObjsPre) $(grecognizerObjsPre) \ main.o all: user_config libras @@ -91,7 +95,7 @@ user_config: fi libras: $(OBJECTS) - $(CC) -o vlibras $(OBJECTS) $(LIBS) $(INCLUDES) $(FLAGS) + $(CC) $(INCLUDES) -o vlibras $(OBJECTS) $(LIBS) $(FLAGS) .c.o: $< diff --git a/recognize/src/audiofile.cpp b/recognize/src/audiofile.cpp new file mode 100644 index 0000000..a2a2ed7 --- /dev/null +++ b/recognize/src/audiofile.cpp @@ -0,0 +1,13 @@ +#include "audiofile.h" + +Audiofile::Audiofile(char *_file_path,float _start_seg,float _end_seg){ + + file_path = _file_path; + start_seg = _start_seg; + end_seg = _end_seg; +} + +Audiofile::~Audiofile(){ + + +} \ No newline at end of file diff --git a/recognize/src/include/audiofile.h b/recognize/src/include/audiofile.h new file mode 100644 index 0000000..a95df1c --- /dev/null +++ b/recognize/src/include/audiofile.h @@ -0,0 +1,21 @@ +#ifndef AUDIOFILE_H +#define AUDIOFILE_H + +class Audiofile +{ + +public: + + Audiofile(char *_file_path,float _start_seg,float _end_seg); + ~Audiofile(); + + char* file_path; + float start_seg; + float end_seg; + + + + +}; + +#endif // AUDIOFILE_H \ No newline at end of file diff --git a/recognize/src/include/recognize.h b/recognize/src/include/recognize.h index 6a16d73..8641183 100644 --- a/recognize/src/include/recognize.h +++ b/recognize/src/include/recognize.h @@ -1,3 +1,7 @@ +#ifndef RECOGNIZE_H +#define RECOGNIZE_H + + #include "jthread.h" #include #include @@ -17,13 +21,18 @@ #include "recognizeListener.h" #include "recognizeException.h" +#include "wavcut.h" +#include "recognizer.h" + #define FREQUENCY_PATTERN 22050 #define INPUT_PATTERN 1 /* 1 = Raw file, 2 = Mic */ #define BLOCS_PATTERN 10 #define SIZE_BUFFER 256 #define CONFIDENCE_RATE 0.10 +#define RECOGNIZER_MODE 1 // 0 = Julius, 1 = Google #define PATH_JCONFIG "vlibras_user/vlibras-core/recognize/src/julius.jconf" +#define PATH_WCONFIG "vlibras_user/vlibras-core/recognize/src/wavcut.jconf" #define PATH_AUDIO_ORIGIN "/audio/origin/audio_origin.wav" #define PATH_AUDIO_PARTS "/audio/parts/" #define FILENAME_RECOGNIZED_OUT "/audio/recognized.out" @@ -110,4 +119,9 @@ private: int64_t calcula_pts(double msec); int64_t convert_pts(string pts); + void executeGoogleEngine(); + Jconf* load_config(); + }; + +#endif // RECOGNIZE_H \ No newline at end of file diff --git a/recognize/src/include/recognizer.h b/recognize/src/include/recognizer.h new file mode 100644 index 0000000..b1507c7 --- /dev/null +++ b/recognize/src/include/recognizer.h @@ -0,0 +1,37 @@ +#ifndef RECOGNIZER_H +#define RECOGNIZER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +class Recognizer +{ + +public: + Recognizer(); + ~Recognizer(); + int recognize(string file_in); + string getsentence(); + float getconfidence(); + + +private: + + Jconf *jconf; + Recog *recog; + +}; + +#endif // RECOGNIZER_H diff --git a/recognize/src/include/wavcut.h b/recognize/src/include/wavcut.h new file mode 100644 index 0000000..8865353 --- /dev/null +++ b/recognize/src/include/wavcut.h @@ -0,0 +1,30 @@ +#ifndef WAVCUT_H +#define WAVCUT_H + +#include +#include +#include +#undef min +#undef max +#include +#include "audiofile.h" + + + +using namespace std; +class Wavcut{ + +public: + + Wavcut(char* _pathAudio, char* _outputPath, char* _id); + ~Wavcut(); + + int initialize(Jconf *jconf); + vector list_audio_files(); + int count_lines; + +private: + static int adin_callback_file(SP16 *now, int len, Recog *recog); +}; + +#endif // WAVCUT_H \ No newline at end of file diff --git a/recognize/src/recognize.cpp b/recognize/src/recognize.cpp index 4448583..2e6e8c3 100644 --- a/recognize/src/recognize.cpp +++ b/recognize/src/recognize.cpp @@ -68,14 +68,17 @@ void Recognize::Run(){ finish = false; createDir(); extractAudioFromVideo(); - breakVideoParts(getTimeMediaSec()); - executeJuliusEngine(); - generateConfidence(); - //filterOutputJulius(); - //cleanFiles(); - + if (RECOGNIZER_MODE == 0){ + breakVideoParts(getTimeMediaSec()); + executeJuliusEngine(); + generateConfidence(); + } else { + executeGoogleEngine(); + } + finish = true; - //notifyEndExtraction(count_lines); + notifyEndExtraction(count_lines); + cleanFiles(); } void Recognize::setFrequency(int freq) { @@ -209,7 +212,7 @@ void Recognize::breakVideoParts(int timeTotal) { void Recognize::executeJuliusEngine() { string type, freqStr; - string command = "julius -C "; + string command = "julius -quiet -C "; char cfreq[10]; char* jPath; @@ -292,14 +295,12 @@ void Recognize::generateConfidence() { scores.push_back(avgScores/sizeAvgScores); }else if(pass==0){ - notifyListeners((char*) "SENTENCA COM BAIXA QUALIDADE", 0); - notifyEndExtraction(count_lines); + notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", 0); return; } } while (!in.eof()); in.close(); filterOutputJulius(); - notifyEndExtraction(count_lines); } } @@ -420,3 +421,100 @@ void Recognize::createDir(){ .append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/origin"); system(command.c_str()); } + +void Recognize::executeGoogleEngine() { + jlog_set_output(NULL); + + Wavcut* wavcut; + Recognizer* recog; + + vector audioList; + string file_in = ""; + file_in.append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN); + string path_out= ""; + path_out.append(path_contents).append("/").append(id).append("/"); + char* pathAudio = new char[file_in.size()+1]; + char* outputPath = new char[path_out.size()+1]; + char* _id = new char[id.size()+1]; + strcpy(pathAudio, file_in.c_str()); + strcpy(outputPath, path_out.c_str()); + strcpy(_id, id.c_str()); + + + wavcut = new Wavcut(pathAudio,outputPath,_id); + Jconf *jconf; + jconf = j_config_load_file_new(PATH_WCONFIG); + + if(wavcut->initialize(jconf)<2){ + + wavcut->initialize(load_config()); + } + + audioList = wavcut->list_audio_files(); + + + delete wavcut; + recog = new Recognizer(); + + int ii; + count_lines = 0; + for(ii=0; ii < audioList.size(); ii++) + { + + /*chama o reconhecedor passando cada arquivo de audio*/ + recog->recognize(audioList[ii].file_path); + + /*remove o audio ja reconhecido*/ + remove(audioList[ii].file_path); + + /*segundos do inicio do audio reconhecido*/ + //cout<< calcula_pts(audioList[ii].start_seg) << endl; + + /* texto do audio reconhecido*/ + //cout<< recog->getsentence() << endl; + + /* porcentagem de confiança do reconhecimento (de 0 a 1) */ + //cout<< recog->getconfidence() << endl; + + if(recog->getconfidence() >= confidenceRate) + notifyListeners((char*)recog->getsentence().c_str(), (int64_t)(audioList[ii].start_seg*1000)); + else + notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", (int64_t)(audioList[ii].start_seg*1000)); + if(recog->getconfidence()> 0) + count_lines++; + } + delete recog; + +} + +Jconf* Recognize::load_config(){ + + Jconf* jconf; + jconf = j_jconf_new(); + + char *parametros[12]; + + parametros[1]="-lv"; + parametros[2]="3000"; + parametros[3]="-zc"; + parametros[4]="150"; + parametros[5]="-headmargin"; + parametros[6]="200"; + parametros[7]="-tailmargin"; + parametros[8]="150"; + parametros[9]="-rejectshort"; + parametros[10]="1500"; + //cout << "load_config" << endl; + /* read arguments and set parameters */ + if (j_config_load_args(jconf, 11, parametros) == -1) { + fprintf(stderr, "Error reading arguments\n"); + + } + + jconf->input.type = INPUT_WAVEFORM; + jconf->input.speech_input = SP_RAWFILE; + jconf->detect.silence_cut = 1; + + +return jconf; +} diff --git a/recognize/src/recognizer.cpp b/recognize/src/recognizer.cpp new file mode 100644 index 0000000..74e58af --- /dev/null +++ b/recognize/src/recognizer.cpp @@ -0,0 +1,101 @@ + + + +#include "recognizer.h" +#include + + +static boolean reconhecendo ; +static string sentence; +static float confidence; +string lenguage = "pt-BR"; + +using namespace std; + + +void resultado(Recog *recog, void *dummy); + + + +Recognizer::Recognizer(){ + +} + + +Recognizer::~Recognizer(){ + +} + + + +int Recognizer::recognize(string file_in) +{ + FILE *file; + string cmFinal; + char message[100]; + message[0] = '\0'; + string jsonResult =""; + string vozReconhecida = ""; + int indexLineSrt =0; + stringstream comand; + stringstream index; + + + string fileName = file_in; + + // inicio preparação requisição + + comand << " curl -ss -X POST --data-binary @"; + comand << fileName; + + //requisição para o google speech + comand<< " --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/l16; rate=22050;' 'https://www.google.com/speech-api/v2/recognize?client=chromium&lang="<1){ + jsonResult+= message; + + } + } + //cout<< jsonResult < +// #include + + +int sfreq = 22050; ///< Temporal storage of sample rate +int speechlen; ///< samples of one recorded segments +FILE *fp = NULL; ///< File pointer for WAV output +int sid = 0; ///< current file ID (for SPOUT_FILE) +char *outpath = NULL; ///< work space for output file name formatting +boolean writing_file = FALSE; ///< TRUE if writing to a file +int trigger_sample; +char *pathAudio; +char* outputPath; +char* id_file; +static vector audioList; + + +Wavcut::Wavcut(char* _pathAudio, char* _outputPath, char* _id){ + + pathAudio = _pathAudio; + outputPath = _outputPath; + id_file = _id; + //printf("%s\n","Entrou aquiiiii" ) +} + +Wavcut::~Wavcut(){ + //printf("%s\n","Wavcut finalizado" ); +} + + +int Wavcut::adin_callback_file(SP16 *now, int len, Recog *recog) + { + /* cria novo arquivo wav para salvar o audio sem silencio*/ + if (speechlen == 0) { + + sprintf(outpath, "%s%d.wav",outputPath, sid); + + if (access(outpath, F_OK) == 0) { + if (access(outpath, W_OK) != 0) { + return(-1); + } + + } + + if ((fp = wrwav_open(outpath, sfreq)) != NULL) { + //fprintf(stderr, "novo arquivo\n"); + }else{ + return -1; + } + writing_file = TRUE; + } + + /* write recorded sample to file */ + if (wrwav_data(fp, &(now[0]), len) == FALSE) { + return -1; + } + + /* accumulate sample num of this segment */ +speechlen += len; + +return(0); +} + +//acumula o tempo de cada segmento +void registra_tempo(Recog *recog, void *data) +{ + trigger_sample = recog->adin->last_trigger_sample; +} + +boolean close_files() +{ + if (writing_file) { + + if (wrwav_close(fp) == FALSE) { + fprintf(stderr, "adinrec: failed to close file\n"); + return FALSE; + } + char* fileout=(char *)mymalloc(100);; + // sprintf(fileout,*outpath); + sprintf(fileout, outpath); + audioList.push_back(Audiofile(fileout,(float)trigger_sample / (float)sfreq, + (float)(trigger_sample + speechlen) / (float)sfreq)); + + writing_file = FALSE; +} + +return TRUE; +} + + + int Wavcut::initialize(Jconf *jconf) { + + sid = 0; + audioList.clear(); + //Jconf *jconf; + Recog *recog; + + int ret; + boolean is_continues; + + /* cria instancia do reconhecedor */ + recog = j_recog_new(); + /* carrega as configurações contidas no jconfig */ + // jconf = j_config_load_file_new("/home/ezequiel/speech-recognizer/wavcut.jconf"); + + jconf->input.sfreq = sfreq; + /*adciona a configuração ao reconhecedor*/ + recog->jconf = jconf; + + outpath = (char *)mymalloc(256); + +/*registra calback do contador de tempo*/ + callback_add(recog, CALLBACK_EVENT_SPEECH_START, registra_tempo, NULL); + + /*Inicializa o reconhecedor*/ + if (j_adin_init(recog) == FALSE) { + fprintf(stderr, "Error in initializing adin device\n"); + return 0; + } + + /*Abre o quivo de áudio para ser cortado*/ + if(j_open_stream(recog,pathAudio) == -2) + return sid; + + /* loop de detecção de voz*/ + + do { + + speechlen = 0; + + ret = adin_go(adin_callback_file, NULL, recog); + + + switch(ret) { + case -1: /* device read error or callback error */ + //fprintf(stderr, "[error]\n"); + break; + case 0: /* reached to end of input */ + //fprintf(stderr, "[eof]\n"); + return sid; + break; + default: + break; + } + + if (ret == -1) { + /* error in input device or callback function, so terminate program here */ + return sid; + } + /* um intervalo de silencio detectado */ + + if (close_files() == FALSE) + return sid; + + /* incremento do contador de partes cortadas */ + + + sid++; + + is_continues = FALSE; + if (ret > 0 || ret == -2) { + is_continues = TRUE; + } + + } while (is_continues); + + /*Quando termina de ler todo áudio finaliza*/ + adin_end(recog->adin); + + return sid; + +} + +vector Wavcut::list_audio_files(){ + + + return audioList; +} + + + + + + + diff --git a/recognize/src/wavcut.jconf b/recognize/src/wavcut.jconf new file mode 100644 index 0000000..7983a9a --- /dev/null +++ b/recognize/src/wavcut.jconf @@ -0,0 +1,24 @@ +-smpFreq 22050 +-lv 1000 +-zc 60 +-headmargin 200 +-tailmargin 150 +-rejectshort 1500 +-input rawfile +-cutsilence + + + +#-smpFreq 22050 +#-lv 3000 +#-zc 150 +#-headmargin 200 +#-tailmargin 150 +#-rejectshort 1500 +#-input rawfile +#-cutsilence + + + + + diff --git a/servico/src/include/serviceWindowGeneration.h b/servico/src/include/serviceWindowGeneration.h index 0b4e371..170399b 100644 --- a/servico/src/include/serviceWindowGeneration.h +++ b/servico/src/include/serviceWindowGeneration.h @@ -15,7 +15,7 @@ #include "listenerTradutor.h" #include "tradutorPortGlosa.h" #include "serviceException.h" -#include +#include #include #define DEVELOPER "devel" @@ -26,7 +26,6 @@ #define PATH_CONF_FILE "vlibras_user/.vlibras-config/params.json" #define MAX_SIZE_PATH 256 -using namespace Json; using namespace Tradutor; using namespace jthread; using namespace std; @@ -39,8 +38,8 @@ protected: Renderer* renderer; Mixer* mixer; - Value root; - Reader reader; + Json::Value root; + Json::Reader reader; vector* vetor_pts; bool finish; -- libgit2 0.21.2