Adiciona reconhecimento de voz pelo Google [DEFAULT]

Erickson Silva
1 parent b886dc37
Showing 11 changed files with 552 additions and 24 deletions Show diff stats
Makefile
recognize/src/audiofile.cpp
recognize/src/include/audiofile.h
recognize/src/include/recognize.h
recognize/src/include/recognizer.h
recognize/src/include/wavcut.h
recognize/src/recognize.cpp
recognize/src/recognizer.cpp
recognize/src/wavcut.cpp
recognize/src/wavcut.jconf
servico/src/include/serviceWindowGeneration.h
@@ -4,17 +4,18 @@ REVISION=`svn info |grep Rev | sed 2d | cut -d" " -f2`
 CC=g++
 FLAGS=-g
-#LIBS= -L libs/jthread/lib -L libs/jcommon/lib -L libs/jsocket/lib -ljthread -lpthread -ljcommon -ljsocket
-
 LIBS= \
-	`pkg-config --cflags jlibcpp` \
 	`pkg-config --libs jlibcpp` \
-	`pkg-config --cflags jsoncpp` \
 	`pkg-config --libs jsoncpp` \
-	-lpython2.7 -llavid_base -llavid_io -llavid_net
+	-lpython2.7 -llavid_base -llavid_io -llavid_net \
+	-ljulius -ldl -lsent
 INCLUDES= \
+	`pkg-config --cflags jlibcpp` \
+	`pkg-config --cflags jsoncpp` \
  	-I /usr/include/python2.7 \
+ 	-I /usr/include/julius \
+ 	-I /usr/include/sent \
  	-I util/src/include \
  	-I tradutor/src/include \
  	-I extrator/src/include \
@@ -23,8 +24,7 @@ INCLUDES= \
  	-I synchronizer/src/include \
  	-I renderer/src/include \
  	-I mixer/src/include \
- 	-I recognize/src/include
-#-I libs/jthread/include -I libs/jcommon/include -I libs/jsocket/include -Wall
+ 	-I recognize/src/include 
 utilObjs= \
 	logger.o
@@ -58,7 +58,10 @@ mixerObjs= \
 recognizeObjs = \
 	recognize.o \
- 	recognizeException.o
+ 	recognizeException.o \
+ 	wavcut.o \
+ 	audiofile.o \
+ 	recognizer.o
 utilObjsPre  = $(addprefix util/src/,$(utilObjs) )
 tradutorObjsPre = $(addprefix tradutor/src/,$(tradutorObjs) )
@@ -79,6 +82,7 @@ OBJECTS = \
  	$(rendererObjsPre) \
  	$(mixerObjsPre) \
  	$(recognizeObjsPre) \
+ 	$(wavcutObjsPre) $(grecognizerObjsPre) \
  	main.o
 all: user_config libras
@@ -91,7 +95,7 @@ user_config:
 	fi
 libras: $(OBJECTS)
-	$(CC) -o vlibras $(OBJECTS) $(LIBS) $(INCLUDES) $(FLAGS)
+	$(CC) $(INCLUDES) -o vlibras $(OBJECTS) $(LIBS) $(FLAGS)
 .c.o: $<
@@ -0,0 +1,13 @@
+#include "audiofile.h"
+
+// Builds a segment descriptor; the path pointer is stored as given, not copied.
+Audiofile::Audiofile(char *_file_path,float _start_seg,float _end_seg)
+	: file_path(_file_path),
+	  start_seg(_start_seg),
+	  end_seg(_end_seg)
+{}
+
+Audiofile::~Audiofile(){
+	// Empty: file_path is not released by this destructor.
+
+}
 \ No newline at end of file
@@ -0,0 +1,21 @@
+#ifndef AUDIOFILE_H
+#define AUDIOFILE_H
+
+class Audiofile
+{
+	// Describes one audio segment: the file that holds it plus its start and end.
+public:
+	// Stores the three arguments in the public members below.
+	Audiofile(char *_file_path,float _start_seg,float _end_seg);
+	~Audiofile();
+	// Path of the segment file; kept as a raw pointer and never copied by this class.
+	char* file_path;
+	float start_seg; // segment start; wavcut.cpp fills it with sample index / sample rate
+	float end_seg; // segment end; wavcut.cpp fills it with sample index / sample rate
+
+
+	
+	 
+};
+
+#endif // AUDIOFILE_H
 \ No newline at end of file
+#ifndef RECOGNIZE_H
+#define RECOGNIZE_H
+
+
 #include "jthread.h"
 #include <iostream>
 #include <stdlib.h>
@@ -17,13 +21,18 @@
 #include "recognizeListener.h"
 #include "recognizeException.h"
+#include "wavcut.h"
+#include "recognizer.h"
+
 #define FREQUENCY_PATTERN 22050
 #define INPUT_PATTERN 1 /* 1 = Raw file, 2 = Mic */
 #define BLOCS_PATTERN 10
 #define SIZE_BUFFER 256
 #define CONFIDENCE_RATE 0.10
+#define RECOGNIZER_MODE 1 // 0 = Julius, 1 = Google
 #define PATH_JCONFIG "vlibras_user/vlibras-core/recognize/src/julius.jconf"
+#define PATH_WCONFIG "vlibras_user/vlibras-core/recognize/src/wavcut.jconf"
 #define PATH_AUDIO_ORIGIN "/audio/origin/audio_origin.wav"
 #define PATH_AUDIO_PARTS "/audio/parts/"
 #define FILENAME_RECOGNIZED_OUT "/audio/recognized.out"
@@ -110,4 +119,9 @@ private:
 	int64_t calcula_pts(double msec);
 	int64_t convert_pts(string pts);
+	void executeGoogleEngine();
+	Jconf* load_config();
+
 };
+
+#endif // RECOGNIZE_H
 \ No newline at end of file
@@ -0,0 +1,37 @@
+#ifndef RECOGNIZER_H
+#define RECOGNIZER_H
+
+#include <julius/juliuslib.h>
+#include <iostream>
+#include <sstream>
+#include <sys/stat.h>
+#include <fstream>
+#include <iostream>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <list>
+#include <stdint.h>
+
+using namespace std;
+
+class Recognizer
+{
+	// Speech recognizer front end; the implementation lives in recognizer.cpp.
+public:
+	Recognizer();
+	~Recognizer();
+	int recognize(string file_in); // runs recognition on the audio file at file_in
+	string getsentence(); // text stored by recognize()
+	float getconfidence(); // confidence stored by recognize()
+
+	// NOTE(review): the two pointers below are not referenced by the recognizer.cpp added in this change.
+private:
+
+	Jconf *jconf;
+  	Recog *recog;
+  	
+};
+
+#endif // RECOGNIZER_H
@@ -0,0 +1,30 @@
+#ifndef WAVCUT_H
+#define WAVCUT_H
+
+#include <julius/juliuslib.h>
+#include <string>
+#include <algorithm>
+#undef min
+#undef max
+#include <vector>
+#include "audiofile.h"
+
+
+
+using namespace std;
+class Wavcut{
+	// Cuts an audio file into separate segment files through the Julius audio-input API.
+public:	
+	// The three pointers are stored as given; see wavcut.cpp.
+	Wavcut(char* _pathAudio, char* _outputPath,  char* _id);
+	~Wavcut();
+	// Runs the cut with the given configuration and returns the segment counter.
+	int initialize(Jconf *jconf);
+	vector<Audiofile> list_audio_files(); // segments recorded by the last initialize() run
+	int count_lines; // NOTE(review): not referenced by the wavcut.cpp added in this change
+	
+private:
+	static int adin_callback_file(SP16 *now, int len, Recog *recog); // callback handed to adin_go()
+}; 
+
+#endif // WAVCUT_H
 \ No newline at end of file
@@ -68,14 +68,17 @@ void Recognize::Run(){
 	finish = false;
 	createDir();
 	extractAudioFromVideo();
-	breakVideoParts(getTimeMediaSec());
-	executeJuliusEngine();
-	generateConfidence();
-	//filterOutputJulius();
-	//cleanFiles();
-
+	if (RECOGNIZER_MODE == 0){
+		breakVideoParts(getTimeMediaSec());
+		executeJuliusEngine();
+		generateConfidence();
+	} else {
+		executeGoogleEngine();
+	}
+	
 	finish = true;
-	//notifyEndExtraction(count_lines);
+	notifyEndExtraction(count_lines);
+	cleanFiles();
 }
 void Recognize::setFrequency(int freq) {
@@ -209,7 +212,7 @@ void Recognize::breakVideoParts(int timeTotal) {
 void Recognize::executeJuliusEngine() {	
 	string type, freqStr;
-	string command = "julius -C ";
+	string command = "julius -quiet -C ";
 	char cfreq[10];
 	char* jPath;
@@ -292,14 +295,12 @@ void Recognize::generateConfidence() {
          		scores.push_back(avgScores/sizeAvgScores);
 			}else if(pass==0){
-				notifyListeners((char*) "SENTENCA COM BAIXA QUALIDADE", 0);
-				notifyEndExtraction(count_lines);
+				notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", 0);
 				return;
 			}
 		} while (!in.eof());
 		in.close();
 		filterOutputJulius();
-		notifyEndExtraction(count_lines);
 	}
 }
@@ -420,3 +421,100 @@ void Recognize::createDir(){
 	.append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/origin");
 	system(command.c_str());
 }
+
+void Recognize::executeGoogleEngine() {
+	/*
+	 * Google-engine pipeline: splits the extracted audio into speech segments
+	 * with Wavcut, sends each segment to Recognizer and forwards the result to
+	 * the listeners. count_lines ends up holding the number of segments whose
+	 * confidence was greater than zero.
+	 */
+	jlog_set_output(NULL);
+	/* Input audio and output directory for this request id. */
+	string file_in = "";
+	file_in.append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN);
+	string path_out= "";
+	path_out.append(path_contents).append("/").append(id).append("/");
+
+	/* Wavcut takes non-const C strings, so give it private copies. */
+	char* pathAudio = new char[file_in.size()+1];
+	char* outputPath = new char[path_out.size()+1];
+	char* _id = new char[id.size()+1];
+	strcpy(pathAudio, file_in.c_str());
+	strcpy(outputPath, path_out.c_str());
+	strcpy(_id, id.c_str());
+
+	Wavcut* wavcut = new Wavcut(pathAudio,outputPath,_id);
+
+	/* The loader returns NULL when the file cannot be loaded; initialize()
+	   dereferences its argument, so never hand it a NULL configuration. */
+	Jconf *jconf = j_config_load_file_new(PATH_WCONFIG);
+
+	/* No usable config file, or fewer than two segments found with it:
+	   run the splitter again with the built-in parameters. */
+	if(jconf == NULL || wavcut->initialize(jconf)<2){
+		wavcut->initialize(load_config());
+	}
+
+	vector<Audiofile> audioList = wavcut->list_audio_files();
+
+	/* Splitting is done: release the splitter and the path copies. Wavcut only
+	   reads these pointers while initialize() is running. */
+	delete wavcut;
+	delete[] pathAudio;
+	delete[] outputPath;
+	delete[] _id;
+
+	Recognizer* recog = new Recognizer();
+	count_lines = 0;
+	for(size_t ii=0; ii < audioList.size(); ii++)
+	{
+		/* Recognize this segment, then delete its file from disk. */
+		recog->recognize(audioList[ii].file_path);
+		remove(audioList[ii].file_path);
+		/* Timestamp passed to the listeners: start_seg scaled by 1000. */
+		int64_t pts = (int64_t)(audioList[ii].start_seg*1000);
+		float conf = recog->getconfidence();
+
+		if(conf >= confidenceRate)
+			notifyListeners((char*)recog->getsentence().c_str(), pts);
+		else
+			notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", pts);
+		/* Only segments with a positive confidence are counted. */
+		if(conf > 0)
+			count_lines++;
+	}
+	delete recog;
+}
+
+Jconf* Recognize::load_config(){
+	/* Returns a Jconf created with j_jconf_new() and filled with the built-in voice-detection parameters below. */
+	Jconf* jconf = j_jconf_new();
+
+	/* argv-style arguments in writable storage: binding string literals to
+	   char* is ill-formed in modern C++, and slot 0 (the program-name
+	   position) and the last slot must not be left uninitialized. */
+	static char args[][16] = {
+		"wavcut",
+		"-lv", "3000",
+		"-zc", "150",
+		"-headmargin", "200",
+		"-tailmargin", "150",
+		"-rejectshort", "1500"
+	};
+	const int argCount = sizeof(args) / sizeof(args[0]);
+	char *parametros[argCount + 1];
+	for (int i = 0; i < argCount; i++)
+		parametros[i] = args[i];
+	parametros[argCount] = NULL;
+
+	/* read arguments and set parameters; a failure is only reported */
+	if (j_config_load_args(jconf, argCount, parametros) == -1) {
+		fprintf(stderr, "Error reading arguments\n");
+	}
+
+	jconf->input.type = INPUT_WAVEFORM;
+	jconf->input.speech_input = SP_RAWFILE;
+	jconf->detect.silence_cut = 1;
+	return jconf;
+}
@@ -0,0 +1,101 @@
+
+
+
+#include "recognizer.h"
+#include <json/json.h>
+
+
+static boolean reconhecendo ;
+static string sentence;
+static float confidence;
+string lenguage = "pt-BR";
+
+using namespace std;
+
+
+void resultado(Recog *recog, void *dummy);
+
+
+
+Recognizer::Recognizer(){
+ // Empty: the jconf and recog members are not initialized here.
+}
+
+
+Recognizer::~Recognizer(){
+  // Empty: nothing is released here.
+}
+
+
+
+int Recognizer::recognize(string file_in)
+{
+  /*
+   * Sends the audio file file_in to the Google speech endpoint through the
+   * curl command line, then stores the first alternative's transcript and
+   * confidence in the file-scope sentence / confidence variables, which
+   * getsentence() and getconfidence() return.
+   *
+   * Returns 1 when the request was run (even if nothing was recognized) and
+   * 0 when the curl process could not be started.
+   */
+  char message[100];
+  message[0] = '\0';
+  string jsonResult = "";
+  stringstream comand;
+
+  // Reset up front so a failed call never exposes the previous result.
+  confidence = 0.0;
+  sentence = "";
+
+  // Request: POST the file contents as binary data.
+  comand << " curl -ss -X POST --data-binary @";
+  comand << file_in;
+
+  // NOTE(review): file_in is inserted into the shell command unquoted, and
+  // the API key is hard-coded in the URL; both deserve a follow-up.
+  comand<< " --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/l16; rate=22050;' 'https://www.google.com/speech-api/v2/recognize?client=chromium&lang="<<lenguage<<"&maxresults=1&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw'";
+
+  const string cmFinal = comand.str();
+  FILE *file = popen(cmFinal.c_str(),"r");
+  if(file == NULL) {
+    // Reading from a NULL stream is undefined behaviour, so stop here.
+    printf("ERROR\n");
+    return 0;
+  }
+
+  // Skip the first chunk returned by fgets() (the first output line when it
+  // fits in the buffer) and keep everything after it as the JSON document.
+  int vez = 0;
+  while (fgets(message, sizeof (message), file))
+  {
+    vez++;
+    if(vez>1){
+      jsonResult+= message;
+    }
+  }
+
+  // A stream opened with popen() has to be closed with pclose(), not fclose().
+  pclose(file);
+
+  Json::Value root;
+  Json::Reader reader;
+  const bool parsingSuccessful = reader.parse(jsonResult, root);
+  if (parsingSuccessful)
+  {
+    sentence = root["result"][0]["alternative"][0]["transcript"].asString();
+    confidence = root["result"][0]["alternative"][0]["confidence"].asFloat();
+  }
+
+  return 1;
+}
+    
+
+float Recognizer::getconfidence(){
+  return confidence; // file-scope value written by recognize(); shared by all instances
+}
+string Recognizer::getsentence(){
+ // Returns a copy of the file-scope sentence written by recognize().
+  return sentence;
+}
+
@@ -0,0 +1,187 @@
+#include "wavcut.h"
+
+#include <vector>
+// #include <iostream>
+
+
+int sfreq = 22050;		///< Temporal storage of sample rate
+int speechlen;		///< samples of one recorded segments
+FILE *fp = NULL;		///< File pointer for WAV output
+int sid = 0;		///< current file ID (for SPOUT_FILE)
+char *outpath = NULL;	///< work space for output file name formatting
+boolean writing_file = FALSE; ///< TRUE if writing to a file
+int trigger_sample;
+char *pathAudio;
+char* outputPath;
+char* id_file;
+static vector<Audiofile> audioList;
+
+
+Wavcut::Wavcut(char* _pathAudio, char* _outputPath,  char* _id){
+	// Stores the arguments in file-scope variables (not members); the pointers are kept, not copied.
+	pathAudio = _pathAudio;
+	outputPath = _outputPath;
+	id_file = _id;
+	// No other setup happens here; the work is done by initialize().
+}
+
+Wavcut::~Wavcut(){
+	// Empty: the pointers given to the constructor are not released here.
+}
+
+
+/* Callback given to adin_go(): appends len samples from now to the current
+ * segment file, opening a new file when a segment starts (speechlen == 0).
+ * recog is unused. Returns 0 on success and -1 on any failure. */
+int Wavcut::adin_callback_file(SP16 *now, int len, Recog *recog)
+{
+	if (speechlen == 0) {
+		/* outpath is the 256-byte buffer allocated in initialize(); refuse
+		   names that do not fit instead of overflowing it. */
+		const int kOutpathSize = 256;
+		int written = snprintf(outpath, kOutpathSize, "%s%d.wav", outputPath, sid);
+		if (written < 0 || written >= kOutpathSize) {
+			return -1;
+		}
+		/* An already existing file must be writable. */
+		if (access(outpath, F_OK) == 0 && access(outpath, W_OK) != 0) {
+			return -1;
+		}
+		fp = wrwav_open(outpath, sfreq);
+		if (fp == NULL) {
+			return -1;
+		}
+		writing_file = TRUE;
+	}
+	/* write recorded sample to file */
+	if (wrwav_data(fp, &(now[0]), len) == FALSE) {
+		return -1;
+	}
+
+	/* accumulate sample num of this segment */
+	speechlen += len;
+	return 0;
+}
+
+// Callback registered for CALLBACK_EVENT_SPEECH_START: remembers the sample index at which the current segment was triggered.
+void registra_tempo(Recog *recog, void *data)
+{
+  trigger_sample = recog->adin->last_trigger_sample;
+}
+
+/* Closes the segment file being written and records it in audioList; returns FALSE only when the close fails. */
+boolean close_files()
+{
+	if (!writing_file) {
+		return TRUE;
+	}
+	if (wrwav_close(fp) == FALSE) {
+		fprintf(stderr, "adinrec: failed to close file\n");
+		return FALSE;
+	}
+	/* Copy the name through an explicit "%s" into a buffer as large as outpath (256 bytes), so the path is never used as a format string. */
+	const int kPathSize = 256;
+	char* fileout = (char *)mymalloc(kPathSize);
+	snprintf(fileout, kPathSize, "%s", outpath);
+	audioList.push_back(Audiofile(fileout, (float)trigger_sample / (float)sfreq,
+		(float)(trigger_sample + speechlen) / (float)sfreq));
+	writing_file = FALSE;
+	return TRUE;
+}
+
+
+ int Wavcut::initialize(Jconf *jconf) {
+  /*
+   * Splits the audio file named by the file-scope pathAudio into segments,
+   * using the Julius audio-input loop configured by jconf. Every finished
+   * segment is written by adin_callback_file() and recorded by close_files().
+   *
+   * Returns the segment counter sid; 0 when jconf is NULL or the audio
+   * input cannot be initialized.
+   */
+
+  sid = 0;
+  audioList.clear();
+
+  /* The caller passes the result of a config loader straight in; a NULL
+     configuration would be dereferenced below, so report zero segments. */
+  if (jconf == NULL) {
+    return 0;
+  }
+
+  /* create the recognizer instance */
+  Recog *recog = j_recog_new();
+
+  /* force the sample rate, then attach the configuration to the instance */
+  jconf->input.sfreq = sfreq;
+  recog->jconf = jconf;
+
+  /* Allocate the file-name buffer only once: initialize() can run more than
+     once per process and must not leak a fresh 256-byte buffer each time. */
+  if (outpath == NULL) {
+    outpath = (char *)mymalloc(256);
+  }
+
+  /* register the callback that records where each segment starts */
+  callback_add(recog, CALLBACK_EVENT_SPEECH_START, registra_tempo, NULL);
+
+  /* initialize the audio input */
+  if (j_adin_init(recog) == FALSE) {
+    fprintf(stderr, "Error in initializing adin device\n");
+    return 0;
+  }
+
+  /* open the audio file to be cut */
+  if(j_open_stream(recog,pathAudio) == -2)
+    return sid;
+
+  /* voice-detection loop: one adin_go() call per pass */
+  int ret;
+  boolean is_continues;
+
+  do {
+    speechlen = 0;
+    ret = adin_go(adin_callback_file, NULL, recog);
+
+    /* 0: reached end of input; -1: device read error or callback error.
+       NOTE(review): returning here skips close_files(), so a segment that
+       is still open at that point is neither closed nor listed - confirm
+       this is intended. */
+    if (ret == 0 || ret == -1) {
+      return sid;
+    }
+
+    /* a silence interval was detected: finish the current segment file */
+    if (close_files() == FALSE)
+      return sid;
+
+    /* count the segment that was just cut */
+    sid++;
+
+    /* loop again only when ret > 0 or ret == -2 */
+    is_continues = FALSE;
+    if (ret > 0 || ret == -2) {
+      is_continues = TRUE;
+    }
+
+  } while (is_continues);
+
+  /* the whole audio was read: shut the audio input down */
+  adin_end(recog->adin);
+
+  return sid;
+
+}
+
+vector<Audiofile> Wavcut::list_audio_files(){
+  // Returns a copy of the file-scope audioList that close_files() fills.
+
+  return audioList;
+}
+
+
+
+
+
+ 
+
@@ -0,0 +1,24 @@
+-smpFreq 22050
+-lv 1000
+-zc 60
+-headmargin 200
+-tailmargin 150
+-rejectshort 1500
+-input rawfile	
+-cutsilence
+
+
+
+#-smpFreq 22050
+#-lv 3000
+#-zc 150
+#-headmargin 200
+#-tailmargin 150
+#-rejectshort 1500
+#-input rawfile	
+#-cutsilence
+
+
+
+
+
@@ -15,7 +15,7 @@
 #include "listenerTradutor.h"
 #include "tradutorPortGlosa.h"
 #include "serviceException.h"
-#include <json/json.h>
+#include <json/json.h>	
 #include <lavidlib/base/RuntimeException.h>
 #define DEVELOPER "devel"
@@ -26,7 +26,6 @@
 #define PATH_CONF_FILE "vlibras_user/.vlibras-config/params.json"
 #define MAX_SIZE_PATH 256
-using namespace Json;
 using namespace Tradutor;
 using namespace jthread;
 using namespace std;
@@ -39,8 +38,8 @@ protected:
 	Renderer* renderer;
 	Mixer* mixer;
-	Value root;
-	Reader reader;
+	Json::Value root;
+	Json::Reader reader;
 	vector<int64_t>* vetor_pts;
 	bool finish;
	@@ -0,0 +1,13 @@		@@ -0,0 +1,13 @@
		1	+#include "audiofile.h"
		2	+
		3	+Audiofile::Audiofile(char *_file_path,float _start_seg,float _end_seg){
		4	+
		5	+ file_path = _file_path;
		6	+ start_seg = _start_seg;
		7	+ end_seg = _end_seg;
		8	+}
		9	+
		10	+Audiofile::~Audiofile(){
		11	+
		12	+
		13	+}
0	\ No newline at end of file	14	\ No newline at end of file
	@@ -0,0 +1,21 @@		@@ -0,0 +1,21 @@
		1	+#ifndef AUDIOFILE_H
		2	+#define AUDIOFILE_H
		3	+
		4	+class Audiofile
		5	+{
		6	+
		7	+public:
		8	+
		9	+ Audiofile(char *_file_path,float _start_seg,float _end_seg);
		10	+ ~Audiofile();
		11	+
		12	+ char* file_path;
		13	+ float start_seg;
		14	+ float end_seg;
		15	+
		16	+
		17	+
		18	+
		19	+};
		20	+
		21	+#endif // AUDIOFILE_H
0	\ No newline at end of file	22	\ No newline at end of file
@@ -0,0 +1,37 @@		@@ -0,0 +1,37 @@
	1	+#ifndef RECOGNIZER_H
	2	+#define RECOGNIZER_H
	3	+
	4	+#include <julius/juliuslib.h>
	5	+#include <iostream>
	6	+#include <sstream>
	7	+#include <sys/stat.h>
	8	+#include <fstream>
	9	+#include <iostream>
	10	+#include <string.h>
	11	+#include <stdio.h>
	12	+#include <stdlib.h>
	13	+#include <string>
	14	+#include <list>
	15	+#include <stdint.h>
	16	+
	17	+using namespace std;
	18	+
	19	+class Recognizer
	20	+{
	21	+
	22	+public:
	23	+ Recognizer();
	24	+ ~Recognizer();
	25	+ int recognize(string file_in);
	26	+ string getsentence();
	27	+ float getconfidence();
	28	+
	29	+
	30	+private:
	31	+
	32	+ Jconf *jconf;
	33	+ Recog *recog;
	34	+
	35	+};
	36	+
	37	+#endif // RECOGNIZER_H
	@@ -0,0 +1,30 @@		@@ -0,0 +1,30 @@
		1	+#ifndef WAVCUT_H
		2	+#define WAVCUT_H
		3	+
		4	+#include <julius/juliuslib.h>
		5	+#include <string>
		6	+#include <algorithm>
		7	+#undef min
		8	+#undef max
		9	+#include <vector>
		10	+#include "audiofile.h"
		11	+
		12	+
		13	+
		14	+using namespace std;
		15	+class Wavcut{
		16	+
		17	+public:
		18	+
		19	+ Wavcut(char* _pathAudio, char* _outputPath, char* _id);
		20	+ ~Wavcut();
		21	+
		22	+ int initialize(Jconf *jconf);
		23	+ vector<Audiofile> list_audio_files();
		24	+ int count_lines;
		25	+
		26	+private:
		27	+ static int adin_callback_file(SP16 *now, int len, Recog *recog);
		28	+};
		29	+
		30	+#endif // WAVCUT_H
0	\ No newline at end of file	31	\ No newline at end of file
	@@ -68,14 +68,17 @@ void Recognize::Run(){		@@ -68,14 +68,17 @@ void Recognize::Run(){
68	finish = false;	68	finish = false;
69	createDir();	69	createDir();
70	extractAudioFromVideo();	70	extractAudioFromVideo();
71	- breakVideoParts(getTimeMediaSec());
72	- executeJuliusEngine();
73	- generateConfidence();
74	- //filterOutputJulius();
75	- //cleanFiles();
76	-	71	+ if (RECOGNIZER_MODE == 0){
		72	+ breakVideoParts(getTimeMediaSec());
		73	+ executeJuliusEngine();
		74	+ generateConfidence();
		75	+ } else {
		76	+ executeGoogleEngine();
		77	+ }
		78	+
77	finish = true;	79	finish = true;
78	- //notifyEndExtraction(count_lines);	80	+ notifyEndExtraction(count_lines);
		81	+ cleanFiles();
79	}	82	}
80		83
81	void Recognize::setFrequency(int freq) {	84	void Recognize::setFrequency(int freq) {
	@@ -209,7 +212,7 @@ void Recognize::breakVideoParts(int timeTotal) {		@@ -209,7 +212,7 @@ void Recognize::breakVideoParts(int timeTotal) {
209	void Recognize::executeJuliusEngine() {	212	void Recognize::executeJuliusEngine() {
210		213
211	string type, freqStr;	214	string type, freqStr;
212	- string command = "julius -C ";	215	+ string command = "julius -quiet -C ";
213	char cfreq[10];	216	char cfreq[10];
214		217
215	char* jPath;	218	char* jPath;
	@@ -292,14 +295,12 @@ void Recognize::generateConfidence() {		@@ -292,14 +295,12 @@ void Recognize::generateConfidence() {
292	scores.push_back(avgScores/sizeAvgScores);	295	scores.push_back(avgScores/sizeAvgScores);
293		296
294	}else if(pass==0){	297	}else if(pass==0){
295	- notifyListeners((char*) "SENTENCA COM BAIXA QUALIDADE", 0);
296	- notifyEndExtraction(count_lines);	298	+ notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", 0);
297	return;	299	return;
298	}	300	}
299	} while (!in.eof());	301	} while (!in.eof());
300	in.close();	302	in.close();
301	filterOutputJulius();	303	filterOutputJulius();
302	- notifyEndExtraction(count_lines);
303	}	304	}
304	}	305	}
305		306
	@@ -420,3 +421,100 @@ void Recognize::createDir(){		@@ -420,3 +421,100 @@ void Recognize::createDir(){
420	.append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/origin");	421	.append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/origin");
421	system(command.c_str());	422	system(command.c_str());
422	}	423	}
		424	+
		425	+void Recognize::executeGoogleEngine() {
		426	+ jlog_set_output(NULL);
		427	+
		428	+ Wavcut* wavcut;
		429	+ Recognizer* recog;
		430	+
		431	+ vector<Audiofile> audioList;
		432	+ string file_in = "";
		433	+ file_in.append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN);
		434	+ string path_out= "";
		435	+ path_out.append(path_contents).append("/").append(id).append("/");
		436	+ char* pathAudio = new char[file_in.size()+1];
		437	+ char* outputPath = new char[path_out.size()+1];
		438	+ char* _id = new char[id.size()+1];
		439	+ strcpy(pathAudio, file_in.c_str());
		440	+ strcpy(outputPath, path_out.c_str());
		441	+ strcpy(_id, id.c_str());
		442	+
		443	+
		444	+ wavcut = new Wavcut(pathAudio,outputPath,_id);
		445	+ Jconf *jconf;
		446	+ jconf = j_config_load_file_new(PATH_WCONFIG);
		447	+
		448	+ if(wavcut->initialize(jconf)<2){
		449	+
		450	+ wavcut->initialize(load_config());
		451	+ }
		452	+
		453	+ audioList = wavcut->list_audio_files();
		454	+
		455	+
		456	+ delete wavcut;
		457	+ recog = new Recognizer();
		458	+
		459	+ int ii;
		460	+ count_lines = 0;
		461	+ for(ii=0; ii < audioList.size(); ii++)
		462	+ {
		463	+
		464	+ /*chama o reconhecedor passando cada arquivo de audio*/
		465	+ recog->recognize(audioList[ii].file_path);
		466	+
		467	+ /*remove o audio ja reconhecido*/
		468	+ remove(audioList[ii].file_path);
		469	+
		470	+ /*segundos do inicio do audio reconhecido*/
		471	+ //cout<< calcula_pts(audioList[ii].start_seg) << endl;
		472	+
		473	+ /* texto do audio reconhecido*/
		474	+ //cout<< recog->getsentence() << endl;
		475	+
		476	+ /* porcentagem de confiança do reconhecimento (de 0 a 1) */
		477	+ //cout<< recog->getconfidence() << endl;
		478	+
		479	+ if(recog->getconfidence() >= confidenceRate)
		480	+ notifyListeners((char*)recog->getsentence().c_str(), (int64_t)(audioList[ii].start_seg*1000));
		481	+ else
		482	+ notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", (int64_t)(audioList[ii].start_seg*1000));
		483	+ if(recog->getconfidence()> 0)
		484	+ count_lines++;
		485	+ }
		486	+ delete recog;
		487	+
		488	+}
		489	+
		490	+Jconf* Recognize::load_config(){
		491	+
		492	+ Jconf* jconf;
		493	+ jconf = j_jconf_new();
		494	+
		495	+ char *parametros[12];
		496	+
		497	+ parametros[1]="-lv";
		498	+ parametros[2]="3000";
		499	+ parametros[3]="-zc";
		500	+ parametros[4]="150";
		501	+ parametros[5]="-headmargin";
		502	+ parametros[6]="200";
		503	+ parametros[7]="-tailmargin";
		504	+ parametros[8]="150";
		505	+ parametros[9]="-rejectshort";
		506	+ parametros[10]="1500";
		507	+ //cout << "load_config" << endl;
		508	+ /* read arguments and set parameters */
		509	+ if (j_config_load_args(jconf, 11, parametros) == -1) {
		510	+ fprintf(stderr, "Error reading arguments\n");
		511	+
		512	+ }
		513	+
		514	+ jconf->input.type = INPUT_WAVEFORM;
		515	+ jconf->input.speech_input = SP_RAWFILE;
		516	+ jconf->detect.silence_cut = 1;
		517	+
		518	+
		519	+return jconf;
		520	+}
@@ -0,0 +1,101 @@		@@ -0,0 +1,101 @@
	1	+
	2	+
	3	+
	4	+#include "recognizer.h"
	5	+#include <json/json.h>
	6	+
	7	+
	8	+static boolean reconhecendo ;
	9	+static string sentence;
	10	+static float confidence;
	11	+string lenguage = "pt-BR";
	12	+
	13	+using namespace std;
	14	+
	15	+
	16	+void resultado(Recog *recog, void *dummy);
	17	+
	18	+
	19	+
	20	+Recognizer::Recognizer(){
	21	+
	22	+}
	23	+
	24	+
	25	+Recognizer::~Recognizer(){
	26	+
	27	+}
	28	+
	29	+
	30	+
	31	+int Recognizer::recognize(string file_in)
	32	+{
	33	+ FILE *file;
	34	+ string cmFinal;
	35	+ char message[100];
	36	+ message[0] = '\0';
	37	+ string jsonResult ="";
	38	+ string vozReconhecida = "";
	39	+ int indexLineSrt =0;
	40	+ stringstream comand;
	41	+ stringstream index;
	42	+
	43	+
	44	+ string fileName = file_in;
	45	+
	46	+ // inicio preparação requisição
	47	+
	48	+ comand << " curl -ss -X POST --data-binary @";
	49	+ comand << fileName;
	50	+
	51	+ //requisição para o google speech
	52	+ comand<< " --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/l16; rate=22050;' 'https://www.google.com/speech-api/v2/recognize?client=chromium&lang="<<lenguage<<"&maxresults=1&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw'";
	53	+ //AIzaSyBeeYW4l2OuCwiUfzBaUXXeWAO6Uy-u0F8'";
	54	+ //public key Ezequiel project 1
	55	+ //AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw'";
	56	+ //fim
	57	+ cmFinal = comand.str();
	58	+ file = popen(cmFinal.c_str(),"r");
	59	+ if(file == NULL)
	60	+ printf("ERROR\n");
	61	+ int vez =0;
	62	+ while (fgets(message, sizeof (message), file))
	63	+ {
	64	+ vez++;
	65	+
	66	+ if(vez>1){
	67	+ jsonResult+= message;
	68	+
	69	+ }
	70	+ }
	71	+ //cout<< jsonResult <<endl<<endl;
	72	+ Json::Value root;
	73	+ Json::Reader reader;
	74	+ bool parsingSuccessful = reader.parse(jsonResult, root);
	75	+ // cout << root["result"]<<endl;
	76	+ confidence = 0.0;
	77	+ sentence = "";
	78	+ if (parsingSuccessful)
	79	+ {
	80	+ sentence = root["result"][0]["alternative"][0]["transcript"].asString();
	81	+ confidence = root["result"][0]["alternative"][0]["confidence"].asFloat();
	82	+ }
	83	+
	84	+
	85	+ fclose(file);
	86	+
	87	+ jsonResult = "";
	88	+ comand.str("");
	89	+
	90	+ return 1;
	91	+}
	92	+
	93	+
	94	+float Recognizer::getconfidence(){
	95	+ return confidence;
	96	+}
	97	+string Recognizer::getsentence(){
	98	+ // printf("palavra %s\n",sentence.c_str() );
	99	+ return sentence;
	100	+}
	101	+
@@ -0,0 +1,187 @@		@@ -0,0 +1,187 @@
	1	+#include "wavcut.h"
	2	+
	3	+#include <vector>
	4	+// #include <iostream>
	5	+
	6	+
	7	+int sfreq = 22050; ///< Temporal storage of sample rate
	8	+int speechlen; ///< samples of one recorded segments
	9	+FILE *fp = NULL; ///< File pointer for WAV output
	10	+int sid = 0; ///< current file ID (for SPOUT_FILE)
	11	+char *outpath = NULL; ///< work space for output file name formatting
	12	+boolean writing_file = FALSE; ///< TRUE if writing to a file
	13	+int trigger_sample;
	14	+char *pathAudio;
	15	+char* outputPath;
	16	+char* id_file;
	17	+static vector<Audiofile> audioList;
	18	+
	19	+
	20	+Wavcut::Wavcut(char* _pathAudio, char* _outputPath, char* _id){
	21	+
	22	+ pathAudio = _pathAudio;
	23	+ outputPath = _outputPath;
	24	+ id_file = _id;
	25	+ //printf("%s\n","Entrou aquiiiii" )
	26	+}
	27	+
	28	+Wavcut::~Wavcut(){
	29	+ //printf("%s\n","Wavcut finalizado" );
	30	+}
	31	+
	32	+
	33	+int Wavcut::adin_callback_file(SP16 *now, int len, Recog *recog)
	34	+ {
	35	+ /* cria novo arquivo wav para salvar o audio sem silencio*/
	36	+ if (speechlen == 0) {
	37	+
	38	+ sprintf(outpath, "%s%d.wav",outputPath, sid);
	39	+
	40	+ if (access(outpath, F_OK) == 0) {
	41	+ if (access(outpath, W_OK) != 0) {
	42	+ return(-1);
	43	+ }
	44	+
	45	+ }
	46	+
	47	+ if ((fp = wrwav_open(outpath, sfreq)) != NULL) {
	48	+ //fprintf(stderr, "novo arquivo\n");
	49	+ }else{
	50	+ return -1;
	51	+ }
	52	+ writing_file = TRUE;
	53	+ }
	54	+
	55	+ /* write recorded sample to file */
	56	+ if (wrwav_data(fp, &(now[0]), len) == FALSE) {
	57	+ return -1;
	58	+ }
	59	+
	60	+ /* accumulate sample num of this segment */
	61	+speechlen += len;
	62	+
	63	+return(0);
	64	+}
	65	+
	66	+//acumula o tempo de cada segmento
	67	+void registra_tempo(Recog *recog, void *data)
	68	+{
	69	+ trigger_sample = recog->adin->last_trigger_sample;
	70	+}
	71	+
	72	+boolean close_files()
	73	+{
	74	+ if (writing_file) {
	75	+
	76	+ if (wrwav_close(fp) == FALSE) {
	77	+ fprintf(stderr, "adinrec: failed to close file\n");
	78	+ return FALSE;
	79	+ }
	80	+ char* fileout=(char *)mymalloc(100);;
	81	+ // sprintf(fileout,*outpath);
	82	+ sprintf(fileout, outpath);
	83	+ audioList.push_back(Audiofile(fileout,(float)trigger_sample / (float)sfreq,
	84	+ (float)(trigger_sample + speechlen) / (float)sfreq));
	85	+
	86	+ writing_file = FALSE;
	87	+}
	88	+
	89	+return TRUE;
	90	+}
	91	+
	92	+
	93	+ int Wavcut::initialize(Jconf *jconf) {
	94	+
	95	+ sid = 0;
	96	+ audioList.clear();
	97	+ //Jconf *jconf;
	98	+ Recog *recog;
	99	+
	100	+ int ret;
	101	+ boolean is_continues;
	102	+
	103	+ /* cria instancia do reconhecedor */
	104	+ recog = j_recog_new();
	105	+ /* carrega as configurações contidas no jconfig */
	106	+ // jconf = j_config_load_file_new("/home/ezequiel/speech-recognizer/wavcut.jconf");
	107	+
	108	+ jconf->input.sfreq = sfreq;
	109	+ /*adciona a configuração ao reconhecedor*/
	110	+ recog->jconf = jconf;
	111	+
	112	+ outpath = (char *)mymalloc(256);
	113	+
	114	+/*registra calback do contador de tempo*/
	115	+ callback_add(recog, CALLBACK_EVENT_SPEECH_START, registra_tempo, NULL);
	116	+
	117	+ /*Inicializa o reconhecedor*/
	118	+ if (j_adin_init(recog) == FALSE) {
	119	+ fprintf(stderr, "Error in initializing adin device\n");
	120	+ return 0;
	121	+ }
	122	+
	123	+ /*Abre o quivo de áudio para ser cortado*/
	124	+ if(j_open_stream(recog,pathAudio) == -2)
	125	+ return sid;
	126	+
	127	+ /* loop de detecção de voz*/
	128	+
	129	+ do {
	130	+
	131	+ speechlen = 0;
	132	+
	133	+ ret = adin_go(adin_callback_file, NULL, recog);
	134	+
	135	+
	136	+ switch(ret) {
	137	+ case -1: /* device read error or callback error */
	138	+ //fprintf(stderr, "[error]\n");
	139	+ break;
	140	+ case 0: /* reached to end of input */
	141	+ //fprintf(stderr, "[eof]\n");
	142	+ return sid;
	143	+ break;
	144	+ default:
	145	+ break;
	146	+ }
	147	+
	148	+ if (ret == -1) {
	149	+ /* error in input device or callback function, so terminate program here */
	150	+ return sid;
	151	+ }
	152	+ /* um intervalo de silencio detectado */
	153	+
	154	+ if (close_files() == FALSE)
	155	+ return sid;
	156	+
	157	+ /* incremento do contador de partes cortadas */
	158	+
	159	+
	160	+ sid++;
	161	+
	162	+ is_continues = FALSE;
	163	+ if (ret > 0 || ret == -2) {
	164	+ is_continues = TRUE;
	165	+ }
	166	+
	167	+ } while (is_continues);
	168	+
	169	+ /*Quando termina de ler todo áudio finaliza*/
	170	+ adin_end(recog->adin);
	171	+
	172	+ return sid;
	173	+
	174	+}
	175	+
	176	+vector<Audiofile> Wavcut::list_audio_files(){
	177	+
	178	+
	179	+ return audioList;
	180	+}
	181	+
	182	+
	183	+
	184	+
	185	+
	186	+
	187	+
@@ -0,0 +1,24 @@		@@ -0,0 +1,24 @@
	1	+-smpFreq 22050
	2	+-lv 1000
	3	+-zc 60
	4	+-headmargin 200
	5	+-tailmargin 150
	6	+-rejectshort 1500
	7	+-input rawfile
	8	+-cutsilence
	9	+
	10	+
	11	+
	12	+#-smpFreq 22050
	13	+#-lv 3000
	14	+#-zc 150
	15	+#-headmargin 200
	16	+#-tailmargin 150
	17	+#-rejectshort 1500
	18	+#-input rawfile
	19	+#-cutsilence
	20	+
	21	+
	22	+
	23	+
	24	+