#include Recognize::Recognize(char* _pathVideo, char* _id) { listeners = new list(); pathVideo = _pathVideo; inputType = INPUT_PATTERN; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; stringstream ss; ss << _id; ss >> id; confidenceRate=CONFIDENCE_RATE; PRINTL(util::_DEBUG, "Recognize Done!\n"); } Recognize::Recognize(char* _pathVideo, char* _id, char* rate) { listeners = new list(); pathVideo = _pathVideo; inputType = INPUT_PATTERN; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; stringstream ss; ss << _id; ss >> id; istringstream(rate) >> confidenceRate; if (confidenceRate == 0) confidenceRate=CONFIDENCE_RATE; PRINTL(util::_DEBUG, "Recognize Done!\n"); } Recognize::Recognize(char* _pathVideo, int _inputType, char* _id) { listeners = new list(); pathVideo = _pathVideo; inputType = _inputType; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; id = _id; PRINTL(util::_DEBUG, "Recognize Done!\n"); } Recognize::~Recognize() { listeners->clear(); delete listeners; PRINTL(util::_DEBUG, "Recognize finalized!\n"); } void Recognize::initialize() { PRINTL(util::_INFO, "Reconhecendo áudio...\n"); /**printf("*** Initialized Recognition ***\n\nVideo: %s\nType [1-File; 2-Mic]: %d\nFrequency: %d\n\n", this->pathVideo, this->inputType, this->frequency);**/ ifstream file(pathVideo, ifstream::binary); if(!file.is_open()){ finish = true; Logging::instance()->writeLog("recognize.cpp Arquivo não encontrado."); throw RecognizeException("Falha ao abrir o arquivo! Verifique se o mesmo existe."); } this->Start(); } void Recognize::Run(){ finish = false; createDir(); extractAudioFromVideo(); if (RECOGNIZER_MODE == 0){ breakVideoParts(getTimeMediaSec()); executeJuliusEngine(); generateConfidence(); } else { executeGoogleEngine(); } finish = true; notifyEndExtraction(count_lines); cleanFiles(); } void Recognize::setFrequency(int freq) { frequency = freq; } void Recognize::setSizeAudioBlocs(int sec) { sizeBlocs = sec; } void Recognize::setPathAudioContents(char* path){ path_contents = path; } char* Recognize::extractAudioFromVideo() { string command = PROGRAM; command.append(" -i "). append((string) pathVideo). append(" -ar "); string strFreq; std::stringstream sstr; sstr << frequency; strFreq = sstr.str(); //command.append(strFreq).append(" -ac 1 -f wav ").append(PATH_AUDIO_ORIGIN).append(" &"); command.append(strFreq). append(" -ac 1 -f wav "). append(path_contents).append("/").append(id). append(PATH_AUDIO_ORIGIN).append(" -v quiet"); /*string tmp = "echo "; tmp.append(PATH_AUDIO_ORIGIN).append(" >> ").append(FILENAME_AUDIOLIST); system(tmp.c_str());*/ system(command.c_str()); } int Recognize::getTimeMediaSec() { string command = PROGRAM; command.append(" -i ").append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN) .append(" 2>&1 | grep Duration >> outfile"); system(command.c_str()); ifstream in("outfile"); if (!in) return -1; std::string line; in >> line >> line; char* timetok; timetok = strtok((char*)line.c_str(), " :,."); int seconds = 0; seconds += (atoi(timetok) * 60 * 60); // hora timetok = strtok(NULL, " :,."); seconds += (atoi(timetok) * 60); // min timetok = strtok(NULL, " :,."); seconds += atoi(timetok); // seg system("rm outfile"); return seconds; } void Recognize::breakVideoParts(int timeTotal) { string ss_str, t_str, command, aplist; int count = 1; int ss = 0; int t = 0; bool consume = true; string filename= FILENAME_AUDIOPART; char tmp [filename.length()]; sprintf(tmp, "%i", ss); ss_str = tmp; sprintf(tmp, "%i", sizeBlocs); t_str = tmp; if (timeTotal < sizeBlocs) sizeBlocs = timeTotal; do { ss = t; if(timeTotal >= sizeBlocs && (timeTotal - sizeBlocs) > (sizeBlocs/2)) { t += sizeBlocs; timeTotal -= sizeBlocs; } else { t += timeTotal; timeTotal = 0; consume = false; sprintf(tmp, "%i", t); t_str = tmp; } sprintf(tmp, "%i", ss); ss_str = tmp; pts.push_back(convert_pts(ss_str)); command = "sox "; command.append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN).append(" ") .append(path_contents).append("/").append(id).append(PATH_AUDIO_PARTS); sprintf(tmp, "%i", count++); filename.append(tmp).append(".wav"); command.append(filename).append(" trim ").append(ss_str).append(" ").append(t_str); system(command.c_str()); string apcomm = "echo "; apcomm.append(path_contents).append("/").append(id).append(PATH_AUDIO_PARTS).append(filename).append(" >> ") .append(path_contents).append("/").append(id).append(FILENAME_AUDIOLIST); system(apcomm.c_str()); filename = FILENAME_AUDIOPART; aplist = ""; } while (consume); } void Recognize::executeJuliusEngine() { string type, freqStr; string command = "julius -quiet -C "; char cfreq[10]; char* jPath; jPath = getenv("JCONFIG"); if(jPath != NULL) command.append(jPath); else command.append(PATH_JCONFIG); command.append(" -input "); if (inputType == 1) type = "rawfile"; else type = "mic"; command.append(type).append(" -filelist ").append(path_contents).append("/").append(id).append(FILENAME_AUDIOLIST); sprintf(cfreq, "%i", frequency); command.append(" -smpFreq "). append(cfreq). append(" >> "); command.append(path_contents).append("/").append(id).append(FILENAME_RECOGNIZED_OUT); //Command of execute Julius //printf("\n\nCommand for executeJuliusEngine: %s\n", command.c_str()); system(command.c_str()); } void Recognize::generateConfidence() { string command = "cat "; command.append(path_contents).append("/").append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep "). append(FIND_CONFIDENCE).append(" >> ").append(path_contents).append("/").append(id).append(FILENAME_CONFIDENCEOUT); system(command.c_str()); //printf("\n\n---> command: %s\n\n", command.c_str()); string path; path.append(path_contents).append("/").append(id).append(FILENAME_CONFIDENCEOUT); ifstream in(path.c_str()); if (!in) { perror("Error: "); } else { string line; float tmp; int pass = 0; do { getline(in, line); std::istringstream buf(line); if (line.length() > 0) { pass++; istream_iterator beg(buf), end; vector tokens(beg, end); float sizeAvgScores = 0; float sizeLowScores = 0; float avgScores = 0; float lowScores = 0; int i; for(i=2; i < tokens.size()-1; i++){ istringstream(tokens[i]) >> tmp; if (tmp > confidenceRate){ avgScores += tmp; sizeAvgScores++; } else{ lowScores += tmp; sizeLowScores++; } } if (lowScores > 0){ lowScores = lowScores/sizeLowScores; int i; for(i=0; i < sizeLowScores/2; i++){ avgScores += lowScores; sizeAvgScores++; } } scores.push_back(avgScores/sizeAvgScores); }else if(pass==0){ notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", 0); return; } } while (!in.eof()); in.close(); filterOutputJulius(); } } bool Recognize::getConfidence(){ //cout << "CONFIDENCE: " << scores[i] << endl; if (scores.front() < confidenceRate) return false; return true; } void Recognize::filterOutputJulius() { std::list *sentences; sentences = new std::list(); string command = "cat "; command.append(path_contents).append("/").append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep -e "). append(FIND_SENTENCE).append(" -e \"").append(AUDIO_SILENT).append("\"").append(" >> ").append(path_contents).append("/").append(id).append(FILENAME_FILTEROUT); system(command.c_str()); //printf("\n\n---> command: %s\n\n", command.c_str()); count_lines = 0; string path; path.append(path_contents).append("/").append(id).append(FILENAME_FILTEROUT); ifstream in(path.c_str()); string strFilter; if (!in) { perror("Error: "); } else { string line; int sizeLine; char* sentence_ptr; do { getline(in, line); if (line.length() > 0) { if (line != AUDIO_SILENT){ sizeLine = (int)line.length(); strFilter = line.substr(strlen(FIND_SENTENCE), sizeLine); sentence_ptr = new char[strFilter.length()+1]; strcpy(sentence_ptr, (char*) strFilter.c_str()); if(getConfidence()) notifyListeners(sentence_ptr, pts.front()); else{ notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", pts.front()); } scores.erase(scores.begin()); count_lines++; } pts.erase(pts.begin()); } } while (!in.eof()); in.close(); //notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", pts.front()); } /*char* ptr_strFilter; ptr_strFilter = (char*) malloc (strFilter.length()+1); strcpy(ptr_strFilter, (char*) strFilter.c_str());*/ } void Recognize::notifyListeners(char* text, int64_t pts) { //cout << "NOTIFY: " << text << endl; for(list::iterator it = listeners->begin(); it != listeners->end(); it++){ //(*it)->notifyTextRecognized((unsigned char*) text, calcula_pts(pts)); (*it)->notifyTextRecognized((unsigned char*) text, pts); } } void Recognize::notifyEndExtraction(int sentences_size) { PRINTL(util::_DEBUG, "Recognizer concluiu o reconhecimento: %d sentenças.\n", sentences_size); for(list::iterator it = listeners->begin(); it != listeners->end(); it++){ (*it)->notifyEnd(sentences_size); } } int64_t Recognize::calcula_pts(double msec) { return (int64_t)(1000 /*pcr_base*/ + ((msec/1000) * 90000.0)); } int64_t Recognize::convert_pts(string pts){ int64_t ui64; stringstream ss; ss << pts; ss >> ui64; return ui64*1000; } void Recognize::addListener(RecognizeListener* listener) { listeners->push_back(listener); } bool Recognize::isFinished() { return finish; } void Recognize::cleanFiles() { string command = "rm -r "; command.append(path_contents).append("/").append(id).append("/audio"); system(command.c_str()); } void Recognize::createDir(){ string command = "mkdir -p "; command.append(path_contents).append("/").append(id).append("/audio") .append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/parts") .append(" && mkdir -p ").append(path_contents).append("/").append(id).append("/audio/origin"); system(command.c_str()); } void Recognize::executeGoogleEngine() { jlog_set_output(NULL); Wavcut* wavcut; Recognizer* recog; vector audioList; string file_in = ""; file_in.append(path_contents).append("/").append(id).append(PATH_AUDIO_ORIGIN); string path_out= ""; path_out.append(path_contents).append("/").append(id).append("/"); char* pathAudio = new char[file_in.size()+1]; char* outputPath = new char[path_out.size()+1]; char* _id = new char[id.size()+1]; strcpy(pathAudio, file_in.c_str()); strcpy(outputPath, path_out.c_str()); strcpy(_id, id.c_str()); wavcut = new Wavcut(pathAudio,outputPath,_id); Jconf *jconf; char* wPath; wPath = getenv("WCONFIG"); if(wPath != NULL) jconf = j_config_load_file_new(wPath); else jconf = j_config_load_file_new(PATH_WCONFIG); if(wavcut->initialize(jconf)<2){ wavcut->initialize(load_config()); } audioList = wavcut->list_audio_files(); delete wavcut; recog = new Recognizer(); int ii; count_lines = 0; for(ii=0; ii < audioList.size(); ii++) { /*chama o reconhecedor passando cada arquivo de audio*/ recog->recognize(audioList[ii].file_path); /*remove o audio ja reconhecido*/ remove(audioList[ii].file_path); /*segundos do inicio do audio reconhecido*/ //cout<< calcula_pts(audioList[ii].start_seg) << endl; /* texto do audio reconhecido*/ //cout<< recog->getsentence() << endl; /* porcentagem de confiança do reconhecimento (de 0 a 1) */ //cout<< recog->getconfidence() << endl; if(recog->getconfidence() >= confidenceRate) notifyListeners((char*)recog->getsentence().c_str(), (int64_t)(audioList[ii].start_seg*1000)); else notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", (int64_t)(audioList[ii].start_seg*1000)); if(recog->getconfidence()> 0) count_lines++; } delete recog; } Jconf* Recognize::load_config(){ Jconf* jconf; jconf = j_jconf_new(); char *parametros[12]; parametros[1]="-lv"; parametros[2]="3000"; parametros[3]="-zc"; parametros[4]="150"; parametros[5]="-headmargin"; parametros[6]="200"; parametros[7]="-tailmargin"; parametros[8]="150"; parametros[9]="-rejectshort"; parametros[10]="1500"; //cout << "load_config" << endl; /* read arguments and set parameters */ if (j_config_load_args(jconf, 11, parametros) == -1) { fprintf(stderr, "Error reading arguments\n"); } jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_RAWFILE; jconf->detect.silence_cut = 1; return jconf; }