#include #include #include #include #include #include #include #include #include #include #include #include #include #define FREQUENCY_PATTERN 22050 #define INPUT_PATTERN 1 /* 1 = Raw file, 2 = Mic */ #define BLOCS_PATTERN 10 #define SIZE_BUFFER 256 #define CONFIDENCE_RATE 0.45 #define PATH_API_UPLOADS "vlibras_user/vlibras-api/uploads/" #define PATH_AUDIO_ORIGIN "/audio/origin/audio_origin.wav" #define PATH_AUDIO_PARTS "/audio/parts/" #define FILENAME_RECOGNIZED_OUT "/audio/recognized.out" #define FILENAME_AUDIOLIST "/audio/audiolist" #define FILENAME_FILTEROUT "/audio/filter" #define FILENAME_CONFIDENCEOUT "/audio/confidence" #define FIND_CONFIDENCE "\"cmscore1:\"" #define FIND_SENTENCE "\"sentence1:\"" #define FILENAME_AUDIOPART "audio00" #define AUDIO_SILENT "" #define PROGRAM "ffmpeg" // ffmpeg #define PTS_PATTERN 1000 using namespace std; Recognize::Recognize(char* _pathVideo, char* _id) { listeners = new list(); pathVideo = _pathVideo; inputType = INPUT_PATTERN; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; stringstream ss; ss << _id; ss >> id; confidenceRate=CONFIDENCE_RATE; pcr_base = 0; hasPCRBase = false; DPRINTF("Done!\n"); } Recognize::Recognize(char* _pathVideo, char* _id, char* rate) { listeners = new list(); pathVideo = _pathVideo; inputType = INPUT_PATTERN; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; pcr_base = 0; hasPCRBase = false; stringstream ss; ss << _id; ss >> id; istringstream(rate) >> confidenceRate; if (confidenceRate == 0) confidenceRate=CONFIDENCE_RATE; DPRINTF("Done!\n"); } Recognize::Recognize(char* _pathVideo, int _inputType, char* _id) { listeners = new list(); pathVideo = _pathVideo; inputType = _inputType; frequency = FREQUENCY_PATTERN; sizeBlocs = BLOCS_PATTERN; id = _id; pcr_base = 0; hasPCRBase = false; DPRINTF("Done!\n"); } Recognize::~Recognize() { listeners->clear(); delete listeners; DDDPRINTF("Recognize finalized!\n"); } void Recognize::initialize() { DDPRINTF("Recognizing...\n"); /**printf("*** Initialized Recognition ***\n\nVideo: %s\nType [1-File; 2-Mic]: %d\nFrequency: %d\n\n", this->pathVideo, this->inputType, this->frequency);**/ ifstream file(pathVideo, ifstream::binary); if(!file.is_open()){ finished = true; Util::Logger::Instance()->writeLog((char*) "[ERRO: recognize.cpp] Arquivo de vídeo não encontrado."); throw RecognizeException("Falha ao abrir o arquivo de vídeo! Verifique se o mesmo existe."); } this->Start(); } void Recognize::Run(){ finished = false; createDir(); extractAudioFromVideo(); breakVideoParts(getTimeMediaSec()); executeJuliusEngine(); generateConfidence(); filterOutputJulius(); //cleanFiles(); finished = true; notifyEndExtraction(count_lines); } void Recognize::setFrequency(int freq) { frequency = freq; } void Recognize::setSizeAudioBlocs(int sec) { sizeBlocs = sec; } char* Recognize::extractAudioFromVideo() { string command = PROGRAM; command.append(" -i "). append((string) pathVideo). append(" -ar "); string strFreq; std::stringstream sstr; sstr << frequency; strFreq = sstr.str(); //command.append(strFreq).append(" -ac 1 -f wav ").append(PATH_AUDIO_ORIGIN).append(" &"); command.append(strFreq). append(" -ac 1 -f wav "). append(PATH_API_UPLOADS). append(id). append(PATH_AUDIO_ORIGIN). append(" -v quiet"); /*string tmp = "echo "; tmp.append(PATH_AUDIO_ORIGIN).append(" >> ").append(FILENAME_AUDIOLIST); system(tmp.c_str());*/ system(command.c_str()); } int Recognize::getTimeMediaSec() { string command = PROGRAM; command.append(" -i ").append(PATH_API_UPLOADS).append(id).append(PATH_AUDIO_ORIGIN); command.append(" 2>&1 | grep Duration >> outfile"); system(command.c_str()); ifstream in("outfile"); if (!in) return -1; std::string line; in >> line >> line; char* timetok; timetok = strtok((char*)line.c_str(), " :,."); int seconds = 0; seconds += (atoi(timetok) * 60 * 60); // hora timetok = strtok(NULL, " :,."); seconds += (atoi(timetok) * 60); // min timetok = strtok(NULL, " :,."); seconds += atoi(timetok); // seg system("rm outfile"); return seconds; } void Recognize::breakVideoParts(int timeTotal) { string ss_str, t_str, command, aplist; int count = 1; int ss = 0; int t = 0; bool consume = true; string filename= FILENAME_AUDIOPART; char tmp [filename.length()]; sprintf(tmp, "%i", ss); ss_str = tmp; sprintf(tmp, "%i", sizeBlocs); t_str = tmp; if (timeTotal < sizeBlocs) sizeBlocs = timeTotal; do { ss = t; if(timeTotal >= sizeBlocs && (timeTotal - sizeBlocs) > (sizeBlocs/2)) { t += sizeBlocs; timeTotal -= sizeBlocs; } else { t += timeTotal; timeTotal = 0; consume = false; sprintf(tmp, "%i", t); t_str = tmp; } sprintf(tmp, "%i", ss); ss_str = tmp; pts.push_back(convert_pts(ss_str)); command = "sox "; command.append(PATH_API_UPLOADS).append(id).append(PATH_AUDIO_ORIGIN).append(" ").append(PATH_API_UPLOADS).append(id).append(PATH_AUDIO_PARTS); sprintf(tmp, "%i", count++); filename.append(tmp).append(".wav"); command.append(filename).append(" trim ").append(ss_str).append(" ").append(t_str); system(command.c_str()); string apcomm = "echo "; apcomm.append(PATH_API_UPLOADS).append(id).append(PATH_AUDIO_PARTS).append(filename).append(" >> ").append(PATH_API_UPLOADS).append(id).append(FILENAME_AUDIOLIST); system(apcomm.c_str()); filename = FILENAME_AUDIOPART; aplist = ""; } while (consume); } void Recognize::executeJuliusEngine() { string command, type, freqStr; char cfreq[10]; command = "julius -C vlibras_user/vlibras-core/recognize/src/julius.jconf -input "; if (inputType == 1) { type = "rawfile"; command.append(type).append(" -filelist ").append(PATH_API_UPLOADS).append(id).append(FILENAME_AUDIOLIST); } else { type = "mic"; } sprintf(cfreq, "%i", frequency); command.append(" -smpFreq "). append(cfreq). append(" -nolog >> "). append(PATH_API_UPLOADS).append(id). append(FILENAME_RECOGNIZED_OUT); //Command of execute Julius //printf("\n\nCommand for executeJuliusEngine: %s\n", command.c_str()); system(command.c_str()); } void Recognize::generateConfidence() { string command = "cat "; command.append(PATH_API_UPLOADS).append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep "). append(FIND_CONFIDENCE).append(" >> ").append(PATH_API_UPLOADS).append(id).append(FILENAME_CONFIDENCEOUT); system(command.c_str()); //printf("\n\n---> command: %s\n\n", command.c_str()); string path; path.append(PATH_API_UPLOADS).append(id).append(FILENAME_CONFIDENCEOUT); ifstream in(path.c_str()); if (!in) { perror("Error: "); } else { string line; float tmp; do { getline(in, line); std::istringstream buf(line); if (line.length() > 0) { istream_iterator beg(buf), end; vector tokens(beg, end); float sizeAvgScores = 0; float sizeLowScores = 0; float avgScores = 0; float lowScores = 0; int i; for(i=2; i < tokens.size()-1; i++){ istringstream(tokens[i]) >> tmp; if (tmp > confidenceRate){ avgScores += tmp; sizeAvgScores++; } else{ lowScores += tmp; sizeLowScores++; } } if (lowScores > 0){ lowScores = lowScores/sizeLowScores; int i; for(i=0; i < sizeLowScores/2; i++){ avgScores += lowScores; sizeAvgScores++; } } scores.push_back(avgScores/sizeAvgScores); } } while (!in.eof()); in.close(); } } bool Recognize::getConfidence(){ //cout << "CONFIDENCE: " << scores[i] << endl; if (scores.front() < confidenceRate) return false; return true; } void Recognize::filterOutputJulius() { std::list *sentences; sentences = new std::list(); string command = "cat "; command.append(PATH_API_UPLOADS).append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep -e "). append(FIND_SENTENCE).append(" -e \"").append(AUDIO_SILENT).append("\"").append(" >> ").append(PATH_API_UPLOADS).append(id).append(FILENAME_FILTEROUT); system(command.c_str()); //printf("\n\n---> command: %s\n\n", command.c_str()); count_lines = 0; string path; path.append(PATH_API_UPLOADS).append(id).append(FILENAME_FILTEROUT); ifstream in(path.c_str()); string strFilter; if (!in) { perror("Error: "); } else { string line; int sizeLine; char* sentence_ptr; do { getline(in, line); if (line.length() > 0) { if (line != AUDIO_SILENT){ sizeLine = (int)line.length(); strFilter = line.substr(strlen(FIND_SENTENCE), sizeLine); sentence_ptr = new char[strFilter.length()+1]; strcpy(sentence_ptr, (char*) strFilter.c_str()); if(getConfidence()) notifyListeners(sentence_ptr, pts.front()); else notifyListeners((char*) "SENTENCA_COM_BAIXA_QUALIDADE", pts.front()); scores.erase(scores.begin()); count_lines++; } pts.erase(pts.begin()); } } while (!in.eof()); in.close(); } /*char* ptr_strFilter; ptr_strFilter = (char*) malloc (strFilter.length()+1); strcpy(ptr_strFilter, (char*) strFilter.c_str());*/ } void Recognize::notifyListeners(char* text, int64_t pts) { //cout << "NOTIFY: " << text << endl; for(list::iterator it = listeners->begin(); it != listeners->end(); it++){ (*it)->notifyTextRecognized((unsigned char*) text, calcula_pts(pts)); } } void Recognize::notifyEndExtraction(int sentences_size) { DDPRINTF("Recognizer concluiu o reconhecimento: %d sentenças.\n", sentences_size); for(list::iterator it = listeners->begin(); it != listeners->end(); it++){ (*it)->notifyEnd(sentences_size); } } void Recognize::notifyPCRBase(uint64_t pcrbase){ //DDPRINTF("PCRBase = %ld\n", pcrbase); this->pcr_base = pcrbase; this->hasPCRBase = true; } int64_t Recognize::calcula_pts(double msec) { return (int64_t)(pcr_base + ((msec/1000) * 100000.0)); } int64_t Recognize::convert_pts(string pts){ int64_t ui64; stringstream ss; ss << pts; ss >> ui64; return ui64*1000; } void Recognize::addListener(RecognizeListener* listener) { listeners->push_back(listener); } bool Recognize::isFinished() { return finished; } void Recognize::cleanFiles() { string command = "rm -r "; command.append(PATH_API_UPLOADS).append(id).append("/audio"); system(command.c_str()); } void Recognize::createDir(){ string command = "mkdir "; command.append(PATH_API_UPLOADS).append(id).append("/audio").append(" && mkdir "). append(PATH_API_UPLOADS).append(id).append("/audio/parts").append(" && mkdir "). append(PATH_API_UPLOADS).append(id).append("/audio/origin"); system(command.c_str()); }