// recognize.cpp 9.02 KB
#include <fstream>
#include <stdio.h>
#include <sstream>
#include <stdlib.h>
#include <algorithm>
#include <vector> 
#include <iterator>

#include <iostream>
#include <string>
#include <sys/stat.h>

#include <lavidlib/io/FileIO.h>

#include <recognize.h>

#define FREQUENCY_PATTERN 22050
#define INPUT_PATTERN 1 /* 1 = Raw file, 2 = Mic */
#define BLOCS_PATTERN 10
#define SIZE_BUFFER 256
#define CONFIDENCE_RATE 0.80

#define PATH_GTAAAS_WEB "gtaaas_user/gtaaas_web/public/uploads/videos/"
#define PATH_AUDIO_ORIGIN "/audio/origin/audio_origin.wav"
#define PATH_AUDIO_PARTS "/audio/parts/"
#define FILENAME_RECOGNIZED_OUT "/audio/recognized.out"
#define FILENAME_AUDIOLIST "/audio/audiolist"
#define FILENAME_FILTEROUT "/audio/filter"
#define FILENAME_CONFIDENCEOUT "/audio/confidence"

#define FIND_CONFIDENCE "\"cmscore1:\""
#define FIND_SENTENCE "\"pass1_best:\""

#define FILENAME_AUDIOPART "audio00"

#define PROGRAM "ffmpeg" // ffmpeg
#define PTS_PATTERN 1000

using namespace std;


/**
 * Builds a recognizer that uses the default input type, sampling frequency
 * and audio block size (INPUT_PATTERN, FREQUENCY_PATTERN, BLOCS_PATTERN).
 *
 * @param _pathVideo Path of the video to process; assigned to pathVideo.
 * @param _id        Identifier appended to PATH_GTAAAS_WEB to build the
 *                   working paths used by the other methods.
 */
Recognize::Recognize(char* _pathVideo, char* _id) {
	
	listeners = new list<RecognizeListener*>();
	pathVideo = _pathVideo;
	inputType = INPUT_PATTERN;
	frequency = FREQUENCY_PATTERN;
	sizeBlocs = BLOCS_PATTERN;
	// Give isFinished() a defined answer even before initialize() has run.
	finished = false;
	stringstream ss;
	ss << _id;
	// Stream extraction keeps only the first whitespace-delimited token of _id.
	ss >> id;
	printf("ID: %s\n", id.c_str());
    DPRINTF("Done!\n");
}

/**
 * Builds a recognizer with an explicit input type; frequency and block size
 * take their defaults (FREQUENCY_PATTERN, BLOCS_PATTERN).
 *
 * @param _pathVideo Path of the video to process; assigned to pathVideo.
 * @param _inputType Input selector; executeJuliusEngine() treats 1 as a raw
 *                   file list and any other value as microphone input.
 * @param _id        Identifier appended to PATH_GTAAAS_WEB to build the
 *                   working paths used by the other methods.
 */
Recognize::Recognize(char* _pathVideo, int _inputType, char* _id) {

	listeners = new list<RecognizeListener*>();
	pathVideo = _pathVideo;
	inputType = _inputType;
	frequency = FREQUENCY_PATTERN;
	sizeBlocs = BLOCS_PATTERN;
	// Give isFinished() a defined answer even before initialize() has run.
	finished = false;
	id = _id;
    DPRINTF("Done!\n");
}

/**
 * Destroys the listener list. Only the container is released; the listener
 * objects whose pointers were registered are not deleted here.
 */
Recognize::~Recognize() {
	// Destroying the list also discards every stored pointer, so no
	// separate clear() call is required beforehand.
	delete listeners;
    DDDPRINTF("Recognize finalized!\n");
}


/**
 * Runs the whole recognition pipeline for the configured video:
 * create the working directories, extract the audio track, split it into
 * parts, run Julius, check the confidence scores, and finally hand every
 * filtered sentence to the registered listeners.
 *
 * @throws RecognizeException when the video file cannot be opened here, or
 *         when confidenceJulius() rejects the result.
 */
void Recognize::initialize() {
	
	DDPRINTF("Recognizing...\n");
	
	// The stream is only used to check that the video can be opened.
	ifstream file(pathVideo, ifstream::binary);
	if(!file.is_open()){
		finished = true;
        Util::Logger::Instance()->writeLog((char*) "[ERRO: recognize.cpp] Arquivo de vídeo não encontrado.");
        throw RecognizeException("Falha ao abrir o arquivo de vídeo! Verifique se o mesmo existe.");		
	}
	file.close();

	finished = false;

	createDir();

	extractAudioFromVideo();
	
	breakVideoParts(getTimeMediaSec());
	
	executeJuliusEngine();

	confidenceJulius();

	std::list<char*> *list_sentences = filterOutputJulius();
	
	for (std::list<char*>::iterator it = list_sentences->begin(); it != list_sentences->end(); ++it)
		notifyListeners(*it);

	// The container is local to this method, so it can be released safely.
	// NOTE(review): the individual sentence buffers are deliberately left
	// alone because it is not visible here whether listeners keep the
	// pointer they receive; free them with delete[] once that is confirmed.
	delete list_sentences;
	
	finished = true;
	cleanFiles();
}


void Recognize::setFrequency(int freq) {
	frequency = freq;
}


void Recognize::setSizeAudioBlocs(int sec) {
	sizeBlocs = sec;
}


char* Recognize::extractAudioFromVideo() {

	string command = PROGRAM;
	command.append(" -i ").
	append((string) pathVideo).
	append(" -ar ");
	
	string strFreq;
	std::stringstream sstr;
	sstr << frequency;
	strFreq = sstr.str();

	//command.append(strFreq).append(" -ac 1 -f wav ").append(PATH_AUDIO_ORIGIN).append(" &");
	command.append(strFreq).
	append(" -ac 1 -f wav ").
	append(PATH_GTAAAS_WEB).
	append(id).
	append(PATH_AUDIO_ORIGIN).

	append(" -v quiet");

	/*string tmp = "echo ";
	tmp.append(PATH_AUDIO_ORIGIN).append(" >> ").append(FILENAME_AUDIOLIST);
	system(tmp.c_str());*/

	system(command.c_str());

}


int Recognize::getTimeMediaSec() {
	
	string command = PROGRAM;
	command.append(" -i ").append(PATH_GTAAAS_WEB).append(id).append(PATH_AUDIO_ORIGIN);
	command.append(" 2>&1 | grep Duration >> outfile");
	system(command.c_str());
	
	ifstream in("outfile");
	if (!in) return -1;
	std::string line;
	in >> line >> line;

	char* timetok;
	timetok = strtok((char*)line.c_str(), " :,.");
	int seconds = 0;
	
	seconds += (atoi(timetok) * 60 * 60); // hora
	timetok = strtok(NULL, " :,.");	
	seconds += (atoi(timetok) * 60); // min
	timetok = strtok(NULL, " :,.");
	seconds += atoi(timetok); // seg
	system("rm outfile");
	
	return seconds;

}

void Recognize::breakVideoParts(int timeTotal) {

	string ss_str, t_str, command, aplist;
	int count = 1;
	int ss = 0;
	int t = 0;
	bool consume = true;

	string filename= FILENAME_AUDIOPART;
	char tmp [filename.length()];
	sprintf(tmp, "%i", ss);
	ss_str = tmp;

	sprintf(tmp, "%i", sizeBlocs);
	t_str = tmp;

	if (timeTotal < sizeBlocs)
		sizeBlocs = timeTotal;

	do {
		ss = t;
		if(timeTotal >= sizeBlocs && (timeTotal - sizeBlocs) > (sizeBlocs/2)) {			
			t += sizeBlocs;
			timeTotal -= sizeBlocs;
		} else {
			t += timeTotal;
			timeTotal = 0;
			consume = false;
			sprintf(tmp, "%i", t);
			t_str = tmp;
		}
		sprintf(tmp, "%i", ss);
		ss_str = tmp;

		command = "sox ";
		command.append(PATH_GTAAAS_WEB).append(id).append(PATH_AUDIO_ORIGIN).append(" ").append(PATH_GTAAAS_WEB).append(id).append(PATH_AUDIO_PARTS);
		sprintf(tmp, "%i", count++);
		filename.append(tmp).append(".wav");
		command.append(filename).append(" trim ").append(ss_str).append(" ").append(t_str);

		system(command.c_str());

		string apcomm = "echo ";
		apcomm.append(PATH_GTAAAS_WEB).append(id).append(PATH_AUDIO_PARTS).append(filename).append(" >> ").append(PATH_GTAAAS_WEB).append(id).append(FILENAME_AUDIOLIST);
		system(apcomm.c_str());		

		filename = FILENAME_AUDIOPART;
		aplist = "";

	} while (consume);

}


void Recognize::executeJuliusEngine() {	

	string command, type, freqStr;
	char cfreq[10];

	command = "julius -C gtaaas_user/gtaaas/recognize/src/julius.jconf -input ";
	if (inputType == 1) {
		type = "rawfile";
		command.append(type).append(" -filelist ").append(PATH_GTAAAS_WEB).append(id).append(FILENAME_AUDIOLIST);
	} else { 
		type = "mic";
	}	
	sprintf(cfreq, "%i", frequency);
	command.append(" -smpFreq ").
	append(cfreq).
	append(" -nolog >> ").
	append(PATH_GTAAAS_WEB).append(id).
	append(FILENAME_RECOGNIZED_OUT);

	printf("\n\nCommand for executeJuliusEngine: %s\n", command.c_str());
	system(command.c_str());	

}

void Recognize::confidenceJulius() {
	
	string command = "cat ";
	command.append(PATH_GTAAAS_WEB).append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep ").
			append(FIND_CONFIDENCE).append(" >> ").append(PATH_GTAAAS_WEB).append(id).append(FILENAME_CONFIDENCEOUT);

	system(command.c_str());
	printf("\n\n---> command: %s\n\n", command.c_str());

	string path;
	path.append(PATH_GTAAAS_WEB).append(id).append(FILENAME_CONFIDENCEOUT);
	ifstream in(path.c_str());	

	if (!in) {
		perror("Error: ");
	} else {		
		string line;
		float tmp;
		avgScores = 0;
		do {
			getline(in, line);
		    std::istringstream buf(line);
			if (line.length() > 0) {
	    		istream_iterator<std::string> beg(buf), end;
	    		vector<string> tokens(beg, end);
    			int i;
         		for(i=2; i < tokens.size()-1; i++){
         			istringstream(tokens[i]) >> tmp;
         			avgScores += tmp;
         			sizeScores++;
         		}
			}
		} while (!in.eof());
		in.close();
		avgScores /= sizeScores;
	}

	cout << "Média: " << avgScores << endl;

	if (avgScores < CONFIDENCE_RATE){
		finished = true;
		cleanFiles();
        throw RecognizeException("O vídeo selecionado tem baixa qualidade. Tente novamente com outro vídeo.");	
	}
}


std::list<char*>* Recognize::filterOutputJulius() {

	std::list<char*> *sentences;
	sentences = new std::list<char*>();
	
	string command = "cat ";
	command.append(PATH_GTAAAS_WEB).append(id).append(FILENAME_RECOGNIZED_OUT).append(" | grep ").
			append(FIND_SENTENCE).append(" >> ").append(PATH_GTAAAS_WEB).append(id).append(FILENAME_FILTEROUT);

	system(command.c_str());
	printf("\n\n---> command: %s\n\n", command.c_str());

	int count_lines = 0;

	string path;
	path.append(PATH_GTAAAS_WEB).append(id).append(FILENAME_CONFIDENCEOUT);
	ifstream in(path.c_str());	
	string strFilter;

	if (!in) {
		perror("Error: ");
	} else {		
		string line;
		int sizeLine;
		char* sentence_ptr;
		do {
			getline(in, line);
			if (line.length() > 0) {
				sizeLine = (int)line.length();
				strFilter = line.substr(strlen(FIND_SENTENCE), sizeLine);
				sentence_ptr = new char[strFilter.length()+1];
				strcpy(sentence_ptr, (char*) strFilter.c_str());
				sentences->push_back(sentence_ptr);
			}
		} while (!in.eof());
		in.close();
	}

	/*char* ptr_strFilter;
	ptr_strFilter = (char*) malloc (strFilter.length()+1);	
	strcpy(ptr_strFilter, (char*) strFilter.c_str());*/

	return sentences;

}


void Recognize::notifyListeners(char* text) {

	int64_t pts = PTS_PATTERN;
	for(list<RecognizeListener*>::iterator it = listeners->begin(); it != listeners->end(); it++){
		(*it)->notifyTextRecognized((unsigned char*) text, pts);
	}

}


/**
 * Registers a listener to be notified of recognized sentences.
 *
 * @param listener Pointer appended to the end of the listener list; it is
 *                 stored as given, without a null or duplicate check.
 */
void Recognize::addListener(RecognizeListener* listener) {
	listeners->insert(listeners->end(), listener);
}


bool Recognize::isFinished() {
	return finished;
}


void Recognize::cleanFiles() {

	string command = "rm -r ";
    command.append(PATH_GTAAAS_WEB).append(id).append("/audio");
    system(command.c_str());
}

void Recognize::createDir(){
	string command = "mkdir ";
	command.append(PATH_GTAAAS_WEB).append(id).append("/audio").append(" && mkdir ").
	append(PATH_GTAAAS_WEB).append(id).append("/audio/parts").append(" && mkdir ").
	append(PATH_GTAAAS_WEB).append(id).append("/audio/origin");
	printf("%s\n", command.c_str());
	system(command.c_str());
}