// extratorSRT.cpp
#include "extratorSRT.h"

#include <cstdlib>
#include <iostream>
#include <string>

/**
 * Creates an extractor with an empty listener list and its progress
 * flags reset: not finished, and a next subtitle still expected.
 */
ExtratorSRT::ExtratorSRT(){
    // Extraction has neither ended nor run out of subtitles yet.
    hasNextSub = true;
    finish = false;

    seek_pos = 0;

    // Used to watch the sequence of the subtitles during the extraction.
    index_counter = 1;

    // The list is heap-allocated here and released by the destructor.
    listeners = new list<ListenerSub*>();

    PRINTL(util::_DEBUG, "ExtratorSTR Done!\n");
}

/**
 * Empties and frees the listener list created by the constructor.
 * Only the list is deleted; the ListenerSub objects it points to are
 * left untouched.
 */
ExtratorSRT::~ExtratorSRT() {
    list<ListenerSub*>* registered = listeners;
    registered->clear();
    delete registered;

    PRINTL(util::_DEBUG, "ExtratorSTR finalized!\n");
}

/**
 * Registers a listener; it is appended after the ones already
 * registered, so notifications reach listeners in registration order.
 *
 * @param listener Listener to append (stored as-is, not copied).
 */
void ExtratorSRT::addListener(ListenerSub* listener){
    listeners->insert(listeners->end(), listener);
}

void ExtratorSRT::notifyListeners(unsigned char* subtitle, int64_t pts) {
    for(list<ListenerSub*>::iterator it = listeners->begin(); it != listeners->end(); it++){
        (*it)->notifySubtitle(subtitle, pts);
    }
}

void ExtratorSRT::notifyEndExtraction(int size) {
    PRINTL(util::_DEBUG, "Extrator SRT concluiu a extração: %d legendas.\n", size);
    for(list<ListenerSub*>::iterator it = listeners->begin(); it != listeners->end(); it++){
        (*it)->notifyEnd(size);
    }
}

void ExtratorSRT::encodingfiletoUTF8() {
    string recmd = "recode ";
    recmd.append("$(file --mime-encoding -b ")
    .append(this->filePath).append(")..utf-8 ")
    .append(this->filePath);
    system(recmd.c_str());

    string sedcmd = "sed -i 's/\r$//' ";
    sedcmd.append(this->filePath);
    system(sedcmd.c_str());
    

}

void ExtratorSRT::setFilePath(char* path) {
    this->filePath = path;
    encodingfiletoUTF8();
}

/**
 * Opens the subtitle file for reading and calls Start().
 * NOTE(review): Start() is defined elsewhere; presumably it launches the
 * thread that executes Run() — confirm against the base class.
 *
 * @throws ExtratorException when the file cannot be opened; in that case
 *         the extractor is also marked as finished and the failure is logged.
 */
void ExtratorSRT::initialize(){
    ifs_.open(this->filePath, std::ifstream::in);

    const bool readable = ifs_.is_open() && ifs_.good();
    if (!readable) {
        finish = true;
        Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado.");
        throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n");
    }

    this->Start();
}

/**
 * @return true once the extraction has ended (Run() completed) or the
 *         subtitle file failed to open in initialize().
 */
bool ExtratorSRT::isFinished(){
    const bool done = finish;
    return done;
}

/**
 * @return the current value of the hasNextSub flag, which the
 *         constructor initializes to true.
 */
bool ExtratorSRT::hasNextSubtitle() {
    const bool more = hasNextSub;
    return more;
}

void ExtratorSRT::Run(){
    PRINTL(util::_INFO, "Extraindo Legendas...\n");
    int sub_index = 0;
    string sub_text = "";
    while(hasNextSub){
        try{
            subtitle = next();
        }catch(ExtratorException ex){
          break;
        }
        sub_text = subtitle->getSubtitleText();
        notifyListeners((unsigned char*)sub_text.c_str(), calcula_pts((double) subtitle->getTimeIn()));
        sub_index++;
        free(subtitle);
    }
    if(sub_index == 0)
        notifyListeners((unsigned char*)"ARQUIVO_INVALIDO", 0);

    finish = true;
    notifyEndExtraction(sub_index);
}

/**
 * Reads the next subtitle section from the open SRT stream.
 *
 * The parser is a small line-driven state machine:
 *   FIND_ID        skip blank lines until a line starting with a non-zero
 *                  number (or with the UTF-8 byte order mark) is found;
 *   FIND_TIMESTAMP the following line must be "hh:mm:ss,mmm --> hh:mm:ss,mmm";
 *   FIND_TEXT      first text line; an empty line means a blank subtitle;
 *   READ_TEXT      further text lines are joined with a single space; an
 *                  empty line ends the section and returns the subtitle;
 *   AFTER_BLANK    after a blank subtitle, expect an empty line or the
 *                  next section id; anything else is a parse error.
 *
 * A section whose text reaches the end of the file without a terminating
 * empty line is still returned instead of being dropped.
 *
 * @return A Subtitle allocated with `new`; the caller must `delete` it.
 * @throws ExtratorException("EOF") when the stream holds no further
 *         subtitle, or with a parser message on malformed input.
 */
Subtitle* ExtratorSRT::next() {

    static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}");

    // Parser states; the numeric values match the original state numbers.
    enum {
        FIND_ID = 0,
        FIND_TIMESTAMP = 1,
        FIND_TEXT = 2,
        READ_TEXT = 3,
        AFTER_BLANK = 4
    };

    unsigned int section_id = 0;
    uint64_t section_time_in = 0;
    uint64_t section_time_out = 0;
    string section_text;

    int state = FIND_ID;
    int line_num = 0;   // lines consumed by THIS call, not an absolute file line
    string current_line;

    while (ifs_.good()) {

        getline(ifs_, current_line, '\n');
        line_num++;

        switch (state) {
            case FIND_ID: {
                if(current_line.empty())
                    break;  // ignore blank lines

                // A UTF-8 byte order mark may precede the very first id; skip it
                // before parsing so that id is not read as 0.
                const bool has_bom = current_line.compare(0, 3, "\xEF\xBB\xBF") == 0;
                const char* id_text = current_line.c_str() + (has_bom ? 3 : 0);
                section_id = strtol(id_text, nullptr, 10);
                if(section_id || has_bom) {
                    state = FIND_TIMESTAMP;
                }
                // Any other non-numeric line is skipped while searching for an id.
                break;
            }

            case FIND_TIMESTAMP: {
                if(!regex_match(current_line, timestamp_regex)) {
                    // Build the message as a string: `"literal" + int` would be
                    // pointer arithmetic on the literal, not concatenation.
                    // NOTE(review): assumes ExtratorException copies its message.
                    const std::string message =
                        "SRT parser: Expected subtitle timestamp at line " + std::to_string(line_num);
                    throw ExtratorException(message.c_str());
                }

                split_timestamp(current_line, &section_time_in, &section_time_out);
                state = FIND_TEXT;
                break;
            }

            case FIND_TEXT:
                if(current_line.empty()) { // first line is empty (blank subtitle)
                    state = AFTER_BLANK;
                    break;
                }

                section_text = current_line;
                state = READ_TEXT;
                break;

            case READ_TEXT:
                if(current_line.empty()) {
                    // An empty line closes the section: hand the subtitle over.
                    return new Subtitle(section_id, section_text, section_time_in, section_time_out);
                }

                section_text += " ";
                section_text += current_line;
                break;

            case AFTER_BLANK:
                if(current_line.empty()) {
                    state = FIND_ID;
                    break;  // ignore blank lines
                }

                section_id = strtol(current_line.c_str(), nullptr, 10);
                if(section_id) {
                    state = FIND_TIMESTAMP;
                    break;
                }
                // Not an id and not blank: deliberately fall through to the
                // parse error below.

            default:
                throw ExtratorException("SRT parser: Cannot parse file");

        } // switch
    } // while

    // The stream ended while text was being collected (no trailing blank
    // line / newline): the section is complete, so return it.
    if (state == READ_TEXT) {
        return new Subtitle(section_id, section_text, section_time_in, section_time_out);
    }

    throw ExtratorException("EOF");
}

/**
 * Removes every "<...>" markup span from a subtitle line.
 *
 * Each LESS_THAN character is paired with the first MORE_THAN character
 * that follows it, and the whole span (delimiters included) is erased.
 * A MORE_THAN that appears before any LESS_THAN is ordinary text and is
 * kept, as is a LESS_THAN with no closing MORE_THAN after it.
 *
 * @param line Subtitle text, possibly containing markup tags.
 * @return A copy of the text with the tag spans removed.
 */
string ExtratorSRT::formatText(string line){
    string f_line = line;

    // Positions are kept as size_type so the comparison against npos is exact.
    string::size_type lessThanPos = f_line.find_first_of(LESS_THAN); // position of '<'
    while(lessThanPos != string::npos){
        // Look for the closing '>' only AFTER the opening '<'. An earlier '>'
        // would otherwise give a negative length and erase the rest of the line.
        const string::size_type moreThanPos = f_line.find_first_of(MORE_THAN, lessThanPos);
        if(moreThanPos == string::npos)
            break; // unterminated tag: leave the remaining text untouched

        f_line.erase(lessThanPos, moreThanPos - lessThanPos + 1); // drop '<...>'

        // Everything before lessThanPos is already tag-free, so resume there.
        lessThanPos = f_line.find_first_of(LESS_THAN, lessThanPos);
    }

    return f_line;
}

/**
 * Converts an SRT time stamp of the form "hh:mm:ss,mmm" to milliseconds.
 *
 * The text is split on ':' and ',' and the first four fields are read as
 * hours, minutes, seconds and milliseconds. Missing fields count as 0 and
 * any field beyond the fourth is ignored.
 *
 * The string is scanned in place: the earlier version copied it into a
 * 4-byte heap buffer (overflowing it), released that buffer with a
 * mismatched `delete`, and relied on the non-reentrant strtok().
 *
 * @param str_time Time stamp text.
 * @return The time in milliseconds.
 */
int64_t ExtratorSRT::str_to_time(string str_time) {

        static const char delimiters[] = ":,";
        const int field_count = 4;

        int64_t values[field_count] = {0, 0, 0, 0}; // hh, mm, ss, ms

        int index = 0;
        std::string::size_type field_begin = str_time.find_first_not_of(delimiters);
        while (field_begin != std::string::npos && index < field_count) {
            // atoi() stops at the first non-digit, i.e. at the next delimiter,
            // so each field can be parsed without copying it out.
            values[index] = atoi(str_time.c_str() + field_begin);
            index++;

            const std::string::size_type field_end = str_time.find_first_of(delimiters, field_begin);
            field_begin = (field_end == std::string::npos)
                ? std::string::npos
                : str_time.find_first_not_of(delimiters, field_end);
        }

        /* calculate time */
        const int64_t total_seconds = /*hour to sec*/(values[0] * 60 * 60) +
            /*min to sec*/(values[1] * 60) + /*sec*/values[2];

        return (total_seconds * 1000) + values[3];

}

/**
 * Splits an SRT timing line "start --> end" at the arrow and converts
 * both halves with str_to_time().
 *
 * @param timestamp Full timing line.
 * @param time_in   Receives the converted start time (text left of the arrow).
 * @param time_out  Receives the converted end time (text right of the arrow).
 */
void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) {

  const std::string arrow(" --> ");
  const std::size_t arrow_at = timestamp.find(arrow);

  // Text to the left of the arrow.
  const std::string start_text = timestamp.substr(0, arrow_at);
  *time_in = str_to_time(start_text);

  // Text to the right of the arrow.
  const std::string end_text = timestamp.substr(arrow_at + arrow.size());
  *time_out = str_to_time(end_text);
}

/**
 * Converts a time given as a double to the integer value used as pts;
 * the fractional part is discarded by the conversion.
 *
 * @param msec Time value (Run() passes the subtitle start time).
 * @return The same value converted to uint64_t.
 */
uint64_t ExtratorSRT::calcula_pts(double msec) {
    const uint64_t pts = static_cast<uint64_t>(msec);
    return pts;
}