From 0918b85b15dbfcd6512926e97644230d0e6a1366 Mon Sep 17 00:00:00 2001 From: Wesnydy Ribeiro Date: Thu, 30 Jun 2016 15:51:35 -0300 Subject: [PATCH] ExtratorSRT corrections --- extrator/src/extratorSRT.cpp | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------- extrator/src/include/extrator.h | 2 ++ extrator/src/include/extratorSRT.h | 9 ++++++++- util/src/include/logging.h | 8 ++++---- 4 files changed, 111 insertions(+), 84 deletions(-) diff --git a/extrator/src/extratorSRT.cpp b/extrator/src/extratorSRT.cpp index 4fa45a4..1bb5d58 100644 --- a/extrator/src/extratorSRT.cpp +++ b/extrator/src/extratorSRT.cpp @@ -1,4 +1,5 @@ #include "extratorSRT.h" +#include ExtratorSRT::ExtratorSRT(){ listeners = new list(); @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() { .append(this->filePath); system(recmd.c_str()); - string sedcmd = "sed -i 's/\r$//;$d' "; + string sedcmd = "sed -i 's/\r$//' "; sedcmd.append(this->filePath); system(sedcmd.c_str()); + } @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) { void ExtratorSRT::initialize(){ - file = new lavidlib::File(this->filePath); + ifs_.open(this->filePath, std::ifstream::in); - try{ - file_io = new lavidlib::FileIO(file->getPath(), FileIO::MODE_READ); - }catch(Exception &ex){ - finish = true; - Logging::instance()->writeLog("extratorSRT.cpp : Arquivo de legenda não encontrado."); - throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); - } - this->Start(); + if(ifs_.is_open() && ifs_.good()){ + this->Start(); + } else { + finish = true; + Logging::instance()->writeLog("extratorSRT.cpp : Arquivo de legenda não encontrado."); + throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); + } } @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){ PRINTL(util::_INFO, "Extraindo Legendas...\n"); int sub_index = 0; string sub_text = ""; - while(hasNextSubtitle()){ + while(hasNextSub){ try{ subtitle = next(); }catch(ExtratorException ex){ @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){ Subtitle* ExtratorSRT::next() { - file_io->seek(seek_pos); - try{ - bff_reader = new BufferedReader(file_io); - }catch(Exception &ex){ - Logging::instance()->writeLog("extratorSRT.cpp : BufferedReader não inicializado."); - throw ExtratorException("O BufferedReader não foi inicializado."); + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); + + unsigned int section_id; + uint64_t section_time_in; + uint64_t section_time_out; + string section_text; + + int state = 0; + int line_num = 0; + string current_line; + + while (ifs_.good()) { + + getline(ifs_, current_line, '\n'); + line_num++; + + switch (state) { + case 0: // first state, search id + if(current_line.empty()) + break; // ignore blank lines + + char *p_end; + section_id = strtol(current_line.c_str(), &p_end, 10); + if(section_id) { + state = 1; // state 1, find timestamp } - Subtitle* sub = new Subtitle(); - string line = ""; - string text_sub = ""; - - try { - // ID - int id_sub = -1; - - // Case exists some blank lines before index, read until find - do { - line = bff_reader->readLine(); - printf("%s\n", line.c_str()); - seek_pos++; - } while (line.size() < 1); - - //seek_pos += (int64_t) line.size() + SIZE_CSCAPE; - id_sub = atoi(line.c_str()); - if (id_sub != index_counter) - { - PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n"); - exit (1); - } - sub->setID(id_sub); - - // TimeIn and TimeOut - int64_t t_in = 0, t_out = 0; - line = bff_reader->readLine(); - seek_pos += (int64_t) line.size() + SIZE_CSCAPE; - - int target_pos = line.find(TARGET_TIME); - t_in = str_to_time(line.substr(0, target_pos)); - sub->setTimeIn(t_in); - t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size())); - sub->setTimeOut(t_out); - - // Read until find an empty line - while ((line = bff_reader->readLine()).size() > 0) { - text_sub += line; - text_sub.append(" "); - } - printf("size of text: %d\n", text_sub.size()); - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; - - } catch (lavidlib::EOFException &ex) { - - if(text_sub == "") - sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório - - sub->setSubtitleText(formatText(text_sub)); - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; - - hasNextSub = false; - delete(bff_reader); - return sub; + break; + + case 1: // want timestamp + bool match; + match = regex_match(current_line, timestamp_regex); + + if(match) { + split_timestamp(current_line, §ion_time_in, §ion_time_out); + state = 2; // state 2, find text + break; } - index_counter++; - sub->setSubtitleText(formatText(text_sub)); - - printf("%s\n\n", sub->toString().c_str()); + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); - delete(bff_reader); - return sub; - } + case 2: // Reading subtitle, first line of text + if(current_line.empty()) { // first line is empty (blank subtitle) + state = 4; // state 4 find next subtitle before empty subtitle + break; + } + + section_text = current_line; + state = 3; // state 3 find second line of text or end of subtitle + break; + + case 3: // create new subtitle object + if(current_line.empty()){ + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); + return sub; + } + + section_text += current_line; + break; + + case 4: + if(current_line.empty()) { + state = 0; + break; // ignore blank lines + } + + char *pend; + section_id = strtol(current_line.c_str(), &p_end, 10); + if(section_id) { + state = 1; // find timestamp + break; + } + + default: + throw ExtratorException("SRT parser: Cannot parse file"); + + } // switch + } + + throw ExtratorException("EOF"); + +} string ExtratorSRT::formatText(string line){ int lessThanPos; @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) { return ttime; - } +} + +void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) { + + std::size_t pos = timestamp.find(" --> "); + *time_in = str_to_time(timestamp.substr(0, pos)); // left --> + *time_out = str_to_time(timestamp.substr(pos + 5)); // rigth --> +} uint64_t ExtratorSRT::calcula_pts(double msec) { return (uint64_t)msec; diff --git a/extrator/src/include/extrator.h b/extrator/src/include/extrator.h index 29dd0b2..a7c9380 100644 --- a/extrator/src/include/extrator.h +++ b/extrator/src/include/extrator.h @@ -8,6 +8,8 @@ #ifndef EXTRATOR_H #define EXTRATOR_H + +#include #include "logging.h" #include #include diff --git a/extrator/src/include/extratorSRT.h b/extrator/src/include/extratorSRT.h index 742e5e7..c40407c 100644 --- a/extrator/src/include/extratorSRT.h +++ b/extrator/src/include/extratorSRT.h @@ -19,6 +19,10 @@ #include "listenerSub.h" #include "extratorException.h" +#include +#include +#include + #define SIZE_CSCAPE 1 #define TARGET_TIME "-->" #define LESS_THAN "<" @@ -101,6 +105,8 @@ public: private: list *listeners; + ifstream ifs_; + Subtitle *subtitle; int64_t seek_pos; bool hasNextSub; @@ -109,7 +115,8 @@ private: void encodingfiletoUTF8(); string formatText(string line); uint64_t calcula_pts(double msec); - int64_t str_to_time(std::string str_time); + int64_t str_to_time(string str_time); + void split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out); }; #endif /* EXTRATORSRT_H */ diff --git a/util/src/include/logging.h b/util/src/include/logging.h index 0d363ba..409c38f 100644 --- a/util/src/include/logging.h +++ b/util/src/include/logging.h @@ -65,16 +65,16 @@ namespace util { if(level <= llevel){ \ switch(level){ \ case _DEBUG: \ - fprintf(stdout, _DEBUG_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ + fprintf(stdout, _DEBUG_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ break; \ case _INFO: \ - fprintf(stdout, _INFO_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ + fprintf(stdout, _INFO_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ break; \ case _WARNING: \ - fprintf(stdout, _WARN_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ + fprintf(stdout, _WARN_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ break; \ case _ERROR: \ - fprintf(stderr, _ERROR_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ + fprintf(stderr, _ERROR_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ break; \ } \ } \ -- libgit2 0.21.2