diff --git a/extrator/src/extratorSRT.cpp b/extrator/src/extratorSRT.cpp index 1bb5d58..0fdc680 100644 --- a/extrator/src/extratorSRT.cpp +++ b/extrator/src/extratorSRT.cpp @@ -102,87 +102,90 @@ void ExtratorSRT::Run(){ Subtitle* ExtratorSRT::next() { - static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); - - unsigned int section_id; - uint64_t section_time_in; - uint64_t section_time_out; - string section_text; - - int state = 0; - int line_num = 0; - string current_line; - - while (ifs_.good()) { - - getline(ifs_, current_line, '\n'); - line_num++; - - switch (state) { - case 0: // first state, search id - if(current_line.empty()) - break; // ignore blank lines - - char *p_end; - section_id = strtol(current_line.c_str(), &p_end, 10); - if(section_id) { - state = 1; // state 1, find timestamp - } - - break; - - case 1: // want timestamp - bool match; - match = regex_match(current_line, timestamp_regex); - - if(match) { - split_timestamp(current_line, §ion_time_in, §ion_time_out); - state = 2; // state 2, find text - break; - } - - throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); - - case 2: // Reading subtitle, first line of text - if(current_line.empty()) { // first line is empty (blank subtitle) - state = 4; // state 4 find next subtitle before empty subtitle - break; - } - - section_text = current_line; - state = 3; // state 3 find second line of text or end of subtitle - break; - - case 3: // create new subtitle object - if(current_line.empty()){ - Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); - return sub; - } - - section_text += current_line; - break; - - case 4: - if(current_line.empty()) { - state = 0; - break; // ignore blank lines - } - - char *pend; - section_id = strtol(current_line.c_str(), &p_end, 10); - if(section_id) { - state = 1; // find timestamp - break; - } - - default: - throw ExtratorException("SRT parser: Cannot parse file"); - - } // switch - } - - throw ExtratorException("EOF"); - + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); + + unsigned int section_id; + uint64_t section_time_in; + uint64_t section_time_out; + string section_text; + + int state = 0; + int line_num = 0; + string current_line; + + while (ifs_.good()) { + + getline(ifs_, current_line, '\n'); + line_num++; + + switch (state) { + case 0: // first state, search id + if(current_line.empty()) + break; // ignore blank lines + + char *p_end; + section_id = strtol(current_line.c_str(), &p_end, 10); + if(section_id || current_line.compare(0, 3, "\xEF\xBB\xBF") == 0) { + state = 1; // state 1, find timestamp + } + + break; + + case 1: // want timestamp + bool match; + match = regex_match(current_line, timestamp_regex); + + if(match) { + split_timestamp(current_line, §ion_time_in, §ion_time_out); + state = 2; // state 2, find text + break; + } + + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); + + case 2: // Reading subtitle, first line of text + if(current_line.empty()) { // first line is empty (blank subtitle) + state = 4; // state 4 find next subtitle before empty subtitle + break; + } + + section_text = current_line; + state = 3; // state 3 find second line of text or end of subtitle + break; + + case 3: // create new subtitle object + if(current_line.empty()){ + // std::clog << "[ID] " << section_id << std::endl; + // std::clog << "[IN] " << section_time_in << std::endl; + // std::clog << "[OUT] " << section_time_out << std::endl; + // std::clog << "[TEXT] " << section_text << "\n" <