Commit 0918b85b15dbfcd6512926e97644230d0e6a1366
1 parent
f616fdfd
Exists in
devel
ExtratorSRT corrections
Showing
4 changed files
with
111 additions
and
84 deletions
Show diff stats
extrator/src/extratorSRT.cpp
| 1 | 1 | #include "extratorSRT.h" |
| 2 | +#include <iostream> | |
| 2 | 3 | |
| 3 | 4 | ExtratorSRT::ExtratorSRT(){ |
| 4 | 5 | listeners = new list<ListenerSub*>(); |
| ... | ... | @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() { |
| 43 | 44 | .append(this->filePath); |
| 44 | 45 | system(recmd.c_str()); |
| 45 | 46 | |
| 46 | - string sedcmd = "sed -i 's/\r$//;$d' "; | |
| 47 | + string sedcmd = "sed -i 's/\r$//' "; | |
| 47 | 48 | sedcmd.append(this->filePath); |
| 48 | 49 | system(sedcmd.c_str()); |
| 50 | + | |
| 49 | 51 | |
| 50 | 52 | } |
| 51 | 53 | |
| ... | ... | @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) { |
| 56 | 58 | |
| 57 | 59 | void ExtratorSRT::initialize(){ |
| 58 | 60 | |
| 59 | - file = new lavidlib::File(this->filePath); | |
| 61 | + ifs_.open(this->filePath, std::ifstream::in); | |
| 60 | 62 | |
| 61 | - try{ | |
| 62 | - file_io = new lavidlib::FileIO(file->getPath(), FileIO::MODE_READ); | |
| 63 | - }catch(Exception &ex){ | |
| 64 | - finish = true; | |
| 65 | - Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado."); | |
| 66 | - throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); | |
| 67 | - } | |
| 68 | - this->Start(); | |
| 63 | + if(ifs_.is_open() && ifs_.good()){ | |
| 64 | + this->Start(); | |
| 65 | + } else { | |
| 66 | + finish = true; | |
| 67 | + Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado."); | |
| 68 | + throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); | |
| 69 | + } | |
| 69 | 70 | |
| 70 | 71 | } |
| 71 | 72 | |
| ... | ... | @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){ |
| 81 | 82 | PRINTL(util::_INFO, "Extraindo Legendas...\n"); |
| 82 | 83 | int sub_index = 0; |
| 83 | 84 | string sub_text = ""; |
| 84 | - while(hasNextSubtitle()){ | |
| 85 | + while(hasNextSub){ | |
| 85 | 86 | try{ |
| 86 | 87 | subtitle = next(); |
| 87 | 88 | }catch(ExtratorException ex){ |
| ... | ... | @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){ |
| 101 | 102 | |
| 102 | 103 | Subtitle* ExtratorSRT::next() { |
| 103 | 104 | |
| 104 | - file_io->seek(seek_pos); | |
| 105 | - try{ | |
| 106 | - bff_reader = new BufferedReader(file_io); | |
| 107 | - }catch(Exception &ex){ | |
| 108 | - Logging::instance()->writeLog("extratorSRT.cpp <Error>: BufferedReader não inicializado."); | |
| 109 | - throw ExtratorException("O BufferedReader não foi inicializado."); | |
| 105 | + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | |
| 106 | + | |
| 107 | + unsigned int section_id; | |
| 108 | + uint64_t section_time_in; | |
| 109 | + uint64_t section_time_out; | |
| 110 | + string section_text; | |
| 111 | + | |
| 112 | + int state = 0; | |
| 113 | + int line_num = 0; | |
| 114 | + string current_line; | |
| 115 | + | |
| 116 | + while (ifs_.good()) { | |
| 117 | + | |
| 118 | + getline(ifs_, current_line, '\n'); | |
| 119 | + line_num++; | |
| 120 | + | |
| 121 | + switch (state) { | |
| 122 | + case 0: // first state, search id | |
| 123 | + if(current_line.empty()) | |
| 124 | + break; // ignore blank lines | |
| 125 | + | |
| 126 | + char *p_end; | |
| 127 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 128 | + if(section_id) { | |
| 129 | + state = 1; // state 1, find timestamp | |
| 110 | 130 | } |
| 111 | 131 | |
| 112 | - Subtitle* sub = new Subtitle(); | |
| 113 | - string line = ""; | |
| 114 | - string text_sub = ""; | |
| 115 | - | |
| 116 | - try { | |
| 117 | - // ID | |
| 118 | - int id_sub = -1; | |
| 119 | - | |
| 120 | - // Case exists some blank lines before index, read until find | |
| 121 | - do { | |
| 122 | - line = bff_reader->readLine(); | |
| 123 | - printf("%s\n", line.c_str()); | |
| 124 | - seek_pos++; | |
| 125 | - } while (line.size() < 1); | |
| 126 | - | |
| 127 | - //seek_pos += (int64_t) line.size() + SIZE_CSCAPE; | |
| 128 | - id_sub = atoi(line.c_str()); | |
| 129 | - if (id_sub != index_counter) | |
| 130 | - { | |
| 131 | - PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n"); | |
| 132 | - exit (1); | |
| 133 | - } | |
| 134 | - sub->setID(id_sub); | |
| 135 | - | |
| 136 | - // TimeIn and TimeOut | |
| 137 | - int64_t t_in = 0, t_out = 0; | |
| 138 | - line = bff_reader->readLine(); | |
| 139 | - seek_pos += (int64_t) line.size() + SIZE_CSCAPE; | |
| 140 | - | |
| 141 | - int target_pos = line.find(TARGET_TIME); | |
| 142 | - t_in = str_to_time(line.substr(0, target_pos)); | |
| 143 | - sub->setTimeIn(t_in); | |
| 144 | - t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size())); | |
| 145 | - sub->setTimeOut(t_out); | |
| 146 | - | |
| 147 | - // Read until find an empty line | |
| 148 | - while ((line = bff_reader->readLine()).size() > 0) { | |
| 149 | - text_sub += line; | |
| 150 | - text_sub.append(" "); | |
| 151 | - } | |
| 152 | - printf("size of text: %d\n", text_sub.size()); | |
| 153 | - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; | |
| 154 | - | |
| 155 | - } catch (lavidlib::EOFException &ex) { | |
| 156 | - | |
| 157 | - if(text_sub == "") | |
| 158 | - sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório | |
| 159 | - | |
| 160 | - sub->setSubtitleText(formatText(text_sub)); | |
| 161 | - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; | |
| 162 | - | |
| 163 | - hasNextSub = false; | |
| 164 | - delete(bff_reader); | |
| 165 | - return sub; | |
| 132 | + break; | |
| 133 | + | |
| 134 | + case 1: // want timestamp | |
| 135 | + bool match; | |
| 136 | + match = regex_match(current_line, timestamp_regex); | |
| 137 | + | |
| 138 | + if(match) { | |
| 139 | + split_timestamp(current_line, &section_time_in, &section_time_out); | |
| 140 | + state = 2; // state 2, find text | |
| 141 | + break; | |
| 166 | 142 | } |
| 167 | - index_counter++; | |
| 168 | 143 | |
| 169 | - sub->setSubtitleText(formatText(text_sub)); | |
| 170 | - | |
| 171 | - printf("%s\n\n", sub->toString().c_str()); | |
| 144 | + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | |
| 172 | 145 | |
| 173 | - delete(bff_reader); | |
| 174 | - return sub; | |
| 175 | - } | |
| 146 | + case 2: // Reading subtitle, first line of text | |
| 147 | + if(current_line.empty()) { // first line is empty (blank subtitle) | |
| 148 | + state = 4; // state 4 find next subtitle before empty subtitle | |
| 149 | + break; | |
| 150 | + } | |
| 151 | + | |
| 152 | + section_text = current_line; | |
| 153 | + state = 3; // state 3 find second line of text or end of subtitle | |
| 154 | + break; | |
| 155 | + | |
| 156 | + case 3: // create new subtitle object | |
| 157 | + if(current_line.empty()){ | |
| 158 | + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | |
| 159 | + return sub; | |
| 160 | + } | |
| 161 | + | |
| 162 | + section_text += current_line; | |
| 163 | + break; | |
| 164 | + | |
| 165 | + case 4: | |
| 166 | + if(current_line.empty()) { | |
| 167 | + state = 0; | |
| 168 | + break; // ignore blank lines | |
| 169 | + } | |
| 170 | + | |
| 171 | + char *pend; | |
| 172 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 173 | + if(section_id) { | |
| 174 | + state = 1; // find timestamp | |
| 175 | + break; | |
| 176 | + } | |
| 177 | + | |
| 178 | + default: | |
| 179 | + throw ExtratorException("SRT parser: Cannot parse file"); | |
| 180 | + | |
| 181 | + } // switch | |
| 182 | + } | |
| 183 | + | |
| 184 | + throw ExtratorException("EOF"); | |
| 185 | + | |
| 186 | +} | |
| 176 | 187 | |
| 177 | 188 | string ExtratorSRT::formatText(string line){ |
| 178 | 189 | int lessThanPos; |
| ... | ... | @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) { |
| 213 | 224 | |
| 214 | 225 | return ttime; |
| 215 | 226 | |
| 216 | - } | |
| 227 | +} | |
| 228 | + | |
| 229 | +void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) { | |
| 230 | + | |
| 231 | + std::size_t pos = timestamp.find(" --> "); | |
| 232 | + *time_in = str_to_time(timestamp.substr(0, pos)); // left --> | |
| 233 | + *time_out = str_to_time(timestamp.substr(pos + 5)); // right --> | |
| 234 | +} | |
| 217 | 235 | |
| 218 | 236 | uint64_t ExtratorSRT::calcula_pts(double msec) { |
| 219 | 237 | return (uint64_t)msec; | ... | ... |
extrator/src/include/extrator.h
extrator/src/include/extratorSRT.h
| ... | ... | @@ -19,6 +19,10 @@ |
| 19 | 19 | #include "listenerSub.h" |
| 20 | 20 | #include "extratorException.h" |
| 21 | 21 | |
| 22 | +#include <algorithm> | |
| 23 | +#include <fstream> | |
| 24 | +#include <sstream> | |
| 25 | + | |
| 22 | 26 | #define SIZE_CSCAPE 1 |
| 23 | 27 | #define TARGET_TIME "-->" |
| 24 | 28 | #define LESS_THAN "<" |
| ... | ... | @@ -101,6 +105,8 @@ public: |
| 101 | 105 | private: |
| 102 | 106 | list<ListenerSub*> *listeners; |
| 103 | 107 | |
| 108 | + ifstream ifs_; | |
| 109 | + | |
| 104 | 110 | Subtitle *subtitle; |
| 105 | 111 | int64_t seek_pos; |
| 106 | 112 | bool hasNextSub; |
| ... | ... | @@ -109,7 +115,8 @@ private: |
| 109 | 115 | void encodingfiletoUTF8(); |
| 110 | 116 | string formatText(string line); |
| 111 | 117 | uint64_t calcula_pts(double msec); |
| 112 | - int64_t str_to_time(std::string str_time); | |
| 118 | + int64_t str_to_time(string str_time); | |
| 119 | + void split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out); | |
| 113 | 120 | }; |
| 114 | 121 | |
| 115 | 122 | #endif /* EXTRATORSRT_H */ | ... | ... |
util/src/include/logging.h
| ... | ... | @@ -65,16 +65,16 @@ namespace util { |
| 65 | 65 | if(level <= llevel){ \ |
| 66 | 66 | switch(level){ \ |
| 67 | 67 | case _DEBUG: \ |
| 68 | - fprintf(stdout, _DEBUG_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 68 | + fprintf(stdout, _DEBUG_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 69 | 69 | break; \ |
| 70 | 70 | case _INFO: \ |
| 71 | - fprintf(stdout, _INFO_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 71 | + fprintf(stdout, _INFO_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 72 | 72 | break; \ |
| 73 | 73 | case _WARNING: \ |
| 74 | - fprintf(stdout, _WARN_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 74 | + fprintf(stdout, _WARN_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 75 | 75 | break; \ |
| 76 | 76 | case _ERROR: \ |
| 77 | - fprintf(stderr, _ERROR_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 77 | + fprintf(stderr, _ERROR_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | |
| 78 | 78 | break; \ |
| 79 | 79 | } \ |
| 80 | 80 | } \ | ... | ... |