Commit 0918b85b15dbfcd6512926e97644230d0e6a1366
1 parent
f616fdfd
Exists in
devel
ExtratorSRT corrections
Showing 4 changed files with 111 additions and 84 deletions
Show diff stats
extrator/src/extratorSRT.cpp
| 1 | #include "extratorSRT.h" | 1 | #include "extratorSRT.h" |
| 2 | +#include <iostream> | ||
| 2 | 3 | ||
| 3 | ExtratorSRT::ExtratorSRT(){ | 4 | ExtratorSRT::ExtratorSRT(){ |
| 4 | listeners = new list<ListenerSub*>(); | 5 | listeners = new list<ListenerSub*>(); |
| @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() { | @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() { | ||
| 43 | .append(this->filePath); | 44 | .append(this->filePath); |
| 44 | system(recmd.c_str()); | 45 | system(recmd.c_str()); |
| 45 | 46 | ||
| 46 | - string sedcmd = "sed -i 's/\r$//;$d' "; | 47 | + string sedcmd = "sed -i 's/\r$//' "; |
| 47 | sedcmd.append(this->filePath); | 48 | sedcmd.append(this->filePath); |
| 48 | system(sedcmd.c_str()); | 49 | system(sedcmd.c_str()); |
| 50 | + | ||
| 49 | 51 | ||
| 50 | } | 52 | } |
| 51 | 53 | ||
| @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) { | @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) { | ||
| 56 | 58 | ||
| 57 | void ExtratorSRT::initialize(){ | 59 | void ExtratorSRT::initialize(){ |
| 58 | 60 | ||
| 59 | - file = new lavidlib::File(this->filePath); | 61 | + ifs_.open(this->filePath, std::ifstream::in); |
| 60 | 62 | ||
| 61 | - try{ | ||
| 62 | - file_io = new lavidlib::FileIO(file->getPath(), FileIO::MODE_READ); | ||
| 63 | - }catch(Exception &ex){ | ||
| 64 | - finish = true; | ||
| 65 | - Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado."); | ||
| 66 | - throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); | ||
| 67 | - } | ||
| 68 | - this->Start(); | 63 | + if(ifs_.is_open() && ifs_.good()){ |
| 64 | + this->Start(); | ||
| 65 | + } else { | ||
| 66 | + finish = true; | ||
| 67 | + Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado."); | ||
| 68 | + throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n"); | ||
| 69 | + } | ||
| 69 | 70 | ||
| 70 | } | 71 | } |
| 71 | 72 | ||
| @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){ | @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){ | ||
| 81 | PRINTL(util::_INFO, "Extraindo Legendas...\n"); | 82 | PRINTL(util::_INFO, "Extraindo Legendas...\n"); |
| 82 | int sub_index = 0; | 83 | int sub_index = 0; |
| 83 | string sub_text = ""; | 84 | string sub_text = ""; |
| 84 | - while(hasNextSubtitle()){ | 85 | + while(hasNextSub){ |
| 85 | try{ | 86 | try{ |
| 86 | subtitle = next(); | 87 | subtitle = next(); |
| 87 | }catch(ExtratorException ex){ | 88 | }catch(ExtratorException ex){ |
| @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){ | @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){ | ||
| 101 | 102 | ||
| 102 | Subtitle* ExtratorSRT::next() { | 103 | Subtitle* ExtratorSRT::next() { |
| 103 | 104 | ||
| 104 | - file_io->seek(seek_pos); | ||
| 105 | - try{ | ||
| 106 | - bff_reader = new BufferedReader(file_io); | ||
| 107 | - }catch(Exception &ex){ | ||
| 108 | - Logging::instance()->writeLog("extratorSRT.cpp <Error>: BufferedReader não inicializado."); | ||
| 109 | - throw ExtratorException("O BufferedReader não foi inicializado."); | 105 | + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); |
| 106 | + | ||
| 107 | + unsigned int section_id; | ||
| 108 | + uint64_t section_time_in; | ||
| 109 | + uint64_t section_time_out; | ||
| 110 | + string section_text; | ||
| 111 | + | ||
| 112 | + int state = 0; | ||
| 113 | + int line_num = 0; | ||
| 114 | + string current_line; | ||
| 115 | + | ||
| 116 | + while (ifs_.good()) { | ||
| 117 | + | ||
| 118 | + getline(ifs_, current_line, '\n'); | ||
| 119 | + line_num++; | ||
| 120 | + | ||
| 121 | + switch (state) { | ||
| 122 | + case 0: // first state, search id | ||
| 123 | + if(current_line.empty()) | ||
| 124 | + break; // ignore blank lines | ||
| 125 | + | ||
| 126 | + char *p_end; | ||
| 127 | + section_id = strtol(current_line.c_str(), &p_end, 10); | ||
| 128 | + if(section_id) { | ||
| 129 | + state = 1; // state 1, find timestamp | ||
| 110 | } | 130 | } |
| 111 | 131 | ||
| 112 | - Subtitle* sub = new Subtitle(); | ||
| 113 | - string line = ""; | ||
| 114 | - string text_sub = ""; | ||
| 115 | - | ||
| 116 | - try { | ||
| 117 | - // ID | ||
| 118 | - int id_sub = -1; | ||
| 119 | - | ||
| 120 | - // Case exists some blank lines before index, read until find | ||
| 121 | - do { | ||
| 122 | - line = bff_reader->readLine(); | ||
| 123 | - printf("%s\n", line.c_str()); | ||
| 124 | - seek_pos++; | ||
| 125 | - } while (line.size() < 1); | ||
| 126 | - | ||
| 127 | - //seek_pos += (int64_t) line.size() + SIZE_CSCAPE; | ||
| 128 | - id_sub = atoi(line.c_str()); | ||
| 129 | - if (id_sub != index_counter) | ||
| 130 | - { | ||
| 131 | - PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n"); | ||
| 132 | - exit (1); | ||
| 133 | - } | ||
| 134 | - sub->setID(id_sub); | ||
| 135 | - | ||
| 136 | - // TimeIn and TimeOut | ||
| 137 | - int64_t t_in = 0, t_out = 0; | ||
| 138 | - line = bff_reader->readLine(); | ||
| 139 | - seek_pos += (int64_t) line.size() + SIZE_CSCAPE; | ||
| 140 | - | ||
| 141 | - int target_pos = line.find(TARGET_TIME); | ||
| 142 | - t_in = str_to_time(line.substr(0, target_pos)); | ||
| 143 | - sub->setTimeIn(t_in); | ||
| 144 | - t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size())); | ||
| 145 | - sub->setTimeOut(t_out); | ||
| 146 | - | ||
| 147 | - // Read until find an empty line | ||
| 148 | - while ((line = bff_reader->readLine()).size() > 0) { | ||
| 149 | - text_sub += line; | ||
| 150 | - text_sub.append(" "); | ||
| 151 | - } | ||
| 152 | - printf("size of text: %d\n", text_sub.size()); | ||
| 153 | - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; | ||
| 154 | - | ||
| 155 | - } catch (lavidlib::EOFException &ex) { | ||
| 156 | - | ||
| 157 | - if(text_sub == "") | ||
| 158 | - sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório | ||
| 159 | - | ||
| 160 | - sub->setSubtitleText(formatText(text_sub)); | ||
| 161 | - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; | ||
| 162 | - | ||
| 163 | - hasNextSub = false; | ||
| 164 | - delete(bff_reader); | ||
| 165 | - return sub; | 132 | + break; |
| 133 | + | ||
| 134 | + case 1: // want timestamp | ||
| 135 | + bool match; | ||
| 136 | + match = regex_match(current_line, timestamp_regex); | ||
| 137 | + | ||
| 138 | + if(match) { | ||
| 139 | + split_timestamp(current_line, &section_time_in, &section_time_out); | ||
| 140 | + state = 2; // state 2, find text | ||
| 141 | + break; | ||
| 166 | } | 142 | } |
| 167 | - index_counter++; | ||
| 168 | 143 | ||
| 169 | - sub->setSubtitleText(formatText(text_sub)); | ||
| 170 | - | ||
| 171 | - printf("%s\n\n", sub->toString().c_str()); | 144 | + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); |
| 172 | 145 | ||
| 173 | - delete(bff_reader); | ||
| 174 | - return sub; | ||
| 175 | - } | 146 | + case 2: // Reading subtitle, first line of text |
| 147 | + if(current_line.empty()) { // first line is empty (blank subtitle) | ||
| 148 | + state = 4; // state 4 find next subtitle before empty subtitle | ||
| 149 | + break; | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + section_text = current_line; | ||
| 153 | + state = 3; // state 3 find second line of text or end of subtitle | ||
| 154 | + break; | ||
| 155 | + | ||
| 156 | + case 3: // create new subtitle object | ||
| 157 | + if(current_line.empty()){ | ||
| 158 | + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | ||
| 159 | + return sub; | ||
| 160 | + } | ||
| 161 | + | ||
| 162 | + section_text += current_line; | ||
| 163 | + break; | ||
| 164 | + | ||
| 165 | + case 4: | ||
| 166 | + if(current_line.empty()) { | ||
| 167 | + state = 0; | ||
| 168 | + break; // ignore blank lines | ||
| 169 | + } | ||
| 170 | + | ||
| 171 | + char *pend; | ||
| 172 | + section_id = strtol(current_line.c_str(), &p_end, 10); | ||
| 173 | + if(section_id) { | ||
| 174 | + state = 1; // find timestamp | ||
| 175 | + break; | ||
| 176 | + } | ||
| 177 | + | ||
| 178 | + default: | ||
| 179 | + throw ExtratorException("SRT parser: Cannot parse file"); | ||
| 180 | + | ||
| 181 | + } // switch | ||
| 182 | + } | ||
| 183 | + | ||
| 184 | + throw ExtratorException("EOF"); | ||
| 185 | + | ||
| 186 | +} | ||
| 176 | 187 | ||
| 177 | string ExtratorSRT::formatText(string line){ | 188 | string ExtratorSRT::formatText(string line){ |
| 178 | int lessThanPos; | 189 | int lessThanPos; |
| @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) { | @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) { | ||
| 213 | 224 | ||
| 214 | return ttime; | 225 | return ttime; |
| 215 | 226 | ||
| 216 | - } | 227 | +} |
| 228 | + | ||
| 229 | +void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) { | ||
| 230 | + | ||
| 231 | + std::size_t pos = timestamp.find(" --> "); | ||
| 232 | + *time_in = str_to_time(timestamp.substr(0, pos)); // left --> | ||
| 233 | + *time_out = str_to_time(timestamp.substr(pos + 5)); // right --> | ||
| 234 | +} | ||
| 217 | 235 | ||
| 218 | uint64_t ExtratorSRT::calcula_pts(double msec) { | 236 | uint64_t ExtratorSRT::calcula_pts(double msec) { |
| 219 | return (uint64_t)msec; | 237 | return (uint64_t)msec; |
extrator/src/include/extrator.h
| @@ -8,6 +8,8 @@ | @@ -8,6 +8,8 @@ | ||
| 8 | #ifndef EXTRATOR_H | 8 | #ifndef EXTRATOR_H |
| 9 | #define EXTRATOR_H | 9 | #define EXTRATOR_H |
| 10 | 10 | ||
| 11 | + | ||
| 12 | +#include <regex> | ||
| 11 | #include "logging.h" | 13 | #include "logging.h" |
| 12 | #include <lavidlib/io/File.h> | 14 | #include <lavidlib/io/File.h> |
| 13 | #include <lavidlib/io/FileIO.h> | 15 | #include <lavidlib/io/FileIO.h> |
extrator/src/include/extratorSRT.h
| @@ -19,6 +19,10 @@ | @@ -19,6 +19,10 @@ | ||
| 19 | #include "listenerSub.h" | 19 | #include "listenerSub.h" |
| 20 | #include "extratorException.h" | 20 | #include "extratorException.h" |
| 21 | 21 | ||
| 22 | +#include <algorithm> | ||
| 23 | +#include <fstream> | ||
| 24 | +#include <sstream> | ||
| 25 | + | ||
| 22 | #define SIZE_CSCAPE 1 | 26 | #define SIZE_CSCAPE 1 |
| 23 | #define TARGET_TIME "-->" | 27 | #define TARGET_TIME "-->" |
| 24 | #define LESS_THAN "<" | 28 | #define LESS_THAN "<" |
| @@ -101,6 +105,8 @@ public: | @@ -101,6 +105,8 @@ public: | ||
| 101 | private: | 105 | private: |
| 102 | list<ListenerSub*> *listeners; | 106 | list<ListenerSub*> *listeners; |
| 103 | 107 | ||
| 108 | + ifstream ifs_; | ||
| 109 | + | ||
| 104 | Subtitle *subtitle; | 110 | Subtitle *subtitle; |
| 105 | int64_t seek_pos; | 111 | int64_t seek_pos; |
| 106 | bool hasNextSub; | 112 | bool hasNextSub; |
| @@ -109,7 +115,8 @@ private: | @@ -109,7 +115,8 @@ private: | ||
| 109 | void encodingfiletoUTF8(); | 115 | void encodingfiletoUTF8(); |
| 110 | string formatText(string line); | 116 | string formatText(string line); |
| 111 | uint64_t calcula_pts(double msec); | 117 | uint64_t calcula_pts(double msec); |
| 112 | - int64_t str_to_time(std::string str_time); | 118 | + int64_t str_to_time(string str_time); |
| 119 | + void split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out); | ||
| 113 | }; | 120 | }; |
| 114 | 121 | ||
| 115 | #endif /* EXTRATORSRT_H */ | 122 | #endif /* EXTRATORSRT_H */ |
util/src/include/logging.h
| @@ -65,16 +65,16 @@ namespace util { | @@ -65,16 +65,16 @@ namespace util { | ||
| 65 | if(level <= llevel){ \ | 65 | if(level <= llevel){ \ |
| 66 | switch(level){ \ | 66 | switch(level){ \ |
| 67 | case _DEBUG: \ | 67 | case _DEBUG: \ |
| 68 | - fprintf(stdout, _DEBUG_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | 68 | + fprintf(stdout, _DEBUG_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ |
| 69 | break; \ | 69 | break; \ |
| 70 | case _INFO: \ | 70 | case _INFO: \ |
| 71 | - fprintf(stdout, _INFO_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | 71 | + fprintf(stdout, _INFO_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ |
| 72 | break; \ | 72 | break; \ |
| 73 | case _WARNING: \ | 73 | case _WARNING: \ |
| 74 | - fprintf(stdout, _WARN_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | 74 | + fprintf(stdout, _WARN_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ |
| 75 | break; \ | 75 | break; \ |
| 76 | case _ERROR: \ | 76 | case _ERROR: \ |
| 77 | - fprintf(stderr, _ERROR_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ | 77 | + fprintf(stderr, _ERROR_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ |
| 78 | break; \ | 78 | break; \ |
| 79 | } \ | 79 | } \ |
| 80 | } \ | 80 | } \ |