Commit 0918b85b15dbfcd6512926e97644230d0e6a1366

Authored by Wesnydy Ribeiro
1 parent f616fdfd
Exists in devel

ExtratorSRT corrections

extrator/src/extratorSRT.cpp
1 #include "extratorSRT.h" 1 #include "extratorSRT.h"
  2 +#include <iostream>
2 3
3 ExtratorSRT::ExtratorSRT(){ 4 ExtratorSRT::ExtratorSRT(){
4 listeners = new list<ListenerSub*>(); 5 listeners = new list<ListenerSub*>();
@@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() { @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() {
43 .append(this->filePath); 44 .append(this->filePath);
44 system(recmd.c_str()); 45 system(recmd.c_str());
45 46
46 - string sedcmd = "sed -i 's/\r$//;$d' "; 47 + string sedcmd = "sed -i 's/\r$//' ";
47 sedcmd.append(this->filePath); 48 sedcmd.append(this->filePath);
48 system(sedcmd.c_str()); 49 system(sedcmd.c_str());
  50 +
49 51
50 } 52 }
51 53
@@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) { @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) {
56 58
57 void ExtratorSRT::initialize(){ 59 void ExtratorSRT::initialize(){
58 60
59 - file = new lavidlib::File(this->filePath); 61 + ifs_.open(this->filePath, std::ifstream::in);
60 62
61 - try{  
62 - file_io = new lavidlib::FileIO(file->getPath(), FileIO::MODE_READ);  
63 - }catch(Exception &ex){  
64 - finish = true;  
65 - Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado.");  
66 - throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n");  
67 - }  
68 - this->Start(); 63 + if(ifs_.is_open() && ifs_.good()){
  64 + this->Start();
  65 + } else {
  66 + finish = true;
  67 + Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado.");
  68 + throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n");
  69 + }
69 70
70 } 71 }
71 72
@@ -81,7 +82,7 @@ void ExtratorSRT::Run(){ @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){
81 PRINTL(util::_INFO, "Extraindo Legendas...\n"); 82 PRINTL(util::_INFO, "Extraindo Legendas...\n");
82 int sub_index = 0; 83 int sub_index = 0;
83 string sub_text = ""; 84 string sub_text = "";
84 - while(hasNextSubtitle()){ 85 + while(hasNextSub){
85 try{ 86 try{
86 subtitle = next(); 87 subtitle = next();
87 }catch(ExtratorException ex){ 88 }catch(ExtratorException ex){
@@ -101,78 +102,88 @@ void ExtratorSRT::Run(){ @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){
101 102
102 Subtitle* ExtratorSRT::next() { 103 Subtitle* ExtratorSRT::next() {
103 104
104 - file_io->seek(seek_pos);  
105 - try{  
106 - bff_reader = new BufferedReader(file_io);  
107 - }catch(Exception &ex){  
108 - Logging::instance()->writeLog("extratorSRT.cpp <Error>: BufferedReader não inicializado.");  
109 - throw ExtratorException("O BufferedReader não foi inicializado."); 105 + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}");
  106 +
  107 + unsigned int section_id;
  108 + uint64_t section_time_in;
  109 + uint64_t section_time_out;
  110 + string section_text;
  111 +
  112 + int state = 0;
  113 + int line_num = 0;
  114 + string current_line;
  115 +
  116 + while (ifs_.good()) {
  117 +
  118 + getline(ifs_, current_line, '\n');
  119 + line_num++;
  120 +
  121 + switch (state) {
  122 + case 0: // first state, search id
  123 + if(current_line.empty())
  124 + break; // ignore blank lines
  125 +
  126 + char *p_end;
  127 + section_id = strtol(current_line.c_str(), &p_end, 10);
  128 + if(section_id) {
  129 + state = 1; // state 1, find timestamp
110 } 130 }
111 131
112 - Subtitle* sub = new Subtitle();  
113 - string line = "";  
114 - string text_sub = "";  
115 -  
116 - try {  
117 - // ID  
118 - int id_sub = -1;  
119 -  
120 - // Case exists some blank lines before index, read until find  
121 - do {  
122 - line = bff_reader->readLine();  
123 - printf("%s\n", line.c_str());  
124 - seek_pos++;  
125 - } while (line.size() < 1);  
126 -  
127 - //seek_pos += (int64_t) line.size() + SIZE_CSCAPE;  
128 - id_sub = atoi(line.c_str());  
129 - if (id_sub != index_counter)  
130 - {  
131 - PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n");  
132 - exit (1);  
133 - }  
134 - sub->setID(id_sub);  
135 -  
136 - // TimeIn and TimeOut  
137 - int64_t t_in = 0, t_out = 0;  
138 - line = bff_reader->readLine();  
139 - seek_pos += (int64_t) line.size() + SIZE_CSCAPE;  
140 -  
141 - int target_pos = line.find(TARGET_TIME);  
142 - t_in = str_to_time(line.substr(0, target_pos));  
143 - sub->setTimeIn(t_in);  
144 - t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size()));  
145 - sub->setTimeOut(t_out);  
146 -  
147 - // Read until find an empty line  
148 - while ((line = bff_reader->readLine()).size() > 0) {  
149 - text_sub += line;  
150 - text_sub.append(" ");  
151 - }  
152 - printf("size of text: %d\n", text_sub.size());  
153 - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;  
154 -  
155 - } catch (lavidlib::EOFException &ex) {  
156 -  
157 - if(text_sub == "")  
158 - sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório  
159 -  
160 - sub->setSubtitleText(formatText(text_sub));  
161 - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;  
162 -  
163 - hasNextSub = false;  
164 - delete(bff_reader);  
165 - return sub; 132 + break;
  133 +
  134 + case 1: // want timestamp
  135 + bool match;
  136 + match = regex_match(current_line, timestamp_regex);
  137 +
  138 + if(match) {
  139 + split_timestamp(current_line, &section_time_in, &section_time_out);
  140 + state = 2; // state 2, find text
  141 + break;
166 } 142 }
167 - index_counter++;  
168 143
169 - sub->setSubtitleText(formatText(text_sub));  
170 -  
171 - printf("%s\n\n", sub->toString().c_str()); 144 + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num );
172 145
173 - delete(bff_reader);  
174 - return sub;  
175 - } 146 + case 2: // Reading subtitle, first line of text
  147 + if(current_line.empty()) { // first line is empty (blank subtitle)
  148 + state = 4; // state 4 find next subtitle before empty subtitle
  149 + break;
  150 + }
  151 +
  152 + section_text = current_line;
  153 + state = 3; // state 3 find second line of text or end of subtitle
  154 + break;
  155 +
  156 + case 3: // create new subtitle object
  157 + if(current_line.empty()){
  158 + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out);
  159 + return sub;
  160 + }
  161 +
  162 + section_text += current_line;
  163 + break;
  164 +
  165 + case 4:
  166 + if(current_line.empty()) {
  167 + state = 0;
  168 + break; // ignore blank lines
  169 + }
  170 +
  171 + char *pend;
  172 + section_id = strtol(current_line.c_str(), &p_end, 10);
  173 + if(section_id) {
  174 + state = 1; // find timestamp
  175 + break;
  176 + }
  177 +
  178 + default:
  179 + throw ExtratorException("SRT parser: Cannot parse file");
  180 +
  181 + } // switch
  182 + }
  183 +
  184 + throw ExtratorException("EOF");
  185 +
  186 +}
176 187
177 string ExtratorSRT::formatText(string line){ 188 string ExtratorSRT::formatText(string line){
178 int lessThanPos; 189 int lessThanPos;
@@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) { @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) {
213 224
214 return ttime; 225 return ttime;
215 226
216 - } 227 +}
  228 +
  229 +void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) {
  230 +
  231 + std::size_t pos = timestamp.find(" --> ");
  232 + *time_in = str_to_time(timestamp.substr(0, pos)); // left -->
  233 + *time_out = str_to_time(timestamp.substr(pos + 5)); // rigth -->
  234 +}
217 235
218 uint64_t ExtratorSRT::calcula_pts(double msec) { 236 uint64_t ExtratorSRT::calcula_pts(double msec) {
219 return (uint64_t)msec; 237 return (uint64_t)msec;
extrator/src/include/extrator.h
@@ -8,6 +8,8 @@ @@ -8,6 +8,8 @@
8 #ifndef EXTRATOR_H 8 #ifndef EXTRATOR_H
9 #define EXTRATOR_H 9 #define EXTRATOR_H
10 10
  11 +
  12 +#include <regex>
11 #include "logging.h" 13 #include "logging.h"
12 #include <lavidlib/io/File.h> 14 #include <lavidlib/io/File.h>
13 #include <lavidlib/io/FileIO.h> 15 #include <lavidlib/io/FileIO.h>
extrator/src/include/extratorSRT.h
@@ -19,6 +19,10 @@ @@ -19,6 +19,10 @@
19 #include "listenerSub.h" 19 #include "listenerSub.h"
20 #include "extratorException.h" 20 #include "extratorException.h"
21 21
  22 +#include <algorithm>
  23 +#include <fstream>
  24 +#include <sstream>
  25 +
22 #define SIZE_CSCAPE 1 26 #define SIZE_CSCAPE 1
23 #define TARGET_TIME "-->" 27 #define TARGET_TIME "-->"
24 #define LESS_THAN "<" 28 #define LESS_THAN "<"
@@ -101,6 +105,8 @@ public: @@ -101,6 +105,8 @@ public:
101 private: 105 private:
102 list<ListenerSub*> *listeners; 106 list<ListenerSub*> *listeners;
103 107
  108 + ifstream ifs_;
  109 +
104 Subtitle *subtitle; 110 Subtitle *subtitle;
105 int64_t seek_pos; 111 int64_t seek_pos;
106 bool hasNextSub; 112 bool hasNextSub;
@@ -109,7 +115,8 @@ private: @@ -109,7 +115,8 @@ private:
109 void encodingfiletoUTF8(); 115 void encodingfiletoUTF8();
110 string formatText(string line); 116 string formatText(string line);
111 uint64_t calcula_pts(double msec); 117 uint64_t calcula_pts(double msec);
112 - int64_t str_to_time(std::string str_time); 118 + int64_t str_to_time(string str_time);
  119 + void split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out);
113 }; 120 };
114 121
115 #endif /* EXTRATORSRT_H */ 122 #endif /* EXTRATORSRT_H */
util/src/include/logging.h
@@ -65,16 +65,16 @@ namespace util { @@ -65,16 +65,16 @@ namespace util {
65 if(level <= llevel){ \ 65 if(level <= llevel){ \
66 switch(level){ \ 66 switch(level){ \
67 case _DEBUG: \ 67 case _DEBUG: \
68 - fprintf(stdout, _DEBUG_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ 68 + fprintf(stdout, _DEBUG_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
69 break; \ 69 break; \
70 case _INFO: \ 70 case _INFO: \
71 - fprintf(stdout, _INFO_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ 71 + fprintf(stdout, _INFO_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
72 break; \ 72 break; \
73 case _WARNING: \ 73 case _WARNING: \
74 - fprintf(stdout, _WARN_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ 74 + fprintf(stdout, _WARN_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
75 break; \ 75 break; \
76 case _ERROR: \ 76 case _ERROR: \
77 - fprintf(stderr, _ERROR_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ 77 + fprintf(stderr, _ERROR_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
78 break; \ 78 break; \
79 } \ 79 } \
80 } \ 80 } \