Commit 0918b85b15dbfcd6512926e97644230d0e6a1366

Authored by Wesnydy Ribeiro
1 parent f616fdfd
Exists in devel

ExtratorSRT corrections

extrator/src/extratorSRT.cpp
1 1 #include "extratorSRT.h"
  2 +#include <iostream>
2 3  
3 4 ExtratorSRT::ExtratorSRT(){
4 5 listeners = new list<ListenerSub*>();
... ... @@ -43,9 +44,10 @@ void ExtratorSRT::encodingfiletoUTF8() {
43 44 .append(this->filePath);
44 45 system(recmd.c_str());
45 46  
46   - string sedcmd = "sed -i 's/\r$//;$d' ";
  47 + string sedcmd = "sed -i 's/\r$//' ";
47 48 sedcmd.append(this->filePath);
48 49 system(sedcmd.c_str());
  50 +
49 51  
50 52 }
51 53  
... ... @@ -56,16 +58,15 @@ void ExtratorSRT::setFilePath(char* path) {
56 58  
57 59 void ExtratorSRT::initialize(){
58 60  
59   - file = new lavidlib::File(this->filePath);
  61 + ifs_.open(this->filePath, std::ifstream::in);
60 62  
61   - try{
62   - file_io = new lavidlib::FileIO(file->getPath(), FileIO::MODE_READ);
63   - }catch(Exception &ex){
64   - finish = true;
65   - Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado.");
66   - throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n");
67   - }
68   - this->Start();
  63 + if(ifs_.is_open() && ifs_.good()){
  64 + this->Start();
  65 + } else {
  66 + finish = true;
  67 + Logging::instance()->writeLog("extratorSRT.cpp <Error>: Arquivo de legenda não encontrado.");
  68 + throw ExtratorException("Falha ao abrir o arquivo de legenda! Verifique se o mesmo existe.\n");
  69 + }
69 70  
70 71 }
71 72  
... ... @@ -81,7 +82,7 @@ void ExtratorSRT::Run(){
81 82 PRINTL(util::_INFO, "Extraindo Legendas...\n");
82 83 int sub_index = 0;
83 84 string sub_text = "";
84   - while(hasNextSubtitle()){
  85 + while(hasNextSub){
85 86 try{
86 87 subtitle = next();
87 88 }catch(ExtratorException ex){
... ... @@ -101,78 +102,88 @@ void ExtratorSRT::Run(){
101 102  
102 103 Subtitle* ExtratorSRT::next() {
103 104  
104   - file_io->seek(seek_pos);
105   - try{
106   - bff_reader = new BufferedReader(file_io);
107   - }catch(Exception &ex){
108   - Logging::instance()->writeLog("extratorSRT.cpp <Error>: BufferedReader não inicializado.");
109   - throw ExtratorException("O BufferedReader não foi inicializado.");
  105 + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}");
  106 +
  107 + unsigned int section_id;
  108 + uint64_t section_time_in;
  109 + uint64_t section_time_out;
  110 + string section_text;
  111 +
  112 + int state = 0;
  113 + int line_num = 0;
  114 + string current_line;
  115 +
  116 + while (ifs_.good()) {
  117 +
  118 + getline(ifs_, current_line, '\n');
  119 + line_num++;
  120 +
  121 + switch (state) {
  122 + case 0: // first state, search id
  123 + if(current_line.empty())
  124 + break; // ignore blank lines
  125 +
  126 + char *p_end;
  127 + section_id = strtol(current_line.c_str(), &p_end, 10);
  128 + if(section_id) {
  129 + state = 1; // state 1, find timestamp
110 130 }
111 131  
112   - Subtitle* sub = new Subtitle();
113   - string line = "";
114   - string text_sub = "";
115   -
116   - try {
117   - // ID
118   - int id_sub = -1;
119   -
120   - // Case exists some blank lines before index, read until find
121   - do {
122   - line = bff_reader->readLine();
123   - printf("%s\n", line.c_str());
124   - seek_pos++;
125   - } while (line.size() < 1);
126   -
127   - //seek_pos += (int64_t) line.size() + SIZE_CSCAPE;
128   - id_sub = atoi(line.c_str());
129   - if (id_sub != index_counter)
130   - {
131   - PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n");
132   - exit (1);
133   - }
134   - sub->setID(id_sub);
135   -
136   - // TimeIn and TimeOut
137   - int64_t t_in = 0, t_out = 0;
138   - line = bff_reader->readLine();
139   - seek_pos += (int64_t) line.size() + SIZE_CSCAPE;
140   -
141   - int target_pos = line.find(TARGET_TIME);
142   - t_in = str_to_time(line.substr(0, target_pos));
143   - sub->setTimeIn(t_in);
144   - t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size()));
145   - sub->setTimeOut(t_out);
146   -
147   - // Read until find an empty line
148   - while ((line = bff_reader->readLine()).size() > 0) {
149   - text_sub += line;
150   - text_sub.append(" ");
151   - }
152   - printf("size of text: %d\n", text_sub.size());
153   - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;
154   -
155   - } catch (lavidlib::EOFException &ex) {
156   -
157   - if(text_sub == "")
158   - sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório
159   -
160   - sub->setSubtitleText(formatText(text_sub));
161   - seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;
162   -
163   - hasNextSub = false;
164   - delete(bff_reader);
165   - return sub;
  132 + break;
  133 +
  134 + case 1: // want timestamp
  135 + bool match;
  136 + match = regex_match(current_line, timestamp_regex);
  137 +
  138 + if(match) {
  139 + split_timestamp(current_line, &section_time_in, &section_time_out);
  140 + state = 2; // state 2, find text
  141 + break;
166 142 }
167   - index_counter++;
168 143  
169   - sub->setSubtitleText(formatText(text_sub));
170   -
171   - printf("%s\n\n", sub->toString().c_str());
  144 + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num );
172 145  
173   - delete(bff_reader);
174   - return sub;
175   - }
  146 + case 2: // Reading subtitle, first line of text
  147 + if(current_line.empty()) { // first line is empty (blank subtitle)
  148 + state = 4; // state 4 find next subtitle before empty subtitle
  149 + break;
  150 + }
  151 +
  152 + section_text = current_line;
  153 + state = 3; // state 3 find second line of text or end of subtitle
  154 + break;
  155 +
  156 + case 3: // create new subtitle object
  157 + if(current_line.empty()){
  158 + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out);
  159 + return sub;
  160 + }
  161 +
  162 + section_text += current_line;
  163 + break;
  164 +
  165 + case 4:
  166 + if(current_line.empty()) {
  167 + state = 0;
  168 + break; // ignore blank lines
  169 + }
  170 +
  171 + char *pend;
  172 + section_id = strtol(current_line.c_str(), &p_end, 10);
  173 + if(section_id) {
  174 + state = 1; // find timestamp
  175 + break;
  176 + }
  177 +
  178 + default:
  179 + throw ExtratorException("SRT parser: Cannot parse file");
  180 +
  181 + } // switch
  182 + }
  183 +
  184 + throw ExtratorException("EOF");
  185 +
  186 +}
176 187  
177 188 string ExtratorSRT::formatText(string line){
178 189 int lessThanPos;
... ... @@ -213,7 +224,14 @@ int64_t ExtratorSRT::str_to_time(string str_time) {
213 224  
214 225 return ttime;
215 226  
216   - }
  227 +}
  228 +
  229 +void ExtratorSRT::split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out) {
  230 +
  231 + std::size_t pos = timestamp.find(" --> ");
  232 + *time_in = str_to_time(timestamp.substr(0, pos)); // left -->
  233 + *time_out = str_to_time(timestamp.substr(pos + 5)); // rigth -->
  234 +}
217 235  
218 236 uint64_t ExtratorSRT::calcula_pts(double msec) {
219 237 return (uint64_t)msec;
... ...
extrator/src/include/extrator.h
... ... @@ -8,6 +8,8 @@
8 8 #ifndef EXTRATOR_H
9 9 #define EXTRATOR_H
10 10  
  11 +
  12 +#include <regex>
11 13 #include "logging.h"
12 14 #include <lavidlib/io/File.h>
13 15 #include <lavidlib/io/FileIO.h>
... ...
extrator/src/include/extratorSRT.h
... ... @@ -19,6 +19,10 @@
19 19 #include "listenerSub.h"
20 20 #include "extratorException.h"
21 21  
  22 +#include <algorithm>
  23 +#include <fstream>
  24 +#include <sstream>
  25 +
22 26 #define SIZE_CSCAPE 1
23 27 #define TARGET_TIME "-->"
24 28 #define LESS_THAN "<"
... ... @@ -101,6 +105,8 @@ public:
101 105 private:
102 106 list<ListenerSub*> *listeners;
103 107  
  108 + ifstream ifs_;
  109 +
104 110 Subtitle *subtitle;
105 111 int64_t seek_pos;
106 112 bool hasNextSub;
... ... @@ -109,7 +115,8 @@ private:
109 115 void encodingfiletoUTF8();
110 116 string formatText(string line);
111 117 uint64_t calcula_pts(double msec);
112   - int64_t str_to_time(std::string str_time);
  118 + int64_t str_to_time(string str_time);
  119 + void split_timestamp(std::string timestamp, uint64_t *time_in, uint64_t *time_out);
113 120 };
114 121  
115 122 #endif /* EXTRATORSRT_H */
... ...
util/src/include/logging.h
... ... @@ -65,16 +65,16 @@ namespace util {
65 65 if(level <= llevel){ \
66 66 switch(level){ \
67 67 case _DEBUG: \
68   - fprintf(stdout, _DEBUG_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
  68 + fprintf(stdout, _DEBUG_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
69 69 break; \
70 70 case _INFO: \
71   - fprintf(stdout, _INFO_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
  71 + fprintf(stdout, _INFO_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
72 72 break; \
73 73 case _WARNING: \
74   - fprintf(stdout, _WARN_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
  74 + fprintf(stdout, _WARN_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
75 75 break; \
76 76 case _ERROR: \
77   - fprintf(stderr, _ERROR_"%s::%s<%d>: "_END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
  77 + fprintf(stderr, _ERROR_ "%s::%s<%d>: " _END_ format, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
78 78 break; \
79 79 } \
80 80 } \
... ...