Commit 315c2c3793987752fd0713d17764ca49aa3275c5

Authored by Leonardo Domingues
1 parent e9b85bfd
Exists in devel

Fix the problem with reading empty lines in the text field of SRT subtitles

extrator/src/extratorSRT.cpp
@@ -4,11 +4,15 @@ ExtratorSRT::ExtratorSRT(){ @@ -4,11 +4,15 @@ ExtratorSRT::ExtratorSRT(){
4 listeners = new list<ListenerSub*>(); 4 listeners = new list<ListenerSub*>();
5 finish = false; 5 finish = false;
6 seek_pos = 0; 6 seek_pos = 0;
  7 +
  8 + // Used to watch a sequence of the subtitles during the extraction
  9 + index_counter = 1;
  10 +
7 hasNextSub = true; 11 hasNextSub = true;
8 PRINTL(util::_DEBUG, "ExtratorSTR Done!\n"); 12 PRINTL(util::_DEBUG, "ExtratorSTR Done!\n");
9 } 13 }
10 14
11 -ExtratorSRT::~ExtratorSRT(){ 15 +ExtratorSRT::~ExtratorSRT() {
12 listeners->clear(); 16 listeners->clear();
13 delete listeners; 17 delete listeners;
14 if (file_io) delete file_io; 18 if (file_io) delete file_io;
@@ -110,14 +114,26 @@ Subtitle* ExtratorSRT::next() { @@ -110,14 +114,26 @@ Subtitle* ExtratorSRT::next() {
110 string text_sub = ""; 114 string text_sub = "";
111 115
112 try { 116 try {
113 - /* ID */  
114 - int id = 0;  
115 - line = bff_reader->readLine();  
116 - seek_pos += (int64_t) line.size() + SIZE_CSCAPE;  
117 - id = atoi(line.c_str());  
118 - sub->setID(id); 117 + // ID
  118 + int id_sub = -1;
  119 +
  120 + // Case exists some blank lines before index, read until find
  121 + do {
  122 + line = bff_reader->readLine();
  123 + printf("%s\n", line.c_str());
  124 + seek_pos++;
  125 + } while (line.size() < 1);
  126 +
  127 + //seek_pos += (int64_t) line.size() + SIZE_CSCAPE;
  128 + id_sub = atoi(line.c_str());
  129 + if (id_sub != index_counter)
  130 + {
  131 + PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n");
  132 + exit (1);
  133 + }
  134 + sub->setID(id_sub);
119 135
120 - /* TimeIn and TimeOut */ 136 + // TimeIn and TimeOut
121 int64_t t_in = 0, t_out = 0; 137 int64_t t_in = 0, t_out = 0;
122 line = bff_reader->readLine(); 138 line = bff_reader->readLine();
123 seek_pos += (int64_t) line.size() + SIZE_CSCAPE; 139 seek_pos += (int64_t) line.size() + SIZE_CSCAPE;
@@ -128,24 +144,32 @@ Subtitle* ExtratorSRT::next() { @@ -128,24 +144,32 @@ Subtitle* ExtratorSRT::next() {
128 t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size())); 144 t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size()));
129 sub->setTimeOut(t_out); 145 sub->setTimeOut(t_out);
130 146
131 - /* Text: read until line be empty */ 147 + // Read until find an empty line
132 while ((line = bff_reader->readLine()).size() > 0) { 148 while ((line = bff_reader->readLine()).size() > 0) {
133 text_sub += line; 149 text_sub += line;
134 text_sub.append(" "); 150 text_sub.append(" ");
135 } 151 }
  152 + printf("size of text: %d\n", text_sub.size());
136 seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; 153 seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;
137 154
138 } catch (lavidlib::EOFException &ex) { 155 } catch (lavidlib::EOFException &ex) {
  156 +
139 if(text_sub == "") 157 if(text_sub == "")
140 sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório 158 sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório
141 159
142 sub->setSubtitleText(formatText(text_sub)); 160 sub->setSubtitleText(formatText(text_sub));
143 seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; 161 seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE;
  162 +
144 hasNextSub = false; 163 hasNextSub = false;
145 delete(bff_reader); 164 delete(bff_reader);
146 return sub; 165 return sub;
147 } 166 }
  167 + index_counter++;
  168 +
148 sub->setSubtitleText(formatText(text_sub)); 169 sub->setSubtitleText(formatText(text_sub));
  170 +
  171 + printf("%s\n\n", sub->toString().c_str());
  172 +
149 delete(bff_reader); 173 delete(bff_reader);
150 return sub; 174 return sub;
151 } 175 }
extrator/src/include/extratorSRT.h
@@ -104,6 +104,7 @@ private: @@ -104,6 +104,7 @@ private:
104 Subtitle *subtitle; 104 Subtitle *subtitle;
105 int64_t seek_pos; 105 int64_t seek_pos;
106 bool hasNextSub; 106 bool hasNextSub;
  107 + int index_counter;
107 108
108 void encodingfiletoUTF8(); 109 void encodingfiletoUTF8();
109 string formatText(string line); 110 string formatText(string line);
recognize/src/recognizer.cpp
@@ -2,10 +2,10 @@ @@ -2,10 +2,10 @@
2 2
3 3
4 #include "recognizer.h" 4 #include "recognizer.h"
5 -#include <json/json.h> 5 +#include <jsoncpp/json/json.h>
6 6
7 7
8 -static boolean reconhecendo ; 8 +static boolean reconhecendo;
9 static string sentence; 9 static string sentence;
10 static float confidence; 10 static float confidence;
11 string lenguage = "pt-BR"; 11 string lenguage = "pt-BR";