From 315c2c3793987752fd0713d17764ca49aa3275c5 Mon Sep 17 00:00:00 2001 From: Leonardo Domingues Date: Thu, 12 May 2016 15:52:57 -0300 Subject: [PATCH] Fix the problem to read empty lines in the field text from SRT subtitle --- extrator/src/extratorSRT.cpp | 42 +++++++++++++++++++++++++++++++++--------- extrator/src/include/extratorSRT.h | 1 + recognize/src/recognizer.cpp | 4 ++-- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/extrator/src/extratorSRT.cpp b/extrator/src/extratorSRT.cpp index bcde839..4fa45a4 100644 --- a/extrator/src/extratorSRT.cpp +++ b/extrator/src/extratorSRT.cpp @@ -4,11 +4,15 @@ ExtratorSRT::ExtratorSRT(){ listeners = new list(); finish = false; seek_pos = 0; + + // Used to watch a sequence of the subtitles during the extraction + index_counter = 1; + hasNextSub = true; PRINTL(util::_DEBUG, "ExtratorSTR Done!\n"); } -ExtratorSRT::~ExtratorSRT(){ +ExtratorSRT::~ExtratorSRT() { listeners->clear(); delete listeners; if (file_io) delete file_io; @@ -110,14 +114,26 @@ Subtitle* ExtratorSRT::next() { string text_sub = ""; try { - /* ID */ - int id = 0; - line = bff_reader->readLine(); - seek_pos += (int64_t) line.size() + SIZE_CSCAPE; - id = atoi(line.c_str()); - sub->setID(id); + // ID + int id_sub = -1; + + // Case exists some blank lines before index, read until find + do { + line = bff_reader->readLine(); + printf("%s\n", line.c_str()); + seek_pos++; + } while (line.size() < 1); + + //seek_pos += (int64_t) line.size() + SIZE_CSCAPE; + id_sub = atoi(line.c_str()); + if (id_sub != index_counter) + { + PRINTL(util::_DEBUG, "[Error] The SRT file is bad formmated: indexes is not continuous.\n"); + exit (1); + } + sub->setID(id_sub); - /* TimeIn and TimeOut */ + // TimeIn and TimeOut int64_t t_in = 0, t_out = 0; line = bff_reader->readLine(); seek_pos += (int64_t) line.size() + SIZE_CSCAPE; @@ -128,24 +144,32 @@ Subtitle* ExtratorSRT::next() { t_out = str_to_time(line.substr(target_pos + strlen(TARGET_TIME)+1, line.size())); sub->setTimeOut(t_out); - /* Text: read until line be empty */ + // Read until find an empty line while ((line = bff_reader->readLine()).size() > 0) { text_sub += line; text_sub.append(" "); } + printf("size of text: %d\n", text_sub.size()); seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; } catch (lavidlib::EOFException &ex) { + if(text_sub == "") sub->setTimeIn(0); //seta o valor 0 para nao gerar um valor aleatório sub->setSubtitleText(formatText(text_sub)); seek_pos += (int64_t) text_sub.size() + SIZE_CSCAPE; + hasNextSub = false; delete(bff_reader); return sub; } + index_counter++; + sub->setSubtitleText(formatText(text_sub)); + + printf("%s\n\n", sub->toString().c_str()); + delete(bff_reader); return sub; } diff --git a/extrator/src/include/extratorSRT.h b/extrator/src/include/extratorSRT.h index ecd8c65..742e5e7 100644 --- a/extrator/src/include/extratorSRT.h +++ b/extrator/src/include/extratorSRT.h @@ -104,6 +104,7 @@ private: Subtitle *subtitle; int64_t seek_pos; bool hasNextSub; + int index_counter; void encodingfiletoUTF8(); string formatText(string line); diff --git a/recognize/src/recognizer.cpp b/recognize/src/recognizer.cpp index 74e58af..79d2879 100644 --- a/recognize/src/recognizer.cpp +++ b/recognize/src/recognizer.cpp @@ -2,10 +2,10 @@ #include "recognizer.h" -#include +#include -static boolean reconhecendo ; +static boolean reconhecendo; static string sentence; static float confidence; string lenguage = "pt-BR"; -- libgit2 0.21.2