Commit bac4339914db837e1c6089c6ed03acaa6f54c00a

Authored by Wesnydy Ribeiro
1 parent 0918b85b
Exists in devel

Byte Order Mark verification

Showing 1 changed file with 84 additions and 81 deletions   Show diff stats
extrator/src/extratorSRT.cpp
... ... @@ -102,87 +102,90 @@ void ExtratorSRT::Run(){
102 102  
103 103 Subtitle* ExtratorSRT::next() {
104 104  
105   - static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}");
106   -
107   - unsigned int section_id;
108   - uint64_t section_time_in;
109   - uint64_t section_time_out;
110   - string section_text;
111   -
112   - int state = 0;
113   - int line_num = 0;
114   - string current_line;
115   -
116   - while (ifs_.good()) {
117   -
118   - getline(ifs_, current_line, '\n');
119   - line_num++;
120   -
121   - switch (state) {
122   - case 0: // first state, search id
123   - if(current_line.empty())
124   - break; // ignore blank lines
125   -
126   - char *p_end;
127   - section_id = strtol(current_line.c_str(), &p_end, 10);
128   - if(section_id) {
129   - state = 1; // state 1, find timestamp
130   - }
131   -
132   - break;
133   -
134   - case 1: // want timestamp
135   - bool match;
136   - match = regex_match(current_line, timestamp_regex);
137   -
138   - if(match) {
139   - split_timestamp(current_line, &section_time_in, &section_time_out);
140   - state = 2; // state 2, find text
141   - break;
142   - }
143   -
144   - throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num );
145   -
146   - case 2: // Reading subtitle, first line of text
147   - if(current_line.empty()) { // first line is empty (blank subtitle)
148   - state = 4; // state 4 find next subtitle before empty subtitle
149   - break;
150   - }
151   -
152   - section_text = current_line;
153   - state = 3; // state 3 find second line of text or end of subtitle
154   - break;
155   -
156   - case 3: // create new subtitle object
157   - if(current_line.empty()){
158   - Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out);
159   - return sub;
160   - }
161   -
162   - section_text += current_line;
163   - break;
164   -
165   - case 4:
166   - if(current_line.empty()) {
167   - state = 0;
168   - break; // ignore blank lines
169   - }
170   -
171   - char *pend;
172   - section_id = strtol(current_line.c_str(), &p_end, 10);
173   - if(section_id) {
174   - state = 1; // find timestamp
175   - break;
176   - }
177   -
178   - default:
179   - throw ExtratorException("SRT parser: Cannot parse file");
180   -
181   - } // switch
182   - }
183   -
184   - throw ExtratorException("EOF");
185   -
  105 + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}");
  106 +
  107 + unsigned int section_id;
  108 + uint64_t section_time_in;
  109 + uint64_t section_time_out;
  110 + string section_text;
  111 +
  112 + int state = 0;
  113 + int line_num = 0;
  114 + string current_line;
  115 +
  116 + while (ifs_.good()) {
  117 +
  118 + getline(ifs_, current_line, '\n');
  119 + line_num++;
  120 +
  121 + switch (state) {
  122 + case 0: // first state, search id
  123 + if(current_line.empty())
  124 + break; // ignore blank lines
  125 +
  126 + char *p_end;
  127 + section_id = strtol(current_line.c_str(), &p_end, 10);
  128 + if(section_id || current_line.compare(0, 3, "\xEF\xBB\xBF") == 0) {
  129 + state = 1; // state 1, find timestamp
  130 + }
  131 +
  132 + break;
  133 +
  134 + case 1: // want timestamp
  135 + bool match;
  136 + match = regex_match(current_line, timestamp_regex);
  137 +
  138 + if(match) {
  139 + split_timestamp(current_line, &section_time_in, &section_time_out);
  140 + state = 2; // state 2, find text
  141 + break;
  142 + }
  143 +
  144 + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num );
  145 +
  146 + case 2: // Reading subtitle, first line of text
  147 + if(current_line.empty()) { // first line is empty (blank subtitle)
  148 + state = 4; // state 4 find next subtitle before empty subtitle
  149 + break;
  150 + }
  151 +
  152 + section_text = current_line;
  153 + state = 3; // state 3 find second line of text or end of subtitle
  154 + break;
  155 +
  156 + case 3: // create new subtitle object
  157 + if(current_line.empty()){
  158 + // std::clog << "[ID] " << section_id << std::endl;
  159 + // std::clog << "[IN] " << section_time_in << std::endl;
  160 + // std::clog << "[OUT] " << section_time_out << std::endl;
  161 + // std::clog << "[TEXT] " << section_text << "\n" <<std::endl;
  162 + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out);
  163 + return sub;
  164 + }
  165 +
  166 + section_text += current_line;
  167 + break;
  168 +
  169 + case 4:
  170 + if(current_line.empty()) {
  171 + state = 0;
  172 + break; // ignore blank lines
  173 + }
  174 +
  175 + char *pend;
  176 + section_id = strtol(current_line.c_str(), &p_end, 10);
  177 + if(section_id) {
  178 + state = 1; // find timestamp
  179 + break;
  180 + }
  181 +
  182 + default:
  183 + throw ExtratorException("SRT parser: Cannot parse file");
  184 +
  185 + } // switch
  186 + } // while
  187 +
  188 + throw ExtratorException("EOF");
186 189 }
187 190  
188 191 string ExtratorSRT::formatText(string line){
... ...