Commit bac4339914db837e1c6089c6ed03acaa6f54c00a
1 parent
0918b85b
Exists in
devel
Byte Order Mark verification
Showing
1 changed file
with
84 additions
and
81 deletions
Show diff stats
extrator/src/extratorSRT.cpp
... | ... | @@ -102,87 +102,90 @@ void ExtratorSRT::Run(){ |
102 | 102 | |
103 | 103 | Subtitle* ExtratorSRT::next() { |
104 | 104 | |
105 | - static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | |
106 | - | |
107 | - unsigned int section_id; | |
108 | - uint64_t section_time_in; | |
109 | - uint64_t section_time_out; | |
110 | - string section_text; | |
111 | - | |
112 | - int state = 0; | |
113 | - int line_num = 0; | |
114 | - string current_line; | |
115 | - | |
116 | - while (ifs_.good()) { | |
117 | - | |
118 | - getline(ifs_, current_line, '\n'); | |
119 | - line_num++; | |
120 | - | |
121 | - switch (state) { | |
122 | - case 0: // first state, search id | |
123 | - if(current_line.empty()) | |
124 | - break; // ignore blank lines | |
125 | - | |
126 | - char *p_end; | |
127 | - section_id = strtol(current_line.c_str(), &p_end, 10); | |
128 | - if(section_id) { | |
129 | - state = 1; // state 1, find timestamp | |
130 | - } | |
131 | - | |
132 | - break; | |
133 | - | |
134 | - case 1: // want timestamp | |
135 | - bool match; | |
136 | - match = regex_match(current_line, timestamp_regex); | |
137 | - | |
138 | - if(match) { | |
139 | - split_timestamp(current_line, &section_time_in, &section_time_out); | |
140 | - state = 2; // state 2, find text | |
141 | - break; | |
142 | - } | |
143 | - | |
144 | - throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | |
145 | - | |
146 | - case 2: // Reading subtitle, first line of text | |
147 | - if(current_line.empty()) { // first line is empty (blank subtitle) | |
148 | - state = 4; // state 4 find next subtitle before empty subtitle | |
149 | - break; | |
150 | - } | |
151 | - | |
152 | - section_text = current_line; | |
153 | - state = 3; // state 3 find second line of text or end of subtitle | |
154 | - break; | |
155 | - | |
156 | - case 3: // create new subtitle object | |
157 | - if(current_line.empty()){ | |
158 | - Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | |
159 | - return sub; | |
160 | - } | |
161 | - | |
162 | - section_text += current_line; | |
163 | - break; | |
164 | - | |
165 | - case 4: | |
166 | - if(current_line.empty()) { | |
167 | - state = 0; | |
168 | - break; // ignore blank lines | |
169 | - } | |
170 | - | |
171 | - char *pend; | |
172 | - section_id = strtol(current_line.c_str(), &p_end, 10); | |
173 | - if(section_id) { | |
174 | - state = 1; // find timestamp | |
175 | - break; | |
176 | - } | |
177 | - | |
178 | - default: | |
179 | - throw ExtratorException("SRT parser: Cannot parse file"); | |
180 | - | |
181 | - } // switch | |
182 | - } | |
183 | - | |
184 | - throw ExtratorException("EOF"); | |
185 | - | |
105 | + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | |
106 | + | |
107 | + unsigned int section_id; | |
108 | + uint64_t section_time_in; | |
109 | + uint64_t section_time_out; | |
110 | + string section_text; | |
111 | + | |
112 | + int state = 0; | |
113 | + int line_num = 0; | |
114 | + string current_line; | |
115 | + | |
116 | + while (ifs_.good()) { | |
117 | + | |
118 | + getline(ifs_, current_line, '\n'); | |
119 | + line_num++; | |
120 | + | |
121 | + switch (state) { | |
122 | + case 0: // first state, search id | |
123 | + if(current_line.empty()) | |
124 | + break; // ignore blank lines | |
125 | + | |
126 | + char *p_end; | |
127 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
128 | + if(section_id || current_line.compare(0, 3, "\xEF\xBB\xBF") == 0) { | |
129 | + state = 1; // state 1, find timestamp | |
130 | + } | |
131 | + | |
132 | + break; | |
133 | + | |
134 | + case 1: // want timestamp | |
135 | + bool match; | |
136 | + match = regex_match(current_line, timestamp_regex); | |
137 | + | |
138 | + if(match) { | |
139 | + split_timestamp(current_line, &section_time_in, &section_time_out); | |
140 | + state = 2; // state 2, find text | |
141 | + break; | |
142 | + } | |
143 | + | |
144 | + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | |
145 | + | |
146 | + case 2: // Reading subtitle, first line of text | |
147 | + if(current_line.empty()) { // first line is empty (blank subtitle) | |
148 | + state = 4; // state 4 find next subtitle before empty subtitle | |
149 | + break; | |
150 | + } | |
151 | + | |
152 | + section_text = current_line; | |
153 | + state = 3; // state 3 find second line of text or end of subtitle | |
154 | + break; | |
155 | + | |
156 | + case 3: // create new subtitle object | |
157 | + if(current_line.empty()){ | |
158 | + // std::clog << "[ID] " << section_id << std::endl; | |
159 | + // std::clog << "[IN] " << section_time_in << std::endl; | |
160 | + // std::clog << "[OUT] " << section_time_out << std::endl; | |
161 | + // std::clog << "[TEXT] " << section_text << "\n" <<std::endl; | |
162 | + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | |
163 | + return sub; | |
164 | + } | |
165 | + | |
166 | + section_text += current_line; | |
167 | + break; | |
168 | + | |
169 | + case 4: | |
170 | + if(current_line.empty()) { | |
171 | + state = 0; | |
172 | + break; // ignore blank lines | |
173 | + } | |
174 | + | |
175 | + char *pend; | |
176 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
177 | + if(section_id) { | |
178 | + state = 1; // find timestamp | |
179 | + break; | |
180 | + } | |
181 | + | |
182 | + default: | |
183 | + throw ExtratorException("SRT parser: Cannot parse file"); | |
184 | + | |
185 | + } // switch | |
186 | + } // while | |
187 | + | |
188 | + throw ExtratorException("EOF"); | |
186 | 189 | } |
187 | 190 | |
188 | 191 | string ExtratorSRT::formatText(string line){ | ... | ... |