Commit bac4339914db837e1c6089c6ed03acaa6f54c00a
1 parent
0918b85b
Exists in
devel
Byte Order Mark verification
Showing
1 changed file
with
84 additions
and
81 deletions
Show diff stats
extrator/src/extratorSRT.cpp
@@ -102,87 +102,90 @@ void ExtratorSRT::Run(){ | @@ -102,87 +102,90 @@ void ExtratorSRT::Run(){ | ||
102 | 102 | ||
103 | Subtitle* ExtratorSRT::next() { | 103 | Subtitle* ExtratorSRT::next() { |
104 | 104 | ||
105 | - static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | ||
106 | - | ||
107 | - unsigned int section_id; | ||
108 | - uint64_t section_time_in; | ||
109 | - uint64_t section_time_out; | ||
110 | - string section_text; | ||
111 | - | ||
112 | - int state = 0; | ||
113 | - int line_num = 0; | ||
114 | - string current_line; | ||
115 | - | ||
116 | - while (ifs_.good()) { | ||
117 | - | ||
118 | - getline(ifs_, current_line, '\n'); | ||
119 | - line_num++; | ||
120 | - | ||
121 | - switch (state) { | ||
122 | - case 0: // first state, search id | ||
123 | - if(current_line.empty()) | ||
124 | - break; // ignore blank lines | ||
125 | - | ||
126 | - char *p_end; | ||
127 | - section_id = strtol(current_line.c_str(), &p_end, 10); | ||
128 | - if(section_id) { | ||
129 | - state = 1; // state 1, find timestamp | ||
130 | - } | ||
131 | - | ||
132 | - break; | ||
133 | - | ||
134 | - case 1: // want timestamp | ||
135 | - bool match; | ||
136 | - match = regex_match(current_line, timestamp_regex); | ||
137 | - | ||
138 | - if(match) { | ||
139 | - split_timestamp(current_line, &section_time_in, &section_time_out); | ||
140 | - state = 2; // state 2, find text | ||
141 | - break; | ||
142 | - } | ||
143 | - | ||
144 | - throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | ||
145 | - | ||
146 | - case 2: // Reading subtitle, first line of text | ||
147 | - if(current_line.empty()) { // first line is empty (blank subtitle) | ||
148 | - state = 4; // state 4 find next subtitle before empty subtitle | ||
149 | - break; | ||
150 | - } | ||
151 | - | ||
152 | - section_text = current_line; | ||
153 | - state = 3; // state 3 find second line of text or end of subtitle | ||
154 | - break; | ||
155 | - | ||
156 | - case 3: // create new subtitle object | ||
157 | - if(current_line.empty()){ | ||
158 | - Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | ||
159 | - return sub; | ||
160 | - } | ||
161 | - | ||
162 | - section_text += current_line; | ||
163 | - break; | ||
164 | - | ||
165 | - case 4: | ||
166 | - if(current_line.empty()) { | ||
167 | - state = 0; | ||
168 | - break; // ignore blank lines | ||
169 | - } | ||
170 | - | ||
171 | - char *pend; | ||
172 | - section_id = strtol(current_line.c_str(), &p_end, 10); | ||
173 | - if(section_id) { | ||
174 | - state = 1; // find timestamp | ||
175 | - break; | ||
176 | - } | ||
177 | - | ||
178 | - default: | ||
179 | - throw ExtratorException("SRT parser: Cannot parse file"); | ||
180 | - | ||
181 | - } // switch | ||
182 | - } | ||
183 | - | ||
184 | - throw ExtratorException("EOF"); | ||
185 | - | 105 | + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); |
106 | + | ||
107 | + unsigned int section_id; | ||
108 | + uint64_t section_time_in; | ||
109 | + uint64_t section_time_out; | ||
110 | + string section_text; | ||
111 | + | ||
112 | + int state = 0; | ||
113 | + int line_num = 0; | ||
114 | + string current_line; | ||
115 | + | ||
116 | + while (ifs_.good()) { | ||
117 | + | ||
118 | + getline(ifs_, current_line, '\n'); | ||
119 | + line_num++; | ||
120 | + | ||
121 | + switch (state) { | ||
122 | + case 0: // first state, search id | ||
123 | + if(current_line.empty()) | ||
124 | + break; // ignore blank lines | ||
125 | + | ||
126 | + char *p_end; | ||
127 | + section_id = strtol(current_line.c_str(), &p_end, 10); | ||
128 | + if(section_id || current_line.compare(0, 3, "\xEF\xBB\xBF") == 0) { | ||
129 | + state = 1; // state 1, find timestamp | ||
130 | + } | ||
131 | + | ||
132 | + break; | ||
133 | + | ||
134 | + case 1: // want timestamp | ||
135 | + bool match; | ||
136 | + match = regex_match(current_line, timestamp_regex); | ||
137 | + | ||
138 | + if(match) { | ||
139 | + split_timestamp(current_line, &section_time_in, &section_time_out); | ||
140 | + state = 2; // state 2, find text | ||
141 | + break; | ||
142 | + } | ||
143 | + | ||
144 | + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | ||
145 | + | ||
146 | + case 2: // Reading subtitle, first line of text | ||
147 | + if(current_line.empty()) { // first line is empty (blank subtitle) | ||
148 | + state = 4; // state 4 find next subtitle before empty subtitle | ||
149 | + break; | ||
150 | + } | ||
151 | + | ||
152 | + section_text = current_line; | ||
153 | + state = 3; // state 3 find second line of text or end of subtitle | ||
154 | + break; | ||
155 | + | ||
156 | + case 3: // create new subtitle object | ||
157 | + if(current_line.empty()){ | ||
158 | + // std::clog << "[ID] " << section_id << std::endl; | ||
159 | + // std::clog << "[IN] " << section_time_in << std::endl; | ||
160 | + // std::clog << "[OUT] " << section_time_out << std::endl; | ||
161 | + // std::clog << "[TEXT] " << section_text << "\n" <<std::endl; | ||
162 | + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | ||
163 | + return sub; | ||
164 | + } | ||
165 | + | ||
166 | + section_text += current_line; | ||
167 | + break; | ||
168 | + | ||
169 | + case 4: | ||
170 | + if(current_line.empty()) { | ||
171 | + state = 0; | ||
172 | + break; // ignore blank lines | ||
173 | + } | ||
174 | + | ||
175 | + char *pend; | ||
176 | + section_id = strtol(current_line.c_str(), &p_end, 10); | ||
177 | + if(section_id) { | ||
178 | + state = 1; // find timestamp | ||
179 | + break; | ||
180 | + } | ||
181 | + | ||
182 | + default: | ||
183 | + throw ExtratorException("SRT parser: Cannot parse file"); | ||
184 | + | ||
185 | + } // switch | ||
186 | + } // while | ||
187 | + | ||
188 | + throw ExtratorException("EOF"); | ||
186 | } | 189 | } |
187 | 190 | ||
188 | string ExtratorSRT::formatText(string line){ | 191 | string ExtratorSRT::formatText(string line){ |