Commit bac4339914db837e1c6089c6ed03acaa6f54c00a
1 parent
0918b85b
Exists in
devel
Byte Order Mark verification
Showing
1 changed file
with
84 additions
and
81 deletions
Show diff stats
extrator/src/extratorSRT.cpp
| ... | ... | @@ -102,87 +102,90 @@ void ExtratorSRT::Run(){ |
| 102 | 102 | |
| 103 | 103 | Subtitle* ExtratorSRT::next() { |
| 104 | 104 | |
| 105 | - static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | |
| 106 | - | |
| 107 | - unsigned int section_id; | |
| 108 | - uint64_t section_time_in; | |
| 109 | - uint64_t section_time_out; | |
| 110 | - string section_text; | |
| 111 | - | |
| 112 | - int state = 0; | |
| 113 | - int line_num = 0; | |
| 114 | - string current_line; | |
| 115 | - | |
| 116 | - while (ifs_.good()) { | |
| 117 | - | |
| 118 | - getline(ifs_, current_line, '\n'); | |
| 119 | - line_num++; | |
| 120 | - | |
| 121 | - switch (state) { | |
| 122 | - case 0: // first state, search id | |
| 123 | - if(current_line.empty()) | |
| 124 | - break; // ignore blank lines | |
| 125 | - | |
| 126 | - char *p_end; | |
| 127 | - section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 128 | - if(section_id) { | |
| 129 | - state = 1; // state 1, find timestamp | |
| 130 | - } | |
| 131 | - | |
| 132 | - break; | |
| 133 | - | |
| 134 | - case 1: // want timestamp | |
| 135 | - bool match; | |
| 136 | - match = regex_match(current_line, timestamp_regex); | |
| 137 | - | |
| 138 | - if(match) { | |
| 139 | - split_timestamp(current_line, &section_time_in, &section_time_out); | |
| 140 | - state = 2; // state 2, find text | |
| 141 | - break; | |
| 142 | - } | |
| 143 | - | |
| 144 | - throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | |
| 145 | - | |
| 146 | - case 2: // Reading subtitle, first line of text | |
| 147 | - if(current_line.empty()) { // first line is empty (blank subtitle) | |
| 148 | - state = 4; // state 4 find next subtitle before empty subtitle | |
| 149 | - break; | |
| 150 | - } | |
| 151 | - | |
| 152 | - section_text = current_line; | |
| 153 | - state = 3; // state 3 find second line of text or end of subtitle | |
| 154 | - break; | |
| 155 | - | |
| 156 | - case 3: // create new subtitle object | |
| 157 | - if(current_line.empty()){ | |
| 158 | - Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | |
| 159 | - return sub; | |
| 160 | - } | |
| 161 | - | |
| 162 | - section_text += current_line; | |
| 163 | - break; | |
| 164 | - | |
| 165 | - case 4: | |
| 166 | - if(current_line.empty()) { | |
| 167 | - state = 0; | |
| 168 | - break; // ignore blank lines | |
| 169 | - } | |
| 170 | - | |
| 171 | - char *pend; | |
| 172 | - section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 173 | - if(section_id) { | |
| 174 | - state = 1; // find timestamp | |
| 175 | - break; | |
| 176 | - } | |
| 177 | - | |
| 178 | - default: | |
| 179 | - throw ExtratorException("SRT parser: Cannot parse file"); | |
| 180 | - | |
| 181 | - } // switch | |
| 182 | - } | |
| 183 | - | |
| 184 | - throw ExtratorException("EOF"); | |
| 185 | - | |
| 105 | + static const regex timestamp_regex("[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}"); | |
| 106 | + | |
| 107 | + unsigned int section_id; | |
| 108 | + uint64_t section_time_in; | |
| 109 | + uint64_t section_time_out; | |
| 110 | + string section_text; | |
| 111 | + | |
| 112 | + int state = 0; | |
| 113 | + int line_num = 0; | |
| 114 | + string current_line; | |
| 115 | + | |
| 116 | + while (ifs_.good()) { | |
| 117 | + | |
| 118 | + getline(ifs_, current_line, '\n'); | |
| 119 | + line_num++; | |
| 120 | + | |
| 121 | + switch (state) { | |
| 122 | + case 0: // first state, search id | |
| 123 | + if(current_line.empty()) | |
| 124 | + break; // ignore blank lines | |
| 125 | + | |
| 126 | + char *p_end; | |
| 127 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 128 | + if(section_id || current_line.compare(0, 3, "\xEF\xBB\xBF") == 0) { | |
| 129 | + state = 1; // state 1, find timestamp | |
| 130 | + } | |
| 131 | + | |
| 132 | + break; | |
| 133 | + | |
| 134 | + case 1: // want timestamp | |
| 135 | + bool match; | |
| 136 | + match = regex_match(current_line, timestamp_regex); | |
| 137 | + | |
| 138 | + if(match) { | |
| 139 | + split_timestamp(current_line, &section_time_in, &section_time_out); | |
| 140 | + state = 2; // state 2, find text | |
| 141 | + break; | |
| 142 | + } | |
| 143 | + | |
| 144 | + throw ExtratorException("SRT parser: Expected subtitle timestamp at line " + line_num ); | |
| 145 | + | |
| 146 | + case 2: // Reading subtitle, first line of text | |
| 147 | + if(current_line.empty()) { // first line is empty (blank subtitle) | |
| 148 | + state = 4; // state 4 find next subtitle before empty subtitle | |
| 149 | + break; | |
| 150 | + } | |
| 151 | + | |
| 152 | + section_text = current_line; | |
| 153 | + state = 3; // state 3 find second line of text or end of subtitle | |
| 154 | + break; | |
| 155 | + | |
| 156 | + case 3: // create new subtitle object | |
| 157 | + if(current_line.empty()){ | |
| 158 | + // std::clog << "[ID] " << section_id << std::endl; | |
| 159 | + // std::clog << "[IN] " << section_time_in << std::endl; | |
| 160 | + // std::clog << "[OUT] " << section_time_out << std::endl; | |
| 161 | + // std::clog << "[TEXT] " << section_text << "\n" <<std::endl; | |
| 162 | + Subtitle* sub = new Subtitle(section_id, section_text, section_time_in, section_time_out); | |
| 163 | + return sub; | |
| 164 | + } | |
| 165 | + | |
| 166 | + section_text += current_line; | |
| 167 | + break; | |
| 168 | + | |
| 169 | + case 4: | |
| 170 | + if(current_line.empty()) { | |
| 171 | + state = 0; | |
| 172 | + break; // ignore blank lines | |
| 173 | + } | |
| 174 | + | |
| 175 | + char *pend; | |
| 176 | + section_id = strtol(current_line.c_str(), &p_end, 10); | |
| 177 | + if(section_id) { | |
| 178 | + state = 1; // find timestamp | |
| 179 | + break; | |
| 180 | + } | |
| 181 | + | |
| 182 | + default: | |
| 183 | + throw ExtratorException("SRT parser: Cannot parse file"); | |
| 184 | + | |
| 185 | + } // switch | |
| 186 | + } // while | |
| 187 | + | |
| 188 | + throw ExtratorException("EOF"); | |
| 186 | 189 | } |
| 187 | 190 | |
| 188 | 191 | string ExtratorSRT::formatText(string line){ | ... | ... |