Commit a25a85b9396cbc07bf2bfcd12d3c0a6685df66d6
1 parent
6e5caa19
Exists in master and in 4 other branches
merge charlock_holmes to master
Showing 3 changed files with 9 additions and 11 deletions
Show diff stats
Gemfile
... | ... | @@ -22,9 +22,9 @@ gem "acts_as_list" |
22 | 22 | gem "rdiscount" |
23 | 23 | gem "acts-as-taggable-on", "~> 2.1.0" |
24 | 24 | gem "drapper" |
25 | -gem "rchardet19", "~> 1.3.5" | |
26 | 25 | gem "resque" |
27 | 26 | gem "httparty" |
27 | +gem "charlock_holmes" | |
28 | 28 | |
29 | 29 | group :assets do |
30 | 30 | gem "sass-rails", "~> 3.1.0" | ... | ... |
Gemfile.lock
... | ... | @@ -77,6 +77,7 @@ GEM |
77 | 77 | xpath (~> 0.1.4) |
78 | 78 | carrierwave (0.5.8) |
79 | 79 | activesupport (~> 3.0) |
80 | + charlock_holmes (0.6.8) | |
80 | 81 | childprocess (0.2.2) |
81 | 82 | ffi (~> 1.0.6) |
82 | 83 | coffee-rails (3.1.1) |
... | ... | @@ -172,7 +173,6 @@ GEM |
172 | 173 | rdoc (~> 3.4) |
173 | 174 | thor (~> 0.14.6) |
174 | 175 | rake (0.9.2.2) |
175 | - rchardet19 (1.3.5) | |
176 | 176 | rdiscount (1.6.8) |
177 | 177 | rdoc (3.11) |
178 | 178 | json (~> 1.4) |
... | ... | @@ -285,6 +285,7 @@ DEPENDENCIES |
285 | 285 | awesome_print |
286 | 286 | capybara |
287 | 287 | carrierwave |
288 | + charlock_holmes | |
288 | 289 | coffee-rails (~> 3.1.0) |
289 | 290 | database_cleaner |
290 | 291 | devise (= 1.5.0) |
... | ... | @@ -302,7 +303,6 @@ DEPENDENCIES |
302 | 303 | pygments.rb (= 0.2.3) |
303 | 304 | rails (= 3.1.1) |
304 | 305 | rails-footnotes (~> 3.7.5) |
305 | - rchardet19 (~> 1.3.5) | |
306 | 306 | rdiscount |
307 | 307 | resque |
308 | 308 | rspec-rails | ... | ... |
lib/utils.rb
... | ... | @@ -17,15 +17,13 @@ module Utils |
17 | 17 | end |
18 | 18 | |
19 | 19 | module CharEncode |
20 | - def encode(string) | |
21 | - return '' unless string | |
22 | - cd = CharDet.detect(string) | |
23 | - if cd.confidence > 0.6 | |
24 | - string.force_encoding(cd.encoding) | |
20 | + def encode(content) | |
21 | + content ||= '' | |
22 | + detection = CharlockHolmes::EncodingDetector.detect(content) | |
23 | + if hash = detection | |
24 | + content = CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding] | |
25 | 25 | end |
26 | - string.encode("utf-8", :undef => :replace, :replace => "?", :invalid => :replace) | |
27 | - rescue | |
28 | - "Invalid Encoding" | |
26 | + content | |
29 | 27 | end |
30 | 28 | end |
31 | 29 | ... | ... |