Commit 2cf8010792c3075824ee27d0f037aeb178cbbf7e

Authored by Dmitriy Zaporozhets
2 parents af226ae9 e851cb07

Merge pull request #868 from SaitoWu/bugfix/encoding

Bugfix/encoding
app/helpers/commits_helper.rb
@@ -58,14 +58,14 @@ module CommitsHelper @@ -58,14 +58,14 @@ module CommitsHelper
58 next if line.match(/^\-\-\- a/) 58 next if line.match(/^\-\-\- a/)
59 next if line.match(/^\+\+\+ b/) 59 next if line.match(/^\+\+\+ b/)
60 60
61 - full_line = html_escape(line.gsub(/\n/, '')).force_encoding("UTF-8") 61 + full_line = html_escape(line.gsub(/\n/, ''))
62 62
63 if line.match(/^@@ -/) 63 if line.match(/^@@ -/)
64 type = "match" 64 type = "match"
65 65
66 line_old = line.match(/\-[0-9]*/)[0].to_i.abs rescue 0 66 line_old = line.match(/\-[0-9]*/)[0].to_i.abs rescue 0
67 line_new = line.match(/\+[0-9]*/)[0].to_i.abs rescue 0 67 line_new = line.match(/\+[0-9]*/)[0].to_i.abs rescue 0
68 - 68 +
69 next if line_old == 1 && line_new == 1 69 next if line_old == 1 && line_new == 1
70 yield(full_line, type, nil, nil, nil) 70 yield(full_line, type, nil, nil, nil)
71 next 71 next
app/views/refs/_tree.html.haml
@@ -42,9 +42,9 @@ @@ -42,9 +42,9 @@
42 .readme 42 .readme
43 - if content.name =~ /\.(md|markdown)$/i 43 - if content.name =~ /\.(md|markdown)$/i
44 = preserve do 44 = preserve do
45 - = markdown(content.data.detect_encoding!) 45 + = markdown(content.data)
46 - else 46 - else
47 - = simple_format(content.data.detect_encoding!) 47 + = simple_format(content.data)
48 48
49 - if params[:path] 49 - if params[:path]
50 - history_path = tree_file_project_ref_path(@project, @ref, params[:path]) 50 - history_path = tree_file_project_ref_path(@project, @ref, params[:path])
app/views/refs/_tree_file.html.haml
@@ -13,7 +13,7 @@ @@ -13,7 +13,7 @@
13 #tree-readme-holder 13 #tree-readme-holder
14 .readme 14 .readme
15 = preserve do 15 = preserve do
16 - = markdown(file.data.detect_encoding!) 16 + = markdown(file.data)
17 - else 17 - else
18 .view_file_content 18 .view_file_content
19 - unless file.empty? 19 - unless file.empty?
config/initializers/gitlabhq/20_grit_ext.rb
1 require 'grit' 1 require 'grit'
2 require 'pygments' 2 require 'pygments'
3 3
  4 +Grit::Git.git_timeout = GIT_OPTS["git_timeout"]
  5 +Grit::Git.git_max_size = GIT_OPTS["git_max_size"]
  6 +
4 Grit::Blob.class_eval do 7 Grit::Blob.class_eval do
5 include Linguist::BlobHelper 8 include Linguist::BlobHelper
6 -end  
7 9
8 -#monkey patch raw_object from string  
9 -Grit::GitRuby::Internal::RawObject.class_eval do  
10 - def content  
11 - @content 10 + def data
  11 + @data ||= @repo.git.cat_file({:p => true}, id)
  12 + Gitlab::Encode.utf8 @data
12 end 13 end
13 end 14 end
14 15
15 -  
16 Grit::Diff.class_eval do 16 Grit::Diff.class_eval do
17 def old_path 17 def old_path
18 - Gitlab::Encode.utf8 a_path 18 + Gitlab::Encode.utf8 @a_path
19 end 19 end
20 20
21 def new_path 21 def new_path
22 - Gitlab::Encode.utf8 b_path 22 + Gitlab::Encode.utf8 @b_path
23 end 23 end
24 -end  
25 24
26 -Grit::Git.git_timeout = GIT_OPTS["git_timeout"]  
27 -Grit::Git.git_max_size = GIT_OPTS["git_max_size"] 25 + def diff
  26 + Gitlab::Encode.utf8 @diff
  27 + end
  28 +end
lib/gitlab/encode.rb
1 # Patch Strings to enable detect_encoding! on views 1 # Patch Strings to enable detect_encoding! on views
2 require 'charlock_holmes/string' 2 require 'charlock_holmes/string'
3 module Gitlab 3 module Gitlab
4 - module Encode 4 + module Encode
5 extend self 5 extend self
6 6
7 def utf8 message 7 def utf8 message
  8 + # return nil if message is nil
8 return nil unless message 9 return nil unless message
9 10
10 - detect = CharlockHolmes::EncodingDetector.detect(message) rescue {} 11 + message.force_encoding("utf-8")
  12 + # return message if message type is binary
  13 + detect = CharlockHolmes::EncodingDetector.detect(message)
  14 + return message if detect[:type] == :binary
11 15
12 - # It's better to default to UTF-8 as sometimes it's wrongly detected as another charset  
13 - if detect[:encoding] && detect[:confidence] == 100  
14 - CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')  
15 - else  
16 - message  
17 - end.force_encoding("utf-8") 16 + # if message is utf-8 encoding, just return it
  17 + return message if message.valid_encoding?
18 18
19 - # Prevent app from crash cause of  
20 - # encoding errors 19 + # if message is not utf-8 encoding, convert it
  20 + if detect[:encoding]
  21 + message.force_encoding(detect[:encoding])
  22 + message.encode!("utf-8", detect[:encoding], :undef => :replace, :replace => "", :invalid => :replace)
  23 + end
  24 +
  25 + # ensure message encoding is utf8
  26 + message.valid_encoding? ? message : raise
  27 +
  28 + # Prevent app from crash cause of encoding errors
21 rescue 29 rescue
22 - "--broken encoding: #{encoding}" 30 + "--broken encoding: #{detect[:encoding]}"
23 end 31 end
24 32
25 def detect_encoding message 33 def detect_encoding message
lib/graph_commit.rb
@@ -96,13 +96,13 @@ class GraphCommit @@ -96,13 +96,13 @@ class GraphCommit
96 h[:parents] = self.parents.collect do |p| 96 h[:parents] = self.parents.collect do |p|
97 [p.id,0,0] 97 [p.id,0,0]
98 end 98 end
99 - h[:author] = author.name.force_encoding("UTF-8") 99 + h[:author] = Gitlab::Encode.utf8(author.name)
100 h[:time] = time 100 h[:time] = time
101 h[:space] = space 101 h[:space] = space
102 h[:refs] = refs.collect{|r|r.name}.join(" ") unless refs.nil? 102 h[:refs] = refs.collect{|r|r.name}.join(" ") unless refs.nil?
103 h[:id] = sha 103 h[:id] = sha
104 h[:date] = date 104 h[:date] = date
105 - h[:message] = message.force_encoding("UTF-8") 105 + h[:message] = Gitlab::Encode.utf8(message)
106 h[:login] = author.email 106 h[:login] = author.email
107 h 107 h
108 end 108 end