# noosfero_harvest.rb
# Walks the profiles of a Noosfero environment (and their articles, comments
# and friendships) and inserts one RDF triple per mapped attribute into a
# Virtuoso graph named after the environment's top URL.
#
# Mapping tables have the form
#
#   attribute_name => { :predicate => "<predicate IRI>", :value => <optional> }
#
# * When :value is a lambda it is evaluated on the harvester instance with
#   (source, target), so instance helpers such as url_for are available.
# * When :value is any other truthy object it is used as-is.
# * When :value is absent, the attribute is read from the target object
#   (if the target responds to it).
# * A nil entry disables a key inherited from COMMON_MAPPING.
class VirtuosoPlugin::NoosferoHarvest
  include Rails.application.routes.url_helpers

  # Only strings that are, in their entirety, an absolute http(s) URL are
  # stored as IRIs; free text that merely contains a link stays a literal.
  ABSOLUTE_URL = %r{\Ahttps?://\S+\z}

  # Applied to every harvested object, merged with the specific mapping.
  COMMON_MAPPING = {
    :type => {:predicate => "http://purl.org/dc/terms/type", :value => lambda {|_source, target| target.class.name}},
    :created_at => {:predicate => "http://purl.org/dc/terms/created"},
    :updated_at => {:predicate => "http://purl.org/dc/terms/modified"},
  }.freeze

  # source = owning profile, target = article.
  ARTICLE_MAPPING = {
    :title => {:predicate => "http://purl.org/dc/terms/title"},
    :abstract => {:predicate => "http://purl.org/dc/terms/abstract"},
    :body => {:predicate => "http://purl.org/dc/terms/description"},
    :part_of => {:predicate => "http://purl.org/dc/terms/isPartOf", :value => lambda {|source, _target| url_for(source.url)} },
    :published_at => {:predicate => "http://purl.org/dc/terms/issued"},
    :author => {:predicate => "http://purl.org/dc/terms/creator", :value => lambda {|_source, target| url_for(target.author_url) if target.author_url} },
  }.freeze

  # source = environment, target = profile.
  PROFILE_MAPPING = {
    :name => {:predicate => "http://purl.org/dc/terms/title"},
    :public? => {:predicate => "http://purl.org/socialparticipation/opa#publicProfile"},
  }.freeze

  # source = article, target = comment.
  COMMENT_MAPPING = {
    :title => {:predicate => "http://purl.org/dc/terms/title"},
    :body => {:predicate => "http://purl.org/dc/terms/description"},
    :part_of => {:predicate => "http://purl.org/dc/terms/isPartOf", :value => lambda {|source, _target| url_for(source.url)} },
    :author => {:predicate => "http://purl.org/dc/terms/creator", :value => lambda {|_source, target| url_for(target.author_url) if target.author_url} },
  }.freeze

  # source = person, target = friend. The nil entries switch off the
  # COMMON_MAPPING keys so only the foaf:knows triple is produced.
  FRIENDSHIP_MAPPING = {
    :knows => {:predicate => "http://xmlns.com/foaf/0.1/knows", :value => lambda {|_source, target| url_for(target.url)} },
    :type => nil, :created_at => nil, :updated_at => nil,
  }.freeze

  attr_reader :environment

  # @param environment the environment whose profiles are harvested; its
  #   top_url is used as the name of the target graph.
  def initialize(environment)
    @environment = environment
    @graph = environment.top_url
  end

  # Lazily built plugin instance; provides settings and the Virtuoso client.
  def plugin
    @plugin ||= VirtuosoPlugin.new(self)
  end

  delegate :settings, :to => :plugin

  # Inserts the triples of every comment of +article+, printing progress.
  def triplify_comments(article)
    total = article.comments.count
    count = 0
    article.comments.each do |comment|
      subject_identifier = url_for(comment.url)
      count += 1
      puts "triplify #{subject_identifier} comment (#{count}/#{total})"
      triplify_mappings(COMMENT_MAPPING, subject_identifier, article, comment)
    end
  end

  # Inserts the triples of every public article of +profile+ (and of each
  # article's comments), printing progress.
  def triplify_articles(profile)
    articles = profile.articles.public
    # Count the same scope that is iterated so the progress total is accurate.
    total = articles.count
    count = 0
    articles.each do |article|
      subject_identifier = url_for(article.url)
      count += 1
      puts "triplify #{subject_identifier} article (#{count}/#{total})"
      triplify_mappings(ARTICLE_MAPPING, subject_identifier, profile, article)
      triplify_comments(article)
    end
  end

  # Inserts one foaf:knows triple per friend of +person+, printing progress.
  # The subject of every triple is the person's own URL.
  def triplify_friendship(person)
    friends = person.friends
    total = friends.count
    return if total.zero?

    subject_identifier = url_for(person.url)
    count = 0
    friends.each do |friend|
      count += 1
      puts "triplify #{subject_identifier} friendship (#{count}/#{total})"
      triplify_mappings(FRIENDSHIP_MAPPING, subject_identifier, person, friend)
    end
  end

  # Inserts the triples of every profile of the environment. Articles are
  # harvested only for public profiles, friendships only for persons.
  def triplify_profiles
    total = environment.profiles.count
    count = 0
    environment.profiles.each do |profile|
      subject_identifier = url_for(profile.url)
      count += 1
      puts "triplify #{subject_identifier} profile (#{count}/#{total})"
      triplify_mappings(PROFILE_MAPPING, subject_identifier, environment, profile)
      triplify_articles(profile) if profile.public?
      triplify_friendship(profile) if profile.person?
    end
  end

  # Entry point of the harvest.
  def run
    triplify_profiles
  end

  protected

  # Inserts one triple per enabled entry of COMMON_MAPPING merged with
  # +mapping+.
  #
  # @param mapping [Hash] one of the *_MAPPING tables
  # @param subject_identifier [String] IRI used as the subject of every triple
  # @param source the object that owns +target+ (first lambda argument)
  # @param target the object being described (second lambda argument)
  def triplify_mappings(mapping, subject_identifier, source, target)
    subject = RDF::URI.new(subject_identifier)
    COMMON_MAPPING.merge(mapping).each do |attribute, rule|
      next unless rule

      value = resolve_value(attribute, rule, source, target)
      # `false` is blank for ActiveSupport but is a meaningful value here
      # (e.g. a non-public profile), so only nil/empty values are skipped.
      next if value != false && value.blank?

      insert_triple(subject, RDF::URI.new(rule[:predicate]), value)
    end
  end

  # Computes the object value for one mapping entry; returns nil when the
  # entry has no :value and the target does not respond to +attribute+.
  def resolve_value(attribute, rule, source, target)
    custom = rule[:value]
    if custom
      # The lambdas are created in the class body, so they must be evaluated
      # on the instance for url_for (an instance method) to resolve.
      custom.kind_of?(Proc) ? instance_exec(source, target, &custom) : custom
    elsif target.respond_to?(attribute)
      target.send(attribute)
    end
  end

  # Converts +value+ to the matching RDF term (IRI, dateTime or boolean
  # literal; anything else is passed through unchanged) and sends an
  # INSERT DATA query for the triple to the Virtuoso client.
  def insert_triple(subject, predicate, value)
    value = RDF::URI.new(value) if value.kind_of?(String) && value =~ ABSOLUTE_URL
    value = RDF::Literal::DateTime.new(value) if value.kind_of?(ActiveSupport::TimeWithZone)
    value = RDF::Literal::Boolean.new(value) if value.equal?(true) || value.equal?(false)
    query = RDF::Virtuoso::Query.insert_data([RDF::URI.new(subject), RDF::URI.new(predicate), value]).graph(RDF::URI.new(@graph))
    plugin.virtuoso_client.insert(query)
  end
end