dspace_harvest.rb
2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Inspired by https://github.com/code4lib/ruby-oai/blob/master/lib/oai/harvester/harvest.rb
# Harvests records from a DSpace OAI endpoint and writes their Dublin Core
# fields as triples into a Virtuoso repository.
#
# Endpoint URIs, Virtuoso credentials and the timestamp of the last harvest are
# read from (and, for the timestamp, written back to) the VirtuosoPlugin
# settings of the environment given to the constructor.
class VirtuosoPlugin::DspaceHarvest

  # Dublin Core elements copied from every harvested record. Each symbol is
  # used both as the reader sent to VirtuosoPlugin::DublinCoreMetadata and as
  # the last segment of the predicate URI. Frozen so the shared list cannot be
  # mutated at runtime.
  DC_CONVERSION = [:title, :creator, :subject, :description, :date, :type, :identifier, :language, :rights, :format].freeze

  # @param environment the environment whose plugin settings drive the harvest;
  #   its +id+ is also used to identify the background job.
  def initialize(environment)
    @environment = environment
  end

  # Plugin settings for the environment (memoized).
  def settings
    @settings ||= Noosfero::Plugin::Settings.new(@environment, VirtuosoPlugin)
  end

  # OAI client pointed at "<dspace_uri>/oai/request" (memoized).
  def dspace_client
    @dspace_client ||= OAI::Client.new("#{settings.dspace_uri}/oai/request")
  end

  # Virtuoso repository client (memoized). Queries go to "<virtuoso_uri>/sparql"
  # and updates to "<virtuoso_uri>/sparql-auth", using digest authentication
  # with the credentials stored in the settings.
  def virtuoso_client
    @virtuoso_client ||= RDF::Virtuoso::Repository.new(
      "#{settings.virtuoso_uri}/sparql",
      :update_uri => "#{settings.virtuoso_uri}/sparql-auth",
      :username => settings.virtuoso_username,
      :password => settings.virtuoso_password,
      :auth_method => 'digest',
      :timeout => 30
    )
  end

  # Inserts one triple per Dublin Core value of +record+ into the graph named
  # after the DSpace URI. The subject is the metadata identifier and the
  # predicate is the matching http://purl.org/dc/elements/1.1/ term.
  #
  # @param record a harvested record responding to +metadata+ and +header+
  def triplify(record)
    metadata = VirtuosoPlugin::DublinCoreMetadata.new(record.metadata)
    puts "triplify #{record.header.identifier}"

    # Subject and graph are the same for every triple of this record, so they
    # are built once instead of once per value.
    subject = RDF::URI.new(metadata.identifier)
    graph = RDF::URI.new(settings.dspace_uri)

    DC_CONVERSION.each do |c|
      predicate = RDF::URI.new("http://purl.org/dc/elements/1.1/#{c}")
      # A reader may return nil, a single value or a (nested) list of values.
      values = [metadata.send(c)].flatten.compact
      values.each do |value|
        query = RDF::Virtuoso::Query.insert_data([subject, predicate, value]).graph(graph)
        virtuoso_client.insert(query)
      end
    end
  end

  # Runs one harvest: lists the records changed since the last harvest (or all
  # records when there was none), triplifies each of them, then stores the
  # start time of this run as the new +last_harvest+.
  #
  # Errors raised while listing or triplifying are printed and stop the
  # iteration, but do not propagate to the caller.
  def run
    harvest_time = Time.now.utc
    params = settings.last_harvest ? {:from => settings.last_harvest.utc} : {}
    puts "starting harvest #{params}"
    begin
      records = dspace_client.list_records(params)
      records.each do |record|
        triplify(record)
      end
    rescue StandardError => ex
      # Only StandardError is rescued: rescuing Exception would also swallow
      # SignalException / SystemExit and keep a worker from being interrupted
      # or shut down cleanly in the middle of a harvest.
      puts ex.to_s
    end
    # NOTE(review): last_harvest advances even when the harvest above failed,
    # so records from a failed window are not retried by the next run --
    # confirm this best-effort behaviour is intended.
    settings.last_harvest = harvest_time
    settings.save!
    puts "ending harvest #{harvest_time}"
  end

  # Enqueues a background harvest job for this environment unless a matching
  # job is already queued.
  def start
    if find_job.empty?
      job = VirtuosoPlugin::DspaceHarvest::Job.new(@environment.id)
      Delayed::Job.enqueue(job)
    end
  end

  # Delayed jobs whose handler equals the serialized form of a Job struct for
  # this environment. The match is an exact string comparison, so it depends on
  # the precise serialization format of the handler.
  def find_job
    Delayed::Job.where(:handler => "--- !ruby/struct:VirtuosoPlugin::DspaceHarvest::Job\nenvironment_id: #{@environment.id}\n")
  end

  # Background job payload: carries only the environment id and runs the
  # harvest for that environment when performed.
  class Job < Struct.new(:environment_id)
    # Loads the environment and runs a harvest for it.
    def perform
      environment = Environment.find(environment_id)
      harvest = VirtuosoPlugin::DspaceHarvest.new(environment)
      harvest.run
    end
  end
end