# dspace_harvest.rb (2.56 KB)
#inspired by https://github.com/code4lib/ruby-oai/blob/master/lib/oai/harvester/harvest.rb
require 'yaml'

# Harvests records from a DSpace server through its OAI interface and inserts
# their Dublin Core fields as RDF triples using the Virtuoso plugin client.
class VirtuosoPlugin::DspaceHarvest

  # Dublin Core fields copied from every record. Each symbol is used both as
  # the reader called on the metadata object and as the last segment of the
  # predicate URI (http://purl.org/dc/elements/1.1/<field>).
  DC_CONVERSION = [:title, :creator, :subject, :description, :date, :type, :identifier, :language, :rights, :format].freeze

  attr_reader :environment

  # environment - environment whose plugin settings drive the harvest.
  # dspace_uri  - base URI of the DSpace server; "/oai/request" is appended to
  #               reach the OAI endpoint, and the same URI names the graph the
  #               triples are inserted into.
  def initialize(environment, dspace_uri = "")
    @environment = environment
    @dspace_uri = dspace_uri
  end

  # Plugin instance built with this harvester as its argument (memoized).
  def plugin
    @plugin ||= VirtuosoPlugin.new(self)
  end

  delegate :settings, :to => :plugin

  # OAI client pointed at "<dspace_uri>/oai/request" (memoized).
  def dspace_client
    @dspace_client ||= OAI::Client.new("#{@dspace_uri}/oai/request")
  end

  # Inserts one triple per non-nil value of every DC_CONVERSION field of
  # +record+. The subject is the metadata identifier, the predicate is the
  # Dublin Core element URI and the graph is the DSpace URI.
  def triplify(record)
    metadata = VirtuosoPlugin::DublinCoreMetadata.new(record.metadata)
    puts "triplify #{record.header.identifier}"
    graph = RDF::URI.new(@dspace_uri)

    DC_CONVERSION.each do |field|
      predicate = RDF::URI.new("http://purl.org/dc/elements/1.1/#{field}")
      # A field may yield nil, a single value or (nested) arrays of values.
      [metadata.send(field)].flatten.compact.each do |value|
        query = RDF::Virtuoso::Query.insert_data([RDF::URI.new(metadata.identifier), predicate, value]).graph(graph)
        plugin.virtuoso_client.insert(query)
      end
    end
  end

  # Lists records from the OAI endpoint (only those since settings.last_harvest
  # when one is stored), triplifies each of them and then stores the time at
  # which this run started as the new last_harvest.
  #
  # An OAI::Exception whose code is 'noRecordsMatch' is treated as an empty
  # harvest; any other OAI::Exception is re-raised before last_harvest is
  # updated.
  def run
    harvest_time = Time.now.utc
    params = settings.last_harvest ? {:from => settings.last_harvest.utc} : {}
    puts "starting harvest #{params} #{@dspace_uri} #{settings.virtuoso_uri}"
    begin
      dspace_client.list_records(params).each { |record| triplify(record) }
    rescue OAI::Exception => ex
      puts ex.to_s
      unless ex.code == 'noRecordsMatch'
        puts "unexpected error"
        raise
      end
    end
    settings.last_harvest = harvest_time
    settings.save!
    puts "ending harvest #{harvest_time}"
  end

  # Starts and runs a harvest for every entry of the plugin's dspace_servers
  # setting, reading each entry's :dspace_uri. Does nothing when no servers
  # are configured.
  #
  # NOTE(review): every server is both queued (start) and harvested inline
  # (run); confirm that both are intended.
  def self.harvest_all(environment, from_start)
    settings = Noosfero::Plugin::Settings.new(environment, VirtuosoPlugin)
    (settings.dspace_servers || []).each do |server|
      harvest = new(environment, server[:dspace_uri])
      harvest.start(from_start)
      harvest.run
    end
  end

  # Enqueues a background harvest unless an identical job is already queued.
  # When +from_start+ is true the stored last_harvest is cleared first, so the
  # next run requests records without a :from restriction.
  def start(from_start = false)
    return unless find_job.empty?

    if from_start
      settings.last_harvest = nil
      settings.save!
    end
    Delayed::Job.enqueue(build_job)
  end

  # Queued jobs whose handler equals the YAML form of the job #start would
  # enqueue for this environment and DSpace URI.
  # NOTE(review): relies on Delayed::Job storing the payload's to_yaml output
  # in the handler column — confirm against the delayed_job version in use.
  def find_job
    Delayed::Job.where(:handler => build_job.to_yaml)
  end

  # Background job payload. dspace_uri is optional so that
  # Job.new(environment_id) keeps working; perform then falls back to the
  # harvester's empty default URI.
  class Job < Struct.new(:environment_id, :dspace_uri)
    # Loads the environment and runs a harvest against the stored DSpace URI.
    def perform
      environment = Environment.find(environment_id)
      harvest = VirtuosoPlugin::DspaceHarvest.new(environment, dspace_uri || "")
      harvest.run
    end
  end

  private

  # Job payload for this harvester; shared by #start and #find_job so the
  # enqueued handler and the lookup always agree.
  def build_job
    Job.new(@environment.id, @dspace_uri)
  end

end