<!-- data-config.xml: Solr DataImportHandler configuration -->
<dataConfig>
  <!--
    JDBC connection to the "trac_colab" PostgreSQL database.  Entities pick
    it with dataSource="trac".  No password attribute is set here.
  -->
  <dataSource
      name="trac"
      type="JdbcDataSource"
      driver="org.postgresql.Driver"
      user="colab"
      url="jdbc:postgresql://bdinterlegis.interlegis.leg.br/trac_colab"/>
  <!--
    JDBC connection to the "colab" PostgreSQL database.  Entities pick it
    with dataSource="colab".  No password attribute is set here.
  -->
  <dataSource
      name="colab"
      type="JdbcDataSource"
      driver="org.postgresql.Driver"
      user="colab"
      url="jdbc:postgresql://bdinterlegis.interlegis.leg.br/colab"/>

  <document>

    <!--
      Wiki pages: one document per page name, taken at its highest version.
      The queries treat the "time" column as microseconds since the Unix
      epoch, hence the division by 1000000 before it is added to the epoch
      timestamp (and the multiplication in deltaQuery).

      Delta import: deltaQuery returns the names of pages that received a
      new row since the last run of this entity; pk="name" identifies that
      key column, and deltaImportQuery reads it back through
      ${dataimporter.delta.name}.  deltaImportQuery returns the same columns
      as the full-import query.
    -->
    <entity name="wiki"
            dataSource="trac"
            pk="name"
            transformer="TemplateTransformer,DateFormatTransformer"
            query="SELECT
                     name,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (max(time)/1000000) * INTERVAL '1s' AS modified,
                     max(version) AS version
                   FROM wiki GROUP BY name"
            deltaQuery="
              SELECT DISTINCT
                name
              FROM
                wiki
              WHERE
                time > (EXTRACT(
                  epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.wiki.last_index_time}'
                ) * 1000000)"
            deltaImportQuery="
                   SELECT
                     name,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (max(time)/1000000) * INTERVAL '1s' AS modified,
                     max(version) AS version
                   FROM
                     wiki
                   WHERE
                     name = '${dataimporter.delta.name}'
                   GROUP BY name">

      <!-- Author and timestamp of the first version (version = 1) of the page. -->
      <entity name="wiki_creation"
              dataSource="trac"
              query="SELECT
                       author AS Creator,
                       TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS created
                     FROM
                       wiki
                     WHERE
                       name = '${wiki.name}'
                       AND version = 1" />

      <!-- Author and timestamp of the highest version of the page. -->
      <entity name="wiki_modification"
              dataSource="trac"
              query="SELECT
                       author AS last_author,
                       TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS modified
                     FROM
                       wiki
                     WHERE
                       name = '${wiki.name}'
                       AND version = '${wiki.version}'" />

      <!-- Every distinct, non-empty author across all versions of the page. -->
      <entity name="wiki_collaborators"
              dataSource="trac"
              query="SELECT DISTINCT
                       author AS collaborator
                     FROM
                       wiki
                     WHERE
                       name = '${wiki.name}'
                       AND author != ''" />

      <!-- Text of the highest version of the page. -->
      <entity name="content"
              dataSource="trac"
              query="SELECT
                       text AS content
                     FROM
                       wiki
                     WHERE
                       name = '${wiki.name}'
                       AND version = '${wiki.version}'" />

      <!-- Constant and templated fields derived from the page name. -->
      <field column="UID" template="WIKI_${wiki.name}" />
      <field column="getId" template="${wiki.name}" />
      <field column="Type" template="wiki" />
      <field column="Title" template="${wiki.name}" />
      <!--
        HH is the 24-hour field.  The 12-hour field (hh) reads an hour of 12
        as 0, which would move timestamps between 12:00 and 12:59 to
        midnight.
      -->
      <field column="created" name="created"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
      <field column="modified" name="modified"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
      <field column="path_string" template="/wiki/${wiki.name}" />
    </entity>

    <!--
      Tickets: one document per row of the ticket table.  The queries treat
      "time" and "changetime" as microseconds since the Unix epoch.

      Delta import: deltaQuery selects the ids of tickets whose changetime
      is newer than the last run of this entity, so that edited tickets are
      picked up as well as newly created ones.  No deltaImportQuery is
      declared; NOTE(review): the importer is then expected to derive one
      from "query" and pk, confirm against the DataImportHandler version in
      use.
    -->
    <entity name="ticket"
            dataSource="trac"
            transformer="TemplateTransformer,DateFormatTransformer"
            pk="id"
            deltaQuery="
              SELECT
                  id
                FROM
                  ticket
              WHERE
                changetime > (EXTRACT(
                  epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.ticket.last_index_time}'
                ) * 1000000)"
            query="SELECT
                     id,
                     summary,
                     description AS Description,
                     milestone,
                     priority,
                     component,
                     version,
                     severity,
                     reporter,
                     owner,
                     status,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000)* INTERVAL '1s' AS created,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (changetime/1000000) * INTERVAL '1s' AS modified
                   FROM
                     ticket">

      <!--
        Collaborators: the reporter, the owner, and every author recorded in
        ticket_change for this ticket.  UNION removes duplicates.
      -->
      <entity name="ticket_collaborator"
              dataSource="trac"
              query="SELECT
                       reporter AS collaborator
                     FROM
                       ticket
                     WHERE
                       id = ${ticket.id}

                     UNION

                     SELECT
                       owner AS collaborator
                     FROM
                       ticket
                     WHERE
                       id = ${ticket.id}

                     UNION

                     SELECT DISTINCT
                       author AS collaborator
                     FROM
                       ticket_change
                     WHERE
                       ticket = ${ticket.id}" />

      <!--
        Splits the non-empty keywords column into one row per keyword.
        NOTE(review): the pattern reaches PostgreSQL with two backslashes
        before the s; whether that matches whitespace or a literal backslash
        followed by s depends on the server's standard_conforming_strings
        setting.  Confirm against the target server.
      -->
      <entity name="ticket_keywords"
              dataSource="trac"
              query="SELECT DISTINCT
                       REGEXP_SPLIT_TO_TABLE(keywords, ',|\\s') AS keyword
                     FROM
                       ticket
                     WHERE
                       id = ${ticket.id} AND
                       keywords != ''" />

      <!-- Author(s) of the ticket_change rows carrying the latest time for this ticket. -->
      <entity name="ticket_modification"
              dataSource="trac"
              query="SELECT DISTINCT
                       author AS last_author
                     FROM
                       ticket_change
                     WHERE
                       ticket = ${ticket.id} AND
                       time = (SELECT max(time)
                               FROM ticket_change
                               WHERE ticket = ${ticket.id});" />

      <!-- One row per ticket_change entry whose field is 'comment'. -->
      <entity name="ticket_comments"
              dataSource="trac"
              query="SELECT
                       newvalue AS comment
                     FROM
                       ticket_change
                     WHERE
                       ticket = ${ticket.id}
                       AND field = 'comment'" />

      <!-- Constant and templated fields derived from the ticket row. -->
      <field column="UID" template="TICKET_${ticket.id}" />
      <field column="getId" template="${ticket.id}" />
      <field column="Type" template="ticket" />
      <field column="path_string" template="/ticket/${ticket.id}" />
      <field column="Title"
             template="#${ticket.id} (${ticket.status}) - ${ticket.summary}" />
      <field column="Creator" template="${ticket.reporter}" />
      <!--
        HH is the 24-hour field.  The 12-hour field (hh) reads an hour of 12
        as 0, which would move timestamps between 12:00 and 12:59 to
        midnight.
      -->
      <field column="created" name="created"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
      <field column="modified" name="modified"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
    </entity>
    
    <!--
      Changesets: one document per row of the revision table, joined to the
      repository row that carries the repository's non-empty 'name' value.
      The queries treat "time" as microseconds since the Unix epoch; the
      same value is exposed as both "created" and "modified".

      Delta import: deltaQuery selects the rev of every revision newer than
      the last run of this entity.  No deltaImportQuery is declared;
      NOTE(review): the importer is then expected to derive one from "query"
      and pk, confirm against the DataImportHandler version in use.
    -->
    <entity name="changeset"
            dataSource="trac"
            transformer="TemplateTransformer,DateFormatTransformer"
            pk="rev"
            deltaQuery="
            SELECT
              rev
            FROM
              revision
            WHERE
              time > (EXTRACT(
                epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.changeset.last_index_time}'
              ) * 1000000)"

            query="SELECT
                     rev AS revision,
                     author AS Creator,
                     author AS collaborator,
                     repos.value AS repos_name,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS created,
                     TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS modified,
                     message
                   FROM
                     revision AS rev JOIN
                     repository AS repos
                       ON rev.repos = repos.id AND
                          repos.name = 'name' AND repos.value != ''">

      <!-- Constant and templated fields derived from the revision row. -->
      <field column="UID" template="CHANGESET_${changeset.revision}" />
      <field column="getId" template="${changeset.revision}" />
      <field column="Type" template="changeset" />
      <field column="path_string"
             template="/changeset/${changeset.revision}/${changeset.repos_name}"
      />
      <field column="Title"
             template="[${changeset.revision}] - ${changeset.message}" />
      <!--
        HH is the 24-hour field.  The 12-hour field (hh) reads an hour of 12
        as 0, which would move timestamps between 12:00 and 12:59 to
        midnight.
      -->
      <field column="created" name="created"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
      <field column="modified" name="modified"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss"/>
    </entity>

    <!--
      Mailing-list threads: one document per thread, with the bodies of all
      its messages concatenated into "content".  Threads flagged as spam are
      excluded.

      Delta import: deltaQuery returns (as "id") every thread whose newest
      message was received after the last run of this entity; pk="id"
      identifies that key column, and deltaImportQuery reads it back through
      ${dataimporter.delta.id}.  deltaImportQuery applies the same spam
      filter as the full-import query so both paths index the same set of
      threads.
    -->
    <entity name="thread"
            dataSource="colab"
            pk="id"
            transformer="TemplateTransformer,DateFormatTransformer"
            deltaQuery="
              SELECT
                thread_id AS id
              FROM
                super_archives_message
              GROUP BY
                thread_id
              HAVING
                max(received_time) > '${dataimporter.thread.last_index_time}'"
            deltaImportQuery="SELECT
                     sam.thread_id AS id,
                     sat.subject_token AS name,
                     sat.latest_message_id,
                     saml.name AS mailinglist,
                     array_to_string(array_agg(sam.body), ' ') AS content
                   FROM
                     super_archives_message AS sam
                     JOIN super_archives_thread AS sat
                       ON sat.id = sam.thread_id
                     JOIN super_archives_mailinglist AS saml
                       ON sat.mailinglist_id = saml.id
                   WHERE
                     sat.id = '${dataimporter.delta.id}'
                     AND sat.spam IS NOT True
                   GROUP BY
                     sam.thread_id,
                     sat.subject_token,
                     sat.latest_message_id,
                     saml.name"

            query="SELECT
                     sam.thread_id AS id,
                     sat.subject_token AS name,
                     sat.latest_message_id,
                     saml.name AS mailinglist,
                     array_to_string(array_agg(sam.body), ' ') AS content
                   FROM
                     super_archives_message AS sam
                     JOIN super_archives_thread AS sat
                       ON sat.id = sam.thread_id
                     JOIN super_archives_mailinglist AS saml
                       ON sat.mailinglist_id = saml.id
                   WHERE
                     sat.spam IS NOT True
                   GROUP BY
                     sam.thread_id,
                     sat.subject_token,
                     sat.latest_message_id,
                     saml.name">

      <!--
        First message of the thread: DISTINCT ON (thread_id) combined with
        the ORDER BY on received_time keeps only the earliest message.
        Check about "DISTINCT ON" here:
          http://archives.postgresql.org/pgsql-general/2002-06/msg01330.php
      -->
      <entity name="first_message"
              dataSource="colab"
              transformer="TemplateTransformer"
              query="SELECT DISTINCT ON (sam.thread_id)
                       sam.received_time AS created,
                       sam.subject_clean AS subject,
                       saea.real_name AS creator_real_name,
                       saea.md5 AS creator_email_md5,
                       au.username AS Creator
                     FROM
                       super_archives_message AS sam
                       JOIN super_archives_emailaddress AS saea
                         ON sam.from_address_id = saea.id
                       LEFT JOIN auth_user AS au
                         ON au.id = saea.user_id
                     WHERE
                       sam.thread_id = ${thread.id}
                     ORDER BY
                       sam.thread_id,
                       sam.received_time">
        <field column="Title" template="${first_message.subject}" />
        <field column="creator_profile_uri"
               template="/user/hash/${first_message.creator_email_md5}" />
      </entity>

      <!--
        The message referenced by the thread's latest_message_id.  The LEFT
        JOIN keeps the row when the sender address has no linked auth_user.
      -->
      <entity name="latest_message"
              dataSource="colab"
              query="SELECT
                       sam.body AS Description,
                       sam.received_time AS modified,
                       au.username AS last_author
                     FROM
                       super_archives_message AS sam
                       JOIN super_archives_emailaddress AS saea
                         ON sam.from_address_id = saea.id
                       LEFT JOIN auth_user AS au
                         ON au.id = saea.user_id
                     WHERE
                       sam.id = ${thread.latest_message_id}" />

      <!--
        Distinct usernames of senders in the thread; the inner JOIN on
        auth_user drops addresses that have no linked user.
      -->
      <entity name="thread_collaborators"
              dataSource="colab"
              query="SELECT DISTINCT
                       au.username AS collaborator
                     FROM
                       super_archives_message AS sam
                       JOIN super_archives_emailaddress AS saea
                         ON sam.from_address_id = saea.id
                       JOIN auth_user AS au
                         ON au.id = saea.user_id
                     WHERE
                       thread_id = ${thread.id}" />

      <!-- Constant and templated fields derived from the thread row. -->
      <field column="UID" template="THREAD_${thread.id}" />
      <field column="getId" template="${thread.name}" />
      <field column="Type" template="thread" />
      <field column="path_string"
             template="/archives/thread/${thread.mailinglist}/${thread.name}" />
      <!--
        HH is the 24-hour field.  The 12-hour field (hh) reads an hour of 12
        as 0, which would move timestamps between 12:00 and 12:59 to
        midnight.
      -->
      <field column="created" name="created"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss" />
      <field column="modified" name="modified"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss" />
    </entity>
  </document>
          
</dataConfig>
  
<!-- 
vim: ts=2 sw=2 ss=2 expandtab:
-->