Commit 89b449818fda01bb9e5f6e360eb3706f22fec870

Authored by sergiocampos
1 parent 4eb4f872

Arquivos de configuração para o sistema de buscas integrada


git-svn-id: http://repositorio.interlegis.gov.br/colab/trunk@5846 bee1b3ed-c3eb-0310-9994-b88e04532788
colab/settings.py
... ... @@ -13,13 +13,6 @@ PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
13 13  
14 14 MANAGERS = ADMINS
15 15  
16   -DATABASES = {
17   - 'default': {
18   - 'ENGINE': 'django.db.backends.sqlite3',
19   - 'NAME': os.path.join(PROJECT_PATH, 'colab.db'),
20   - }
21   -}
22   -
23 16 LOGIN_URL = '/login/'
24 17  
25 18 # Local time zone for this installation. Choices can be found here:
... ... @@ -66,9 +59,6 @@ STATICFILES_FINDERS = (
66 59 # 'django.contrib.staticfiles.finders.DefaultStorageFinder',
67 60 )
68 61  
69   -# Make this unique, and don't share it with anybody.
70   -SECRET_KEY = '^$10gkd7%o==sa$f$&au!c*w*@bue^n7fn*o8sederb9a1cup5'
71   -
72 62 # List of callables that know how to import templates from various sources.
73 63 TEMPLATE_LOADERS = (
74 64 'django.template.loaders.filesystem.Loader',
... ... @@ -136,3 +126,6 @@ LOGGING = {
136 126 },
137 127 }
138 128 }
  129 +
  130 +from settings_local import *
  131 +
... ...
sorl-conf/README 0 → 100644
... ... @@ -0,0 +1,16 @@
  1 +Installation instructions for Ubuntu 10.04
  2 +-------------------------------------------
  3 +
  4 +* Install Java, tomcat, Solr and JDBC Postgres drivers (Ubuntu partner repositories must be enabled):
  5 +sudo apt-get install sun-java6-bin tomcat6 solr-common libpg-java
  6 +
  7 +* Link the JDBC Postgres drivers into the Solr installation:
  8 +sudo ln -s /usr/share/java/postgresql-jdbc3-8.4.jar /usr/share/solr/WEB-INF/lib/
  9 +
  10 +* Check data-config.xml to make sure all the information needed to connect to the databases is correct
  11 +
  12 +* Copy the configuration files from this folder into /etc/solr/conf/
  13 +
  14 +* Restart tomcat:
  15 +sudo /etc/init.d/tomcat6 restart
  16 +
... ...
sorl-conf/data-config.xml 0 → 100644
... ... @@ -0,0 +1,314 @@
  1 + <dataConfig>
  2 + <dataSource name="trac"
  3 + type="JdbcDataSource"
  4 + driver="org.postgresql.Driver"
  5 + url="jdbc:postgresql://localhost/trac_gitec"
  6 + user="colab" />
  7 + <dataSource name="colab"
  8 + type="JdbcDataSource"
  9 + driver="org.postgresql.Driver"
  10 + url="jdbc:postgresql://localhost/colab"
  11 + user="colab" />
  12 +
  13 + <document>
  14 +
  15 + <entity name="wiki"
  16 + dataSource="trac"
  17 + transformer="TemplateTransformer,DateFormatTransformer"
  18 + query="SELECT
  19 + name,
  20 + TIMESTAMP 'epoch' + max(time) * INTERVAL '1s' AS modified,
  21 + max(version) AS version
  22 + FROM wiki GROUP BY name"
  23 + deltaQuery="
  24 + SELECT DISTINCT
  25 + name
  26 + FROM
  27 + wiki
  28 + WHERE
  29 + time > EXTRACT(
  30 + epoch FROM TIMESTAMP '${dataimporter.wiki.last_index_time}'
  31 + )"
  32 + deltaImportQuery="
  33 + SELECT
  34 + name,
  35 + TIMESTAMP 'epoch' + max(time) * INTERVAL '1s' AS modified,
  36 + max(version) AS version
  37 + FROM
  38 + wiki
  39 + WHERE
  40 + name = '${dataimporter.delta.id}'
  41 + GROUP BY name">
  42 +
  43 + <entity name="wiki_creation"
  44 + dataSource="trac"
  45 + query="SELECT
  46 + author AS creator,
  47 + TIMESTAMP 'epoch' + time * INTERVAL '1s' AS created
  48 + FROM
  49 + wiki
  50 + WHERE
  51 + name = '${wiki.name}'
  52 + AND version = 1" />
  53 +
  54 + <entity name="wiki_collaborators"
  55 + dataSource="trac"
  56 + query="SELECT DISTINCT
  57 + author AS collaborator
  58 + FROM
  59 + wiki
  60 + WHERE
  61 + name = '${wiki.name}'
  62 + AND author != ''" />
  63 +
  64 + <entity name="content"
  65 + dataSource="trac"
  66 + query="SELECT
  67 + text AS content
  68 + FROM
  69 + wiki
  70 + WHERE
  71 + name = '${wiki.name}'
  72 + AND version = '${wiki.version}'" />
  73 +
  74 + <field column="uid" template="WIKI_${wiki.name}" />
  75 + <field column="id" template="${wiki.name}" />
  76 + <field column="type" template="wiki" />
  77 + <field column="title" template="${wiki.name}" />
  78 + <field column="created" name="created"
  79 + dateTimeFormat="yyyy-MM-dd hh:mm:ss"/>
  80 + <field column="modified" name="modified"
  81 + dateTimeFormat="yyyy-MM-dd hh:mm:ss"/>
  82 + </entity>
  83 +
  84 + <entity name="ticket"
  85 + dataSource="trac"
  86 + transformer="TemplateTransformer,DateFormatTransformer"
  87 + pk="id"
  88 + deltaQuery="
  89 + SELECT
  90 + id
  91 + FROM
  92 + ticket
  93 + WHERE
  94 + time > EXTRACT(
  95 + epoch FROM TIMESTAMP '${dataimporter.ticket.last_index_time}'
  96 + )"
  97 + query="SELECT
  98 + id,
  99 + summary,
  100 + description,
  101 + milestone,
  102 + priority,
  103 + component,
  104 + version,
  105 + severity,
  106 + reporter,
  107 + owner,
  108 + status,
  109 + TIMESTAMP 'epoch' + time * INTERVAL '1s' AS created,
  110 + TIMESTAMP 'epoch' + changetime * INTERVAL '1s' AS modified
  111 + FROM
  112 + ticket">
  113 +
  114 + <entity name="ticket_collaborator"
  115 + dataSource="trac"
  116 + query="SELECT
  117 + reporter AS collaborator
  118 + FROM
  119 + ticket
  120 + WHERE
  121 + id = ${ticket.id}
  122 +
  123 + UNION
  124 +
  125 + SELECT
  126 + owner AS collaborator
  127 + FROM
  128 + ticket
  129 + WHERE
  130 + id = ${ticket.id}
  131 +
  132 + UNION
  133 +
  134 + SELECT DISTINCT
  135 + author AS collaborator
  136 + FROM
  137 + ticket_change
  138 + WHERE
  139 + ticket = ${ticket.id}" />
  140 +
  141 + <entity name="ticket_keywords"
  142 + dataSource="trac"
  143 + query="SELECT DISTINCT
  144 + REGEXP_SPLIT_TO_TABLE(keywords, ',|\\s') AS keyword
  145 + FROM
  146 + ticket
  147 + WHERE
  148 + id = ${ticket.id} AND
  149 + keywords != ''" />
  150 +
  151 + <entity name="ticket_comments"
  152 + dataSource="trac"
  153 + query="SELECT
  154 + newvalue AS comment
  155 + FROM
  156 + ticket_change
  157 + WHERE
  158 + ticket = ${ticket.id}
  159 + AND field = 'comment'" />
  160 +
  161 + <field column="uid" template="TICKET_${ticket.id}" />
  162 + <field column="type" template="ticket" />
  163 + <field column="title"
  164 + template="#${ticket.id} (${ticket.status}) - ${ticket.summary}" />
  165 + <field column="creator" template="${ticket.reporter}" />
  166 + <field column="created" name="created"
  167 + dateTimeFormat="yyyy-MM-dd hh:mm:ss"/>
  168 + <field column="modified" name="modified"
  169 + dateTimeFormat="yyyy-MM-dd hh:mm:ss"/>
  170 + </entity>
  171 +
  172 + <entity name="changeset"
  173 + dataSource="trac"
  174 + transformer="TemplateTransformer,DateFormatTransformer"
  175 + pk="rev"
  176 + deltaQuery="
  177 + SELECT
  178 + rev
  179 + FROM
  180 + revision
  181 + WHERE
  182 + time > EXTRACT(
  183 + epoch FROM TIMESTAMP '${dataimporter.changeset.last_index_time}'
  184 + )"
  185 +
  186 + query="SELECT
  187 + rev AS revision,
  188 + author AS creator,
  189 + author AS collaborator,
  190 + TIMESTAMP 'epoch' + time * INTERVAL '1s' AS created,
  191 + message
  192 + FROM
  193 + revision">
  194 + <field column="uid" template="CHANGESET_${changeset.revision}" />
  195 + <field column="id" template="${changeset.revision}" />
  196 + <field column="type" template="changeset" />
  197 + <field column="title"
  198 + template="[${changeset.revision}] - ${changeset.message}" />
  199 + <field column="created" name="created"
  200 + dateTimeFormat="yyyy-MM-dd hh:mm:ss"/>
  201 + </entity>
  202 +
  203 + <entity name="thread"
  204 + dataSource="colab"
  205 + transformer="TemplateTransformer,DateFormatTransformer"
  206 + deltaQuery="
  207 + SELECT
  208 + thread_id AS id
  209 + FROM
  210 + super_archives_message
  211 + GROUP BY
  212 + thread_id
  213 + HAVING
  214 + max(received_time) > '${dataimporter.thread.last_index_time}'"
  215 + deltaImportQuery="SELECT
  216 + sam.thread_id AS id,
  217 + sat.subject_token AS name,
  218 + sat.latest_message_id,
  219 + saml.name AS mailinglist,
  220 + array_to_string(array_agg(sam.body), ' ') AS content
  221 + FROM
  222 + super_archives_message AS sam
  223 + JOIN super_archives_thread AS sat
  224 + ON sat.id = sam.thread_id
  225 + JOIN super_archives_mailinglist AS saml
  226 + ON sat.mailinglist_id = saml.id
  227 + WHERE
  228 + sat.id = '${dataimporter.delta.id}'
  229 + GROUP BY
  230 + sam.thread_id,
  231 + sat.subject_token,
  232 + sat.latest_message_id,
  233 + saml.name"
  234 +
  235 + query="SELECT
  236 + sam.thread_id AS id,
  237 + sat.subject_token AS name,
  238 + sat.latest_message_id,
  239 + saml.name AS mailinglist,
  240 + array_to_string(array_agg(sam.body), ' ') AS content
  241 + FROM
  242 + super_archives_message AS sam
  243 + JOIN super_archives_thread AS sat
  244 + ON sat.id = sam.thread_id
  245 + JOIN super_archives_mailinglist AS saml
  246 + ON sat.mailinglist_id = saml.id
  247 + GROUP BY
  248 + sam.thread_id,
  249 + sat.subject_token,
  250 + sat.latest_message_id,
  251 + saml.name">
  252 +
  253 + <!--
  254 + Check about "DISTINCT ON" here:
  255 + http://archives.postgresql.org/pgsql-general/2002-06/msg01330.php
  256 + -->
  257 + <entity name="first_message"
  258 + dataSource="colab"
  259 + transformer="TemplateTransformer"
  260 + query="SELECT DISTINCT ON (sam.thread_id)
  261 + sam.body AS description,
  262 + sam.received_time AS created,
  263 + sam.subject_clean AS subject,
  264 + au.username AS creator
  265 + FROM
  266 + super_archives_message AS sam
  267 + JOIN super_archives_emailaddress AS saea
  268 + ON sam.from_address_id = saea.id
  269 + LEFT JOIN auth_user AS au
  270 + ON au.id = saea.user_id
  271 + WHERE
  272 + sam.thread_id = ${thread.id}
  273 + ORDER BY
  274 + sam.thread_id,
  275 + sam.received_time">
  276 + <field column="title" template="${first_message.subject}" />
  277 + </entity>
  278 +
  279 + <entity name="latest_message"
  280 + dataSource="colab"
  281 + query="SELECT
  282 + received_time AS modified
  283 + FROM
  284 + super_archives_message
  285 + WHERE
  286 + id = ${thread.latest_message_id}" />
  287 +
  288 + <entity name="thread_collaborators"
  289 + dataSource="colab"
  290 + query="SELECT DISTINCT
  291 + au.username AS collaborator
  292 + FROM
  293 + super_archives_message AS sam
  294 + JOIN super_archives_emailaddress AS saea
  295 + ON sam.from_address_id = saea.id
  296 + JOIN auth_user AS au
  297 + ON au.id = saea.user_id
  298 + WHERE
  299 + thread_id = ${thread.id}" />
  300 +
  301 + <field column="uid" template="THREAD_${thread.id}" />
  302 + <field column="type" template="thread" />
  303 + <field column="created" name="created"
  304 + dateTimeFormat="yyyy-MM-dd hh:mm:ss" />
  305 + <field column="modified" name="modified"
  306 + dateTimeFormat="yyyy-MM-dd hh:mm:ss" />
  307 + </entity>
  308 + </document>
  309 +
  310 +</dataConfig>
  311 +
  312 +<!--
  313 +vim: ts=2 sw=2 ss=2 expandtab:
  314 +-->
... ...
sorl-conf/schema.xml 0 → 100644
... ... @@ -0,0 +1,499 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + This is the Solr schema file. This file should be named "schema.xml" and
  21 + should be in the conf directory under the solr home
  22 + (i.e. ./solr/conf/schema.xml by default)
  23 + or located where the classloader for the Solr webapp can find it.
  24 +
  25 + This example schema is the recommended starting point for users.
  26 + It should be kept correct and concise, usable out-of-the-box.
  27 +
  28 + For more information, on how to customize this file, please see
  29 + http://wiki.apache.org/solr/SchemaXml
  30 +
  31 + PERFORMANCE NOTE: this schema includes many optional features and should not
  32 + be used for benchmarking. To improve performance one could
  33 + - set stored="false" for all fields possible (esp large fields) when you
  34 + only need to search on the field but don't need to return the original
  35 + value.
  36 + - set indexed="false" if you don't need to search on the field, but only
  37 + return the field as a result of searching on other indexed fields.
  38 + - remove all unneeded copyField statements
  39 + - for best index size and searching performance, set "indexed" to false
  40 + for all general text fields, use copyField to copy them to the
  41 + catchall "text" field, and use that for searching.
  42 + - For maximum indexing performance, use the StreamingUpdateSolrServer
  43 + java client.
  44 + - Remember to run the JVM in server mode, and use a higher logging level
  45 + that avoids logging every request
  46 +-->
  47 +
  48 +<schema name="example" version="1.2">
  49 + <!-- attribute "name" is the name of this schema and is only used for display purposes.
  50 + Applications should change this to reflect the nature of the search collection.
  51 + version="1.2" is Solr's version number for the schema syntax and semantics. It should
  52 + not normally be changed by applications.
  53 + 1.0: multiValued attribute did not exist, all fields are multiValued by nature
  54 + 1.1: multiValued attribute introduced, false by default
  55 + 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
  56 + -->
  57 +
  58 + <types>
  59 + <!-- field type definitions. The "name" attribute is
  60 + just a label to be used by field definitions. The "class"
  61 + attribute and any other attributes determine the real
  62 + behavior of the fieldType.
  63 + Class names starting with "solr" refer to java classes in the
  64 + org.apache.solr.analysis package.
  65 + -->
  66 +
  67 + <!-- The StrField type is not analyzed, but indexed/stored verbatim.
  68 + - StrField and TextField support an optional compressThreshold which
  69 + limits compression (if enabled in the derived fields) to values which
  70 + exceed a certain size (in characters).
  71 + -->
  72 + <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
  73 +
  74 + <!-- boolean type: "true" or "false" -->
  75 + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
  76 + <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings -->
  77 + <fieldtype name="binary" class="solr.BinaryField"/>
  78 +
  79 + <!-- The optional sortMissingLast and sortMissingFirst attributes are
  80 + currently supported on types that are sorted internally as strings.
  81 + This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
  82 + - If sortMissingLast="true", then a sort on this field will cause documents
  83 + without the field to come after documents with the field,
  84 + regardless of the requested sort order (asc or desc).
  85 + - If sortMissingFirst="true", then a sort on this field will cause documents
  86 + without the field to come before documents with the field,
  87 + regardless of the requested sort order.
  88 + - If sortMissingLast="false" and sortMissingFirst="false" (the default),
  89 + then default lucene sorting will be used which places docs without the
  90 + field first in an ascending sort and last in a descending sort.
  91 + -->
  92 +
  93 + <!--
  94 + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
  95 + -->
  96 + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
  97 + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
  98 + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
  99 + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
  100 +
  101 + <!--
  102 + Numeric field types that index each value at various levels of precision
  103 + to accelerate range queries when the number of values between the range
  104 + endpoints is large. See the javadoc for NumericRangeQuery for internal
  105 + implementation details.
  106 +
  107 + Smaller precisionStep values (specified in bits) will lead to more tokens
  108 + indexed per value, slightly larger index size, and faster range queries.
  109 + A precisionStep of 0 disables indexing at different precision levels.
  110 + -->
  111 + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
  112 + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
  113 + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
  114 + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
  115 +
  116 + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
  117 + is a more restricted form of the canonical representation of dateTime
  118 + http://www.w3.org/TR/xmlschema-2/#dateTime
  119 + The trailing "Z" designates UTC time and is mandatory.
  120 + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
  121 + All other components are mandatory.
  122 +
  123 + Expressions can also be used to denote calculations that should be
  124 + performed relative to "NOW" to determine the value, ie...
  125 +
  126 + NOW/HOUR
  127 + ... Round to the start of the current hour
  128 + NOW-1DAY
  129 + ... Exactly 1 day prior to now
  130 + NOW/DAY+6MONTHS+3DAYS
  131 + ... 6 months and 3 days in the future from the start of
  132 + the current day
  133 +
  134 + Consult the DateField javadocs for more information.
  135 +
  136 + Note: For faster range queries, consider the tdate type
  137 + -->
  138 + <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
  139 +
  140 + <!-- A Trie based date field for faster date range queries and date faceting. -->
  141 + <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
  142 +
  143 +
  144 + <!--
  145 + Note:
  146 + These should only be used for compatibility with existing indexes (created with older Solr versions)
  147 + or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
  148 +
  149 + Plain numeric field types that store and index the text
  150 + value verbatim (and hence don't support range queries, since the
  151 + lexicographic ordering isn't equal to the numeric ordering)
  152 + -->
  153 + <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
  154 + <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
  155 + <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
  156 + <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
  157 + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
  158 +
  159 +
  160 + <!--
  161 + Note:
  162 + These should only be used for compatibility with existing indexes (created with older Solr versions)
  163 + or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
  164 +
  165 + Numeric field types that manipulate the value into
  166 + a string value that isn't human-readable in its internal form,
  167 + but with a lexicographic ordering the same as the numeric ordering,
  168 + so that range queries work correctly.
  169 + -->
  170 + <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
  171 + <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
  172 + <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
  173 + <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
  174 +
  175 +
  176 + <!-- The "RandomSortField" is not used to store or search any
  177 + data. You can declare fields of this type in your schema
  178 + to generate pseudo-random orderings of your docs for sorting
  179 + purposes. The ordering is generated based on the field name
  180 + and the version of the index. As long as the index version
  181 + remains unchanged, and the same field name is reused,
  182 + the ordering of the docs will be consistent.
  183 + If you want different pseudo-random orderings of documents,
  184 + for the same version of the index, use a dynamicField and
  185 + change the name
  186 + -->
  187 + <fieldType name="random" class="solr.RandomSortField" indexed="true" />
  188 +
  189 + <!-- solr.TextField allows the specification of custom text analyzers
  190 + specified as a tokenizer and a list of token filters. Different
  191 + analyzers may be specified for indexing and querying.
  192 +
  193 + The optional positionIncrementGap puts space between multiple fields of
  194 + this type on the same document, with the purpose of preventing false phrase
  195 + matching across fields.
  196 +
  197 + For more info on customizing your analyzer chain, please see
  198 + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
  199 + -->
  200 +
  201 + <!-- One can also specify an existing Analyzer class that has a
  202 + default constructor via the class attribute on the analyzer element
  203 + <fieldType name="text_greek" class="solr.TextField">
  204 + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
  205 + </fieldType>
  206 + -->
  207 +
  208 + <!-- A text field that only splits on whitespace for exact matching of words -->
  209 + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  210 + <analyzer>
  211 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  212 + </analyzer>
  213 + </fieldType>
  214 +
  215 + <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
  216 + words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
  217 + so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
  218 + Synonyms and stopwords are customized by external files, and stemming is enabled.
  219 + -->
  220 + <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  221 + <analyzer type="index">
  222 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  223 + <!-- in this example, we will only use synonyms at query time
  224 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  225 + -->
  226 + <!-- Case insensitive stop word removal.
  227 + add enablePositionIncrements=true in both the index and query
  228 + analyzers to leave a 'gap' for more accurate phrase queries.
  229 + -->
  230 + <filter class="solr.StopFilterFactory"
  231 + ignoreCase="true"
  232 + words="stopwords.txt"
  233 + enablePositionIncrements="true"
  234 + />
  235 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
  236 + <filter class="solr.LowerCaseFilterFactory"/>
  237 + <filter class="solr.SnowballPorterFilterFactory" language="Portuguese" protected="protwords.txt"/>
  238 + </analyzer>
  239 + <analyzer type="query">
  240 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  241 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  242 + <filter class="solr.StopFilterFactory"
  243 + ignoreCase="true"
  244 + words="stopwords.txt"
  245 + enablePositionIncrements="true"
  246 + />
  247 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
  248 + <filter class="solr.LowerCaseFilterFactory"/>
  249 + <filter class="solr.SnowballPorterFilterFactory" language="Portuguese" protected="protwords.txt"/>
  250 + </analyzer>
  251 + </fieldType>
  252 +
  253 +
  254 + <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
  255 + but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
  256 + <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
  257 + <analyzer>
  258 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  259 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
  260 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
  261 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
  262 + <filter class="solr.LowerCaseFilterFactory"/>
  263 + <filter class="solr.SnowballPorterFilterFactory" language="Portuguese" protected="protwords.txt"/>
  264 + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
  265 + possible with WordDelimiterFilter in conjunction with stemming. -->
  266 + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  267 + </analyzer>
  268 + </fieldType>
  269 +
  270 +
  271 + <!-- A general unstemmed text field - good if one does not know the language of the field -->
  272 + <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
  273 + <analyzer type="index">
  274 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  275 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  276 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
  277 + <filter class="solr.LowerCaseFilterFactory"/>
  278 + </analyzer>
  279 + <analyzer type="query">
  280 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  281 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  282 + <filter class="solr.StopFilterFactory"
  283 + ignoreCase="true"
  284 + words="stopwords.txt"
  285 + enablePositionIncrements="true"
  286 + />
  287 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
  288 + <filter class="solr.LowerCaseFilterFactory"/>
  289 + </analyzer>
  290 + </fieldType>
  291 +
  292 +
  293 + <!-- A general unstemmed text field that indexes tokens normally and also
  294 + reversed (via ReversedWildcardFilterFactory), to enable more efficient
  295 + leading wildcard queries. -->
  296 + <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
  297 + <analyzer type="index">
  298 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  299 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  300 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
  301 + <filter class="solr.LowerCaseFilterFactory"/>
  302 + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
  303 + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  304 + </analyzer>
  305 + <analyzer type="query">
  306 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  307 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  308 + <filter class="solr.StopFilterFactory"
  309 + ignoreCase="true"
  310 + words="stopwords.txt"
  311 + enablePositionIncrements="true"
  312 + />
  313 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
  314 + <filter class="solr.LowerCaseFilterFactory"/>
  315 + </analyzer>
  316 + </fieldType>
  317 +
  318 + <!-- charFilter + WhitespaceTokenizer -->
  319 + <!--
  320 + <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
  321 + <analyzer>
  322 + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
  323 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  324 + </analyzer>
  325 + </fieldType>
  326 + -->
  327 +
  328 + <!-- This is an example of using the KeywordTokenizer along
  329 + With various TokenFilterFactories to produce a sortable field
  330 + that does not include some properties of the source text
  331 + -->
  332 + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  333 + <analyzer>
  334 + <!-- KeywordTokenizer does no actual tokenizing, so the entire
  335 + input string is preserved as a single token
  336 + -->
  337 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  338 + <!-- The LowerCase TokenFilter does what you expect, which can be
  339 + useful when you want your sorting to be case insensitive
  340 + -->
  341 + <filter class="solr.LowerCaseFilterFactory" />
  342 + <!-- The TrimFilter removes any leading or trailing whitespace -->
  343 + <filter class="solr.TrimFilterFactory" />
  344 + <!-- The PatternReplaceFilter gives you the flexibility to use
  345 + Java Regular expression to replace any sequence of characters
  346 + matching a pattern with an arbitrary replacement string,
  347 + which may include back references to portions of the original
  348 + string matched by the pattern.
  349 +
  350 + See the Java Regular Expression documentation for more
  351 + information on pattern and replacement string syntax.
  352 +
  353 + http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
  354 + -->
  355 + <filter class="solr.PatternReplaceFilterFactory"
  356 + pattern="([^a-z])" replacement="" replace="all"
  357 + />
  358 + </analyzer>
  359 + </fieldType>
  360 +
  361 + <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
  362 + <analyzer>
  363 + <tokenizer class="solr.StandardTokenizerFactory"/>
  364 + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  365 + </analyzer>
  366 + </fieldtype>
  367 +
  368 + <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
  369 + <analyzer>
  370 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  371 + <!--
  372 + The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
  373 + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
  374 + Attributes of the DelimitedPayloadTokenFilterFactory :
  375 + "delimiter" - a one character delimiter. Default is | (pipe)
  376 + "encoder" - how to encode the following value into a payload
  377 + float -> org.apache.lucene.analysis.payloads.FloatEncoder,
  378 + integer -> o.a.l.a.p.IntegerEncoder
  379 + identity -> o.a.l.a.p.IdentityEncoder
  380 + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
  381 + -->
  382 + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  383 + </analyzer>
  384 + </fieldtype>
  385 +
  386 + <!-- lowercases the entire field value, keeping it as a single token. -->
  387 + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  388 + <analyzer>
  389 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  390 + <filter class="solr.LowerCaseFilterFactory" />
  391 + </analyzer>
  392 + </fieldType>
  393 +
  394 +
  395 + <!-- since fields of this type are by default not stored or indexed,
  396 + any data added to them will be ignored outright. -->
  397 + <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
  398 +
  399 + </types>
  400 +
  401 +
  402 + <fields>
  403 + <!-- Valid attributes for fields:
  404 + name: mandatory - the name for the field
  405 + type: mandatory - the name of a previously defined type from the
  406 + <types> section
  407 + indexed: true if this field should be indexed (searchable or sortable)
  408 + stored: true if this field should be retrievable
  409 + compressed: [false] if this field should be stored using gzip compression
  410 + (this will only apply if the field type is compressable; among
  411 + the standard field types, only TextField and StrField are)
  412 + multiValued: true if this field may contain multiple values per document
  413 + omitNorms: (expert) set to true to omit the norms associated with
  414 + this field (this disables length normalization and index-time
  415 + boosting for the field, and saves some memory). Only full-text
  416 + fields or fields that need an index-time boost need norms.
  417 + termVectors: [false] set to true to store the term vector for a
  418 + given field.
  419 + When using MoreLikeThis, fields used for similarity should be
  420 + stored for best performance.
  421 + termPositions: Store position information with the term vector.
  422 + This will increase storage costs.
  423 + termOffsets: Store offset information with the term vector. This
  424 + will increase storage costs.
  425 + default: a value that should be used if no value is specified
  426 + when adding a document.
  427 + -->
  428 +
  429 + <!-- Base fields (all should be indexed and stored)-->
  430 + <field name="uid" type="string" indexed="true" stored="true" required="true" />
  431 + <field name="id" type="string" indexed="true" stored="true" required="true" />
  432 + <field name="type" type="string" indexed="true" stored="true" required="true" />
  433 + <field name="title" type="text" indexed="true" stored="true" required="true" />
  434 + <field name="description" type="text" indexed="true" stored="true" />
  435 + <field name="creator" type="string" indexed="true" stored="true" />
  436 + <field name="created" type="date" indexed="true" stored="true" />
  437 + <field name="modified" type="date" indexed="true" stored="true" />
  438 +
  439 + <!-- All next fields should NOT be stored -->
  440 + <field name="collaborator" type="textgen" indexed="true" stored="false" multiValued="true" />
  441 + <field name="name" type="string" indexed="true" stored="false" />
  442 + <field name="content" type="text" indexed="true" stored="false" />
  443 + <field name="comment" type="text" indexed="true" stored="false" multiValued="true" />
  444 + <field name="keyword" type="text" indexed="true" stored="false" multiValued="true" />
  445 + <field name="milestone" type="string" indexed="true" stored="false" />
  446 + <field name="priority" type="string" indexed="true" stored="false" />
  447 + <field name="component" type="string" indexed="true" stored="false" />
  448 + <field name="version" type="string" indexed="true" stored="false" />
  449 + <field name="severity" type="string" indexed="true" stored="false" />
  450 + <field name="reporter" type="textgen" indexed="true" stored="false" />
  451 + <field name="owner" type="textgen" indexed="true" stored="false" />
  452 + <field name="status" type="string" indexed="true" stored="false" />
  453 + <field name="subject" type="text" indexed="true" stored="false" />
  454 + <field name="revision" type="int" indexed="true" stored="false" />
  455 + <field name="mailinglist" type="textgen" indexed="true" stored="false" />
  456 +
  457 + <!-- catchall field, containing all other searchable text fields (implemented
  458 + via copyField further on in this schema) -->
  459 + <field name="default" type="textgen" indexed="true" stored="false" multiValued="true"/>
  460 + </fields>
  461 +
  462 + <!-- Field to use to determine and enforce document uniqueness.
  463 + Unless this field is marked with required="false", it will be a required field
  464 + -->
  465 + <uniqueKey>uid</uniqueKey>
  466 +
  467 + <!-- field for the QueryParser to use when an explicit fieldname is absent -->
  468 + <defaultSearchField>default</defaultSearchField>
  469 +
  470 + <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
  471 + <solrQueryParser defaultOperator="OR"/>
  472 +
  473 + <!-- copyField commands copy one field to another at the time a document
  474 + is added to the index. It's used either to index the same field differently,
  475 + or to add multiple fields to the same field for easier/faster searching. -->
  476 +
  477 + <copyField source="content" dest="default" />
  478 + <copyField source="title" dest="default" />
  479 + <copyField source="description" dest="default" />
  480 + <copyField source="creator" dest="default" />
  481 + <copyField source="collaborator" dest="default" />
  482 + <copyField source="comment" dest="default" />
  483 + <copyField source="keyword" dest="default" />
  484 +
  485 +<!-- Similarity is the scoring routine for each document vs. a query.
  486 + A custom similarity may be specified here, but the default is fine
  487 + for most applications. -->
  488 + <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
  489 + <!-- ... OR ...
  490 + Specify a SimilarityFactory class name implementation
  491 + allowing parameters to be used.
  492 + -->
  493 + <!--
  494 + <similarity class="com.example.solr.CustomSimilarityFactory">
  495 + <str name="paramkey">param value</str>
  496 + </similarity>
  497 + -->
  498 +
  499 +</schema>
... ...
sorl-conf/solrconfig.xml 0 → 100644
... ... @@ -0,0 +1,1040 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +<!--
  19 + For more details about configurations options that may appear in this
  20 + file, see http://wiki.apache.org/solr/SolrConfigXml.
  21 +
  22 + Specifically, the Solr Config can support XInclude, which may make it easier to manage
  23 + the configuration. See https://issues.apache.org/jira/browse/SOLR-1167
  24 +-->
  25 +<config>
  26 + <!-- Set this to 'false' if you want solr to continue working after it has
  27 + encountered a severe configuration error. In a production environment,
  28 + you may want solr to keep working even if one handler is mis-configured.
  29 +
  30 + You may also set this to false by setting the system property:
  31 + -Dsolr.abortOnConfigurationError=false
  32 + -->
  33 + <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
  34 +
  35 + <!-- lib directives can be used to instruct Solr to load any Jars identified
  36 + and use them to resolve any "plugins" specified in your solrconfig.xml or
  37 + schema.xml (ie: Analyzers, Request Handlers, etc...).
  38 +
  39 + All directories and paths are resolved relative to the instanceDir.
  40 +
  41 + If a "./lib" directory exists in your instanceDir, all files found in it
  42 + are included as if you had used the following syntax...
  43 +
  44 + <lib dir="./lib" />
  45 + -->
  46 + <!-- A dir option by itself adds any files found in the directory to the
  47 + classpath, this is useful for including all jars in a directory.
  48 + -->
  49 + <!--lib dir="../../contrib/extraction/lib" /-->
  50 + <!-- When a regex is specified in addition to a directory, only the files in that
  51 + directory which completely match the regex (anchored on both ends)
  52 + will be included.
  53 + -->
  54 + <!--lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
  55 + <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" /-->
  56 + <!-- If a dir option (with or without a regex) is used and nothing is found
  57 + that matches, it will be ignored
  58 + -->
  59 + <!--lib dir="../../contrib/clustering/lib/downloads/" />
  60 + <lib dir="../../contrib/clustering/lib/" />
  61 + <lib dir="/total/crap/dir/ignored" /-->
  62 + <!-- an exact path can be used to specify a specific file. This will cause
  63 + a serious error to be logged if it can't be loaded.
  64 + <lib path="../a-jar-that-does-not-exist.jar" />
  65 + -->
  66 +
  67 +
  68 + <!-- Used to specify an alternate directory to hold all index data
  69 + other than the default ./data under the Solr home.
  70 + If replication is in use, this should match the replication configuration. -->
  71 + <dataDir>/var/lib/solr/data</dataDir>
  72 +
  73 +
  74 + <!-- WARNING: this <indexDefaults> section only provides defaults for index writers
  75 + in general. See also the <mainIndex> section after that when changing parameters
  76 + for Solr's main Lucene index. -->
  77 + <indexDefaults>
  78 + <!-- Values here affect all index writers and act as a default unless overridden. -->
  79 + <useCompoundFile>false</useCompoundFile>
  80 +
  81 + <mergeFactor>10</mergeFactor>
  82 + <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush
  83 + based on whichever limit is hit first. -->
  84 + <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
  85 +
  86 + <!-- Sets the amount of RAM that may be used by Lucene indexing
  87 + for buffering added documents and deletions before they are
  88 + flushed to the Directory. -->
  89 + <ramBufferSizeMB>32</ramBufferSizeMB>
  90 + <!-- <maxMergeDocs>2147483647</maxMergeDocs> -->
  91 + <maxFieldLength>10000</maxFieldLength>
  92 + <writeLockTimeout>1000</writeLockTimeout>
  93 + <commitLockTimeout>10000</commitLockTimeout>
  94 +
  95 + <!--
  96 + Expert: Turn on Lucene's auto commit capability. This causes intermediate
  97 + segment flushes to write a new lucene index descriptor, enabling it to be
  98 + opened by an external IndexReader. This can greatly slow down indexing
  99 + speed. NOTE: Despite the name, this value does not have any relation to
  100 + Solr's autoCommit functionality
  101 + -->
  102 + <!--<luceneAutoCommit>false</luceneAutoCommit>-->
  103 +
  104 + <!--
  105 + Expert: The Merge Policy in Lucene controls how merging is handled by
  106 + Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
  107 + versions used LogDocMergePolicy.
  108 +
  109 + LogByteSizeMergePolicy chooses segments to merge based on their size. The
  110 + Lucene 2.2 default, LogDocMergePolicy chose when to merge based on number
  111 + of documents
  112 +
  113 + Other implementations of MergePolicy must have a no-argument constructor
  114 + -->
  115 + <!--<mergePolicy class="org.apache.lucene.index.LogByteSizeMergePolicy"/>-->
  116 +
  117 + <!--
  118 + Expert:
  119 + The Merge Scheduler in Lucene controls how merges are performed. The
  120 + ConcurrentMergeScheduler (Lucene 2.3 default) can perform merges in the
  121 + background using separate threads. The SerialMergeScheduler (Lucene 2.2
  122 + default) does not.
  123 + -->
  124 + <!--<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>-->
  125 +
  126 +
  127 + <!--
  128 + This option specifies which Lucene LockFactory implementation to use.
  129 +
  130 + single = SingleInstanceLockFactory - suggested for a read-only index
  131 + or when there is no possibility of another process trying
  132 + to modify the index.
  133 + native = NativeFSLockFactory - uses OS native file locking
  134 + simple = SimpleFSLockFactory - uses a plain file for locking
  135 +
  136 + (For backwards compatibility with Solr 1.2, 'simple' is the default
  137 + if not specified.)
  138 + -->
  139 + <lockType>native</lockType>
  140 + <!--
  141 + Expert:
  142 + Controls how often Lucene loads terms into memory -->
  143 + <!--<termIndexInterval>256</termIndexInterval>-->
  144 + </indexDefaults>
  145 +
  146 + <mainIndex>
  147 + <!-- options specific to the main on-disk lucene index -->
  148 + <useCompoundFile>false</useCompoundFile>
  149 + <ramBufferSizeMB>32</ramBufferSizeMB>
  150 + <mergeFactor>10</mergeFactor>
  151 + <!-- Deprecated -->
  152 + <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
  153 + <!--<maxMergeDocs>2147483647</maxMergeDocs>-->
  154 +
  155 + <!-- inherit from indexDefaults <maxFieldLength>10000</maxFieldLength> -->
  156 +
  157 + <!-- If true, unlock any held write or commit locks on startup.
  158 + This defeats the locking mechanism that allows multiple
  159 + processes to safely access a lucene index, and should be
  160 + used with care.
  161 + This is not needed if lock type is 'none' or 'single'
  162 + -->
  163 + <unlockOnStartup>false</unlockOnStartup>
  164 +
  165 + <!-- If true, IndexReaders will be reopened (often more efficient) instead
  166 + of closed and then opened. -->
  167 + <reopenReaders>true</reopenReaders>
  168 +
  169 + <!--
  170 + Expert:
  171 + Controls how often Lucene loads terms into memory. Default is 128 and is likely good for most everyone. -->
  172 + <!--<termIndexInterval>256</termIndexInterval>-->
  173 +
  174 + <!--
  175 + Custom deletion policies can be specified here. The class must
  176 + implement org.apache.lucene.index.IndexDeletionPolicy.
  177 +
  178 + http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html
  179 +
  180 + The standard Solr IndexDeletionPolicy implementation supports deleting
  181 + index commit points on number of commits, age of commit point and
  182 + optimized status.
  183 +
  184 + The latest commit point should always be preserved regardless
  185 + of the criteria.
  186 + -->
  187 + <deletionPolicy class="solr.SolrDeletionPolicy">
  188 + <!-- The number of commit points to be kept -->
  189 + <str name="maxCommitsToKeep">1</str>
  190 + <!-- The number of optimized commit points to be kept -->
  191 + <str name="maxOptimizedCommitsToKeep">0</str>
  192 + <!--
  193 + Delete all commit points once they have reached the given age.
  194 + Supports DateMathParser syntax e.g.
  195 +
  196 + <str name="maxCommitAge">30MINUTES</str>
  197 + <str name="maxCommitAge">1DAY</str>
  198 + -->
  199 + </deletionPolicy>
  200 +
  201 + <!-- To aid in advanced debugging, you may turn on IndexWriter debug logging.
  202 + Setting to true will set the file that the underlying Lucene IndexWriter
  203 + will write its debug infostream to. -->
  204 + <infoStream file="INFOSTREAM.txt">false</infoStream>
  205 +
  206 + </mainIndex>
  207 +
  208 + <!-- Enables JMX if and only if an existing MBeanServer is found, use this
  209 + if you want to configure JMX through JVM parameters. Remove this to disable
  210 + exposing Solr configuration and statistics to JMX.
  211 +
  212 + If you want to connect to a particular server, specify the agentId
  213 + e.g. <jmx agentId="myAgent" />
  214 +
  215 + If you want to start a new MBeanServer, specify the serviceUrl
  216 + e.g <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  217 +
  218 + For more details see http://wiki.apache.org/solr/SolrJmx
  219 + -->
  220 + <jmx />
  221 +
  222 + <!-- the default high-performance update handler -->
  223 + <updateHandler class="solr.DirectUpdateHandler2">
  224 + <!-- A prefix of "solr." for class names is an alias that
  225 + causes solr to search appropriate packages, including
  226 + org.apache.solr.(search|update|request|core|analysis)
  227 + -->
  228 +
  229 + <!-- Perform a <commit/> automatically under certain conditions:
  230 + maxDocs - number of updates since last commit is greater than this
  231 + maxTime - oldest uncommitted update (in ms) is this long ago
  232 + Instead of enabling autoCommit, consider using "commitWithin"
  233 + when adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
  234 + <autoCommit>
  235 + <maxDocs>10000</maxDocs>
  236 + <maxTime>1000</maxTime>
  237 + </autoCommit>
  238 + -->
  239 +
  240 +
  241 + <!-- The RunExecutableListener executes an external command from a
  242 + hook such as postCommit or postOptimize.
  243 + exe - the name of the executable to run
  244 + dir - dir to use as the current working directory. default="."
  245 + wait - the calling thread waits until the executable returns. default="true"
  246 + args - the arguments to pass to the program. default=nothing
  247 + env - environment variables to set. default=nothing
  248 + -->
  249 + <!-- A postCommit event is fired after every commit or optimize command
  250 + <listener event="postCommit" class="solr.RunExecutableListener">
  251 + <str name="exe">solr/bin/snapshooter</str>
  252 + <str name="dir">.</str>
  253 + <bool name="wait">true</bool>
  254 + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
  255 + <arr name="env"> <str>MYVAR=val1</str> </arr>
  256 + </listener>
  257 + -->
  258 + <!-- A postOptimize event is fired only after every optimize command
  259 + <listener event="postOptimize" class="solr.RunExecutableListener">
  260 + <str name="exe">snapshooter</str>
  261 + <str name="dir">solr/bin</str>
  262 + <bool name="wait">true</bool>
  263 + </listener>
  264 + -->
  265 +
  266 + </updateHandler>
  267 +
  268 + <!-- Use the following format to specify a custom IndexReaderFactory - allows for alternate
  269 + IndexReader implementations.
  270 +
  271 + ** Experimental Feature **
  272 + Please note - Using a custom IndexReaderFactory may prevent certain other features
  273 + from working. The API to IndexReaderFactory may change without warning or may even
  274 + be removed from future releases if the problems cannot be resolved.
  275 +
  276 + ** Features that may not work with custom IndexReaderFactory **
  277 + The ReplicationHandler assumes a disk-resident index. Using a custom
  278 + IndexReader implementation may cause incompatibility with ReplicationHandler and
  279 + may cause replication to not work correctly. See SOLR-1366 for details.
  280 +
  281 + <indexReaderFactory name="IndexReaderFactory" class="package.class">
  282 + Parameters as required by the implementation
  283 + </indexReaderFactory >
  284 + -->
  285 + <!-- To set the termInfosIndexDivisor, do this: -->
  286 + <!--<indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory">
  287 + <int name="termInfosIndexDivisor">12</int>
  288 + </indexReaderFactory >-->
  289 +
  290 +
  291 + <query>
  292 + <!-- Maximum number of clauses in a boolean query... in the past, this affected
  293 + range or prefix queries that expanded to big boolean queries - built in Solr
  294 + query parsers no longer create queries with this limitation.
  295 + An exception is thrown if exceeded. -->
  296 + <maxBooleanClauses>1024</maxBooleanClauses>
  297 +
  298 +
  299 + <!-- There are two implementations of cache available for Solr,
  300 + LRUCache, based on a synchronized LinkedHashMap, and
  301 + FastLRUCache, based on a ConcurrentHashMap. FastLRUCache has faster gets
  302 + and slower puts in single threaded operation and thus is generally faster
  303 + than LRUCache when the hit ratio of the cache is high (> 75%), and may be
  304 + faster under other scenarios on multi-cpu systems. -->
  305 + <!-- Cache used by SolrIndexSearcher for filters (DocSets),
  306 + unordered sets of *all* documents that match a query.
  307 + When a new searcher is opened, its caches may be prepopulated
  308 + or "autowarmed" using data from caches in the old searcher.
  309 + autowarmCount is the number of items to prepopulate. For LRUCache,
  310 + the autowarmed items will be the most recently accessed items.
  311 + Parameters:
  312 + class - the SolrCache implementation LRUCache or FastLRUCache
  313 + size - the maximum number of entries in the cache
  314 + initialSize - the initial capacity (number of entries) of
  315 + the cache. (see java.util.HashMap)
  316 + autowarmCount - the number of entries to prepopulate from
  317 + an old cache.
  318 + -->
  319 + <filterCache
  320 + class="solr.FastLRUCache"
  321 + size="512"
  322 + initialSize="512"
  323 + autowarmCount="0"/>
  324 +
  325 + <!-- Cache used to hold field values that are quickly accessible
  326 + by document id. The fieldValueCache is created by default
  327 + even if not configured here.
  328 + <fieldValueCache
  329 + class="solr.FastLRUCache"
  330 + size="512"
  331 + autowarmCount="128"
  332 + showItems="32"
  333 + />
  334 + -->
  335 +
  336 + <!-- queryResultCache caches results of searches - ordered lists of
  337 + document ids (DocList) based on a query, a sort, and the range
  338 + of documents requested. -->
  339 + <queryResultCache
  340 + class="solr.LRUCache"
  341 + size="512"
  342 + initialSize="512"
  343 + autowarmCount="0"/>
  344 +
  345 + <!-- documentCache caches Lucene Document objects (the stored fields for each document).
  346 + Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
  347 + <documentCache
  348 + class="solr.LRUCache"
  349 + size="512"
  350 + initialSize="512"
  351 + autowarmCount="0"/>
  352 +
  353 + <!-- If true, stored fields that are not requested will be loaded lazily.
  354 + This can result in a significant speed improvement if the usual case is to
  355 + not load all stored fields, especially if the skipped fields are large
  356 + compressed text fields.
  357 + -->
  358 + <enableLazyFieldLoading>true</enableLazyFieldLoading>
  359 +
  360 + <!-- Example of a generic cache. These caches may be accessed by name
  361 + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
  362 + The purpose is to enable easy caching of user/application level data.
  363 + The regenerator argument should be specified as an implementation
  364 + of solr.search.CacheRegenerator if autowarming is desired. -->
  365 + <!--
  366 + <cache name="myUserCache"
  367 + class="solr.LRUCache"
  368 + size="4096"
  369 + initialSize="1024"
  370 + autowarmCount="1024"
  371 + regenerator="org.mycompany.mypackage.MyRegenerator"
  372 + />
  373 + -->
  374 +
  375 + <!-- An optimization that attempts to use a filter to satisfy a search.
  376 + If the requested sort does not include score, then the filterCache
  377 + will be checked for a filter matching the query. If found, the filter
  378 + will be used as the source of document ids, and then the sort will be
  379 + applied to that.
  380 + <useFilterForSortedQuery>true</useFilterForSortedQuery>
  381 + -->
  382 +
  383 + <!-- An optimization for use with the queryResultCache. When a search
  384 + is requested, a superset of the requested number of document ids
  385 + are collected. For example, if a search for a particular query
  386 + requests matching documents 10 through 19, and queryResultWindowSize is 50,
  387 + then documents 0 through 49 will be collected and cached. Any further
  388 + requests in that range can be satisfied via the cache. -->
  389 + <queryResultWindowSize>20</queryResultWindowSize>
  390 +
  391 + <!-- Maximum number of documents to cache for any entry in the
  392 + queryResultCache. -->
  393 + <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
  394 +
  395 + <!-- a newSearcher event is fired whenever a new searcher is being prepared
  396 + and there is a current searcher handling requests (aka registered).
  397 + It can be used to prime certain caches to prevent long request times for
  398 + certain requests.
  399 + -->
  400 + <!-- QuerySenderListener takes an array of NamedList and executes a
  401 + local query request for each NamedList in sequence. -->
  402 + <listener event="newSearcher" class="solr.QuerySenderListener">
  403 + <arr name="queries">
  404 + <!--
  405 + <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
  406 + <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
  407 + <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
  408 + -->
  409 + </arr>
  410 + </listener>
  411 +
  412 + <!-- a firstSearcher event is fired whenever a new searcher is being
  413 + prepared but there is no current registered searcher to handle
  414 + requests or to gain autowarming data from. -->
  415 + <listener event="firstSearcher" class="solr.QuerySenderListener">
  416 + <arr name="queries">
  417 + <lst> <str name="q">solr rocks</str><str name="start">0</str><str name="rows">10</str></lst>
  418 + <lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst>
  419 + </arr>
  420 + </listener>
  421 +
  422 + <!-- If a search request comes in and there is no current registered searcher,
  423 + then immediately register the still warming searcher and use it. If
  424 + "false" then all requests will block until the first searcher is done
  425 + warming. -->
  426 + <useColdSearcher>false</useColdSearcher>
  427 +
  428 + <!-- Maximum number of searchers that may be warming in the background
  429 + concurrently. An error is returned if this limit is exceeded. Recommend
  430 + 1-2 for read-only slaves, higher for masters w/o cache warming. -->
  431 + <maxWarmingSearchers>2</maxWarmingSearchers>
  432 +
  433 + </query>
  434 +
  435 + <!--
  436 + Let the dispatch filter handle /select?qt=XXX
  437 + handleSelect=true will use consistent error handling for /select and /update
  438 + handleSelect=false will use solr1.1 style error formatting
  439 + -->
  440 + <requestDispatcher handleSelect="true" >
  441 + <!--Make sure your system has some authentication before enabling remote streaming! -->
  442 + <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" />
  443 +
  444 + <!-- Set HTTP caching related parameters (for proxy caches and clients).
  445 +
  446 + To get the behaviour of Solr 1.2 (ie: no caching related headers)
  447 + use the never304="true" option and do not specify a value for
  448 + <cacheControl>
  449 + -->
  450 + <!-- <httpCaching never304="true"> -->
  451 + <httpCaching lastModifiedFrom="openTime"
  452 + etagSeed="Solr">
  453 + <!-- lastModifiedFrom="openTime" is the default, the Last-Modified value
  454 + (and validation against If-Modified-Since requests) will all be
  455 + relative to when the current Searcher was opened.
  456 + You can change it to lastModifiedFrom="dirLastMod" if you want the
  457 + value to exactly correspond to when the physical index was last
  458 + modified.
  459 +
  460 + etagSeed="..." is an option you can change to force the ETag
  461 + header (and validation against If-None-Match requests) to be
  462 + different even if the index has not changed (ie: when making
  463 + significant changes to your config file)
  464 +
  465 + lastModifiedFrom and etagSeed are both ignored if you use the
  466 + never304="true" option.
  467 + -->
  468 + <!-- If you include a <cacheControl> directive, it will be used to
  469 + generate a Cache-Control header, as well as an Expires header
  470 + if the value contains "max-age="
  471 +
  472 + By default, no Cache-Control header is generated.
  473 +
  474 + You can use the <cacheControl> option even if you have set
  475 + never304="true"
  476 + -->
  477 + <!-- <cacheControl>max-age=30, public</cacheControl> -->
  478 + </httpCaching>
  479 + </requestDispatcher>
  480 +
  481 +
  482 + <!-- requestHandler plugins... incoming queries will be dispatched to the
  483 + correct handler based on the path or the qt (query type) param.
  484 + Names starting with a '/' are accessed with a path equal to the
  485 + registered name. Names without a leading '/' are accessed with:
  486 + http://host/app/select?qt=name
  487 + If no qt is defined, the requestHandler that declares default="true"
  488 + will be used.
  489 + -->
  490 + <requestHandler name="standard" class="solr.SearchHandler" default="true">
  491 + <!-- default values for query parameters -->
  492 + <lst name="defaults">
  493 + <str name="echoParams">explicit</str>
  494 + <!--
  495 + <int name="rows">10</int>
  496 + <str name="fl">*</str>
  497 + <str name="version">2.1</str>
  498 + -->
  499 + </lst>
  500 + </requestHandler>
  501 +
  502 +<!-- Please refer to http://wiki.apache.org/solr/SolrReplication for details on configuring replication -->
  503 +<!-- remove the <lst name="master"> section if this is just a slave -->
  504 +<!-- remove the <lst name="slave"> section if this is just a master -->
  505 +<!--
  506 +<requestHandler name="/replication" class="solr.ReplicationHandler" >
  507 + <lst name="master">
  508 + <str name="replicateAfter">commit</str>
  509 + <str name="replicateAfter">startup</str>
  510 + <str name="confFiles">schema.xml,stopwords.txt</str>
  511 + </lst>
  512 + <lst name="slave">
  513 + <str name="masterUrl">http://localhost:8983/solr/replication</str>
  514 + <str name="pollInterval">00:00:60</str>
  515 + </lst>
  516 +</requestHandler>-->
  517 +
  518 + <!-- DisMaxRequestHandler allows easy searching across multiple fields
  519 + for simple user-entered phrases. Its implementation is now
  520 + just the standard SearchHandler with a default query type
  521 + of "dismax".
  522 + see http://wiki.apache.org/solr/DisMaxRequestHandler
  523 + -->
  524 + <requestHandler name="dismax" class="solr.SearchHandler" >
  525 + <lst name="defaults">
  526 + <str name="defType">dismax</str>
  527 + <str name="echoParams">explicit</str>
  528 + <float name="tie">0.01</float>
  529 + <str name="qf">
  530 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  531 + </str>
  532 + <str name="pf">
  533 + text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
  534 + </str>
  535 + <str name="bf">
  536 + popularity^0.5 recip(price,1,1000,1000)^0.3
  537 + </str>
  538 + <str name="fl">
  539 + id,name,price,score
  540 + </str>
  541 + <str name="mm">
  542 + 2&lt;-1 5&lt;-2 6&lt;90%
  543 + </str>
  544 + <int name="ps">100</int>
  545 + <str name="q.alt">*:*</str>
  546 + <!-- example highlighter config, enable per-query with hl=true -->
  547 + <str name="hl.fl">text features name</str>
  548 + <!-- for this field, we want no fragmenting, just highlighting -->
  549 + <str name="f.name.hl.fragsize">0</str>
  550 + <!-- instructs Solr to return the field itself if no query terms are
  551 + found -->
  552 + <str name="f.name.hl.alternateField">name</str>
  553 + <str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
  554 + </lst>
  555 + </requestHandler>
  556 +
  557 + <!-- Note how you can register the same handler multiple times with
  558 + different names (and different init parameters)
  559 + -->
  560 + <requestHandler name="partitioned" class="solr.SearchHandler" >
  561 + <lst name="defaults">
  562 + <str name="defType">dismax</str>
  563 + <str name="echoParams">explicit</str>
  564 + <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
  565 + <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
  566 + <!-- This is an example of using Date Math to specify a constantly
  567 + moving date range in a config...
  568 + -->
  569 + <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
  570 + </lst>
  571 + <!-- In addition to defaults, "appends" params can be specified
  572 + to identify values which should be appended to the list of
  573 + multi-val params from the query (or the existing "defaults").
  574 +
  575 + In this example, the param "fq=instock:true" will be appended to
  576 + any query time fq params the user may specify, as a mechanism for
  577 + partitioning the index, independent of any user selected filtering
  578 + that may also be desired (perhaps as a result of faceted searching).
  579 +
  580 + NOTE: there is *absolutely* nothing a client can do to prevent these
  581 + "appends" values from being used, so don't use this mechanism
  582 + unless you are sure you always want it.
  583 + -->
  584 + <lst name="appends">
  585 + <str name="fq">inStock:true</str>
  586 + </lst>
  587 + <!-- "invariants" are a way of letting the Solr maintainer lock down
  588 + the options available to Solr clients. Any params values
  589 + specified here are used regardless of what values may be specified
  590 + in either the query, the "defaults", or the "appends" params.
  591 +
  592 + In this example, the facet.field and facet.query params are fixed,
  593 + limiting the facets clients can use. Faceting is not turned on by
  594 + default - but if the client does specify facet=true in the request,
  595 + these are the only facets they will be able to see counts for;
  596 + regardless of what other facet.field or facet.query params they
  597 + may specify.
  598 +
  599 + NOTE: there is *absolutely* nothing a client can do to prevent these
  600 + "invariants" values from being used, so don't use this mechanism
  601 + unless you are sure you always want it.
  602 + -->
  603 + <lst name="invariants">
  604 + <str name="facet.field">cat</str>
  605 + <str name="facet.field">manu_exact</str>
  606 + <str name="facet.query">price:[* TO 500]</str>
  607 + <str name="facet.query">price:[500 TO *]</str>
  608 + </lst>
  609 + </requestHandler>
  610 +
  611 +
  612 + <!--
  613 + Search components are registered to SolrCore and used by Search Handlers
  614 +
  615 + By default, the following components are available:
  616 +
  617 + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
  618 + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
  619 + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
  620 + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
  621 + <searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" />
  622 + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
  623 +
  624 + Default configuration in a requestHandler would look like:
  625 + <arr name="components">
  626 + <str>query</str>
  627 + <str>facet</str>
  628 + <str>mlt</str>
  629 + <str>highlight</str>
  630 + <str>stats</str>
  631 + <str>debug</str>
  632 + </arr>
  633 +
  634 + If you register a searchComponent to one of the standard names, that will be used instead.
  635 + To insert components before or after the 'standard' components, use:
  636 +
  637 + <arr name="first-components">
  638 + <str>myFirstComponentName</str>
  639 + </arr>
  640 +
  641 + <arr name="last-components">
  642 + <str>myLastComponentName</str>
  643 + </arr>
  644 + -->
  645 +
  646 + <!-- The spell check component can return a list of alternative spelling
  647 + suggestions. -->
  648 + <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  649 +
  650 + <str name="queryAnalyzerFieldType">textSpell</str>
  651 +
  652 + <lst name="spellchecker">
  653 + <str name="name">default</str>
  654 + <str name="field">name</str>
  655 + <str name="spellcheckIndexDir">./spellchecker</str>
  656 + </lst>
  657 +
  658 + <!-- a spellchecker that uses a different distance measure
  659 + <lst name="spellchecker">
  660 + <str name="name">jarowinkler</str>
  661 + <str name="field">spell</str>
  662 + <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
  663 + <str name="spellcheckIndexDir">./spellchecker2</str>
  664 + </lst>
  665 + -->
  666 +
  667 + <!-- a file based spell checker
  668 + <lst name="spellchecker">
  669 + <str name="classname">solr.FileBasedSpellChecker</str>
  670 + <str name="name">file</str>
  671 + <str name="sourceLocation">spellings.txt</str>
  672 + <str name="characterEncoding">UTF-8</str>
  673 + <str name="spellcheckIndexDir">./spellcheckerFile</str>
  674 + </lst>
  675 + -->
  676 + </searchComponent>
  677 +
  678 + <!-- A request handler utilizing the spellcheck component.
  679 + #############################################################################
  680 + NOTE: This is purely as an example. The whole purpose of the
  681 + SpellCheckComponent is to hook it into the request handler that handles queries
  682 + (i.e. the standard or dismax SearchHandler) such that a separate request is
  683 + not needed to get suggestions.
  684 +
  685 + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
  686 + WANT FOR YOUR PRODUCTION SYSTEM!
  687 + #############################################################################
  688 + -->
  689 + <requestHandler name="/spell" class="solr.SearchHandler" lazy="true">
  690 + <lst name="defaults">
  691 + <!-- omp = Only More Popular -->
  692 + <str name="spellcheck.onlyMorePopular">false</str>
  693 + <!-- exr = Extended Results -->
  694 + <str name="spellcheck.extendedResults">false</str>
  695 + <!-- The number of suggestions to return -->
  696 + <str name="spellcheck.count">1</str>
  697 + </lst>
  698 + <arr name="last-components">
  699 + <str>spellcheck</str>
  700 + </arr>
  701 + </requestHandler>
  702 +
  703 + <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
  704 + <!-- A Req Handler for working with the tvComponent. This is purely as an example.
  705 + You will likely want to add the component to your already specified request handlers. -->
  706 + <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
  707 + <lst name="defaults">
  708 + <bool name="tv">true</bool>
  709 + </lst>
  710 + <arr name="last-components">
  711 + <str>tvComponent</str>
  712 + </arr>
  713 + </requestHandler>
  714 +
  715 + <!-- Clustering Component
  716 + http://wiki.apache.org/solr/ClusteringComponent
  717 + This relies on third party jars which are not included in the release.
  718 + To use this component (and the "/clustering" handler),
  719 + those jars will need to be downloaded, and you'll need to set the
  720 + solr.clustering.enabled system property when running solr...
  721 + java -Dsolr.clustering.enabled=true -jar start.jar
  722 + -->
  723 + <searchComponent
  724 + name="clusteringComponent"
  725 + enable="${solr.clustering.enabled:false}"
  726 + class="org.apache.solr.handler.clustering.ClusteringComponent" >
  727 + <!-- Declare an engine -->
  728 + <lst name="engine">
  729 + <!-- The name, only one can be named "default" -->
  730 + <str name="name">default</str>
  731 + <!--
  732 + Class name of Carrot2 clustering algorithm. Currently available algorithms are:
  733 +
  734 + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
  735 + * org.carrot2.clustering.stc.STCClusteringAlgorithm
  736 +
  737 + See http://project.carrot2.org/algorithms.html for the algorithm's characteristics.
  738 + -->
  739 + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
  740 + <!--
  741 + Overriding values for Carrot2 default algorithm attributes. For a description
  742 + of all available attributes, see: http://download.carrot2.org/stable/manual/#chapter.components.
  743 + Use attribute key as name attribute of str elements below. These can be further
  744 + overridden for individual requests by specifying attribute key as request
  745 + parameter name and attribute value as parameter value.
  746 + -->
  747 + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
  748 + </lst>
  749 + <lst name="engine">
  750 + <str name="name">stc</str>
  751 + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
  752 + </lst>
  753 + </searchComponent>
  754 + <requestHandler name="/clustering"
  755 + enable="${solr.clustering.enabled:false}"
  756 + class="solr.SearchHandler">
  757 + <lst name="defaults">
  758 + <bool name="clustering">true</bool>
  759 + <str name="clustering.engine">default</str>
  760 + <bool name="clustering.results">true</bool>
  761 + <!-- The title field -->
  762 + <str name="carrot.title">name</str>
  763 + <str name="carrot.url">id</str>
  764 + <!-- The field to cluster on -->
  765 + <str name="carrot.snippet">features</str>
  766 + <!-- produce summaries -->
  767 + <bool name="carrot.produceSummary">true</bool>
  768 + <!-- the maximum number of labels per cluster -->
  769 + <!--<int name="carrot.numDescriptions">5</int>-->
  770 + <!-- produce sub clusters -->
  771 + <bool name="carrot.outputSubClusters">false</bool>
  772 + </lst>
  773 + <arr name="last-components">
  774 + <str>clusteringComponent</str>
  775 + </arr>
  776 + </requestHandler>
  777 +
  778 + <!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
  779 + <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler" startup="lazy">
  780 + <lst name="defaults">
  781 + <!-- All the main content goes into "text"... if you need to return
  782 + the extracted text or do highlighting, use a stored field. -->
  783 + <str name="fmap.content">text</str>
  784 + <str name="lowernames">true</str>
  785 + <str name="uprefix">ignored_</str>
  786 +
  787 + <!-- capture link hrefs but ignore div attributes -->
  788 + <str name="captureAttr">true</str>
  789 + <str name="fmap.a">links</str>
  790 + <str name="fmap.div">ignored_</str>
  791 + </lst>
  792 + </requestHandler>
  793 +
  794 +
  795 + <!-- A component to return terms and document frequency of those terms.
  796 + This component does not yet support distributed search. -->
  797 + <searchComponent name="termsComponent" class="org.apache.solr.handler.component.TermsComponent"/>
  798 +
  799 + <requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
  800 + <lst name="defaults">
  801 + <bool name="terms">true</bool>
  802 + </lst>
  803 + <arr name="components">
  804 + <str>termsComponent</str>
  805 + </arr>
  806 + </requestHandler>
  807 +
  808 +
  809 + <!-- a search component that enables you to configure the top results for
  810 + a given query regardless of the normal lucene scoring.-->
  811 + <searchComponent name="elevator" class="solr.QueryElevationComponent" >
  812 + <!-- pick a fieldType to analyze queries -->
  813 + <str name="queryFieldType">string</str>
  814 + <str name="config-file">elevate.xml</str>
  815 + </searchComponent>
  816 +
  817 + <!-- a request handler utilizing the elevator component -->
  818 + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
  819 + <lst name="defaults">
  820 + <str name="echoParams">explicit</str>
  821 + </lst>
  822 + <arr name="last-components">
  823 + <str>elevator</str>
  824 + </arr>
  825 + </requestHandler>
  826 +
  827 +
  828 + <!-- Update request handler.
  829 +
  830 + Note: Since solr1.1 requestHandlers require a valid content type header if posted in
  831 + the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
  832 + The response format differs from solr1.1 formatting and returns a standard error code.
  833 + To enable solr1.1 behavior, remove the /update handler or change its path
  834 + -->
  835 + <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
  836 +
  837 +
  838 + <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" />
  839 +
  840 + <!--
  841 + Analysis request handler. Since Solr 1.3. Use to return how a document is analyzed. Useful
  842 + for debugging and as a token server for other types of applications.
  843 +
  844 + This is deprecated in favor of the improved DocumentAnalysisRequestHandler and FieldAnalysisRequestHandler
  845 +
  846 + <requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
  847 + -->
  848 +
  849 + <!--
  850 + An analysis handler that provides a breakdown of the analysis process of provided documents. This handler expects a
  851 + (single) content stream with the following format:
  852 +
  853 + <docs>
  854 + <doc>
  855 + <field name="id">1</field>
  856 + <field name="name">The Name</field>
  857 + <field name="text">The Text Value</field>
  858 + </doc>
  859 + <doc>...</doc>
  860 + <doc>...</doc>
  861 + ...
  862 + </docs>
  863 +
  864 + Note: Each document must contain a field which serves as the unique key. This key is used in the returned
  865 + response to associate an analysis breakdown to the analyzed document.
  866 +
  867 + Like the FieldAnalysisRequestHandler, this handler also supports query analysis by
  868 + sending either an "analysis.query" or "q" request parameter that holds the query text to be analyzed. It also
  869 + supports the "analysis.showmatch" parameter which when set to true, all field tokens that match the query
  870 + tokens will be marked as a "match".
  871 + -->
  872 + <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" />
  873 +
  874 + <!--
  875 + RequestHandler that provides much the same functionality as analysis.jsp. Provides the ability
  876 + to specify multiple field types and field names in the same request and outputs index-time and
  877 + query-time analysis for each of them.
  878 +
  879 + Request parameters are:
  880 + analysis.fieldname - The field name whose analyzers are to be used
  881 + analysis.fieldtype - The field type whose analyzers are to be used
  882 + analysis.fieldvalue - The text for index-time analysis
  883 + q (or analysis.query) - The text for query time analysis
  884 + analysis.showmatch (true|false) - When set to true and when query analysis is performed, the produced
  885 + tokens of the field value analysis will be marked as "matched" for every
  886 + token that is produced by the query analysis
  887 + -->
  888 + <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" />
  889 +
  890 +
  891 + <!-- CSV update handler, loaded on demand -->
  892 + <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
  893 +
  894 +
  895 + <!--
  896 + Admin Handlers - This will register all the standard admin RequestHandlers. Adding
  897 + this single handler is equivalent to registering:
  898 +
  899 + <requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" />
  900 + <requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" />
  901 + <requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" />
  902 + <requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" />
  903 + <requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" />
  904 + <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" />
  905 +
  906 + If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using:
  907 + <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" >
  908 + <lst name="invariants">
  909 + <str name="hidden">synonyms.txt</str>
  910 + <str name="hidden">anotherfile.txt</str>
  911 + </lst>
  912 + </requestHandler>
  913 + -->
  914 + <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
  915 +
  916 + <!-- ping/healthcheck -->
  917 + <requestHandler name="/admin/ping" class="PingRequestHandler">
  918 + <lst name="defaults">
  919 + <str name="qt">standard</str>
  920 + <str name="q">solrpingquery</str>
  921 + <str name="echoParams">all</str>
  922 + </lst>
  923 + </requestHandler>
  924 +
  925 + <!-- Echo the request contents back to the client -->
  926 + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
  927 + <lst name="defaults">
  928 + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' -->
  929 + <str name="echoHandler">true</str>
  930 + </lst>
  931 + </requestHandler>
  932 +
  933 + <highlighting>
  934 + <!-- Configure the standard fragmenter -->
  935 + <!-- This could most likely be commented out in the "default" case -->
  936 + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
  937 + <lst name="defaults">
  938 + <int name="hl.fragsize">100</int>
  939 + </lst>
  940 + </fragmenter>
  941 +
  942 + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) -->
  943 + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
  944 + <lst name="defaults">
  945 + <!-- slightly smaller fragsizes work better because of slop -->
  946 + <int name="hl.fragsize">70</int>
  947 + <!-- allow 50% slop on fragment sizes -->
  948 + <float name="hl.regex.slop">0.5</float>
  949 + <!-- a basic sentence pattern -->
  950 + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
  951 + </lst>
  952 + </fragmenter>
  953 +
  954 + <!-- Configure the standard formatter -->
  955 + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
  956 + <lst name="defaults">
  957 + <str name="hl.simple.pre"><![CDATA[<em>]]></str>
  958 + <str name="hl.simple.post"><![CDATA[</em>]]></str>
  959 + </lst>
  960 + </formatter>
  961 + </highlighting>
  962 +
  963 + <!-- An example dedup update processor that creates the "id" field on the fly
  964 + based on the hash code of some other fields. This example has overwriteDupes
  965 + set to false since we are using the id field as the signatureField and Solr
  966 + will maintain uniqueness based on that anyway.
  967 +
  968 + You have to link the chain to an update handler above to use it ie:
  969 + <requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
  970 + <lst name="defaults">
  971 + <str name="update.processor">dedupe</str>
  972 + </lst>
  973 + </requestHandler>
  974 + -->
  975 + <!--
  976 + <updateRequestProcessorChain name="dedupe">
  977 + <processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
  978 + <bool name="enabled">true</bool>
  979 + <str name="signatureField">id</str>
  980 + <bool name="overwriteDupes">false</bool>
  981 + <str name="fields">name,features,cat</str>
  982 + <str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
  983 + </processor>
  984 + <processor class="solr.LogUpdateProcessorFactory" />
  985 + <processor class="solr.RunUpdateProcessorFactory" />
  986 + </updateRequestProcessorChain>
  987 + -->
  988 +
  989 +
  990 + <!-- queryResponseWriter plugins... query responses will be written using the
  991 + writer specified by the 'wt' request parameter matching the name of a registered
  992 + writer.
  993 + The "default" writer is the default and will be used if 'wt' is not specified
  994 + in the request. XMLResponseWriter will be used if nothing is specified here.
  995 + The json, python, and ruby writers are also available by default.
  996 +
  997 + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/>
  998 + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
  999 + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
  1000 + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
  1001 + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/>
  1002 + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/>
  1003 +
  1004 + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
  1005 + -->
  1006 +
  1007 + <!-- XSLT response writer transforms the XML output by any xslt file found
  1008 + in Solr's conf/xslt directory. Changes to xslt files are checked for
  1009 + every xsltCacheLifetimeSeconds.
  1010 + -->
  1011 + <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
  1012 + <int name="xsltCacheLifetimeSeconds">5</int>
  1013 + </queryResponseWriter>
  1014 +
  1015 +
  1016 + <!-- example of registering a query parser
  1017 + <queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/>
  1018 + -->
  1019 +
  1020 + <!-- example of registering a custom function parser
  1021 + <valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" />
  1022 + -->
  1023 +
  1024 + <!-- config for the admin interface -->
  1025 + <admin>
  1026 + <defaultQuery>solr</defaultQuery>
  1027 +
  1028 + <!-- configure a healthcheck file for servers behind a loadbalancer
  1029 + <healthcheck type="file">server-enabled</healthcheck>
  1030 + -->
  1031 + </admin>
  1032 +
  1033 + <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
  1034 + <lst name="defaults">
  1035 + <str name="config">/etc/solr/conf/data-config.xml</str>
  1036 + </lst>
  1037 + </requestHandler>
  1038 +
  1039 +
  1040 +</config>
... ...