Commit dacfcb1a4e665089453e7b5c9de4ec844d57a351

Authored by Luan
1 parent 920d9998

Removing solr-conf folder

solr-conf/README
@@ -1,46 +0,0 @@ @@ -1,46 +0,0 @@
1 -Installation instructions for Ubuntu 10.04  
2 --------------------------------------------  
3 -  
4 -* Install Java, tomcat, JDBC Postgres drivers (Ubuntu partner repositories must be enabled):  
5 -sudo apt-get install sun-java6-bin tomcat6 libpg-java  
6 -  
7 -* Download Solr 3.3 and extract it:  
8 -wget http://ftp.unicamp.br/pub/apache/lucene/solr/3.3.0/apache-solr-3.3.0.tgz  
9 -tar xzf apache-solr-3.3.0.tgz  
10 -  
11 -* Create the directory /var/local/lib/solr/ and give the right permissions:  
12 -sudo mkdir -p /var/local/lib/solr/  
13 -sudo chown tomcat6:tomcat6 /var/local/lib/solr/  
14 -  
15 -* Copy the solr home example to /usr/local/share/:  
16 -sudo cp -R apache-solr-3.3.0/example/solr /usr/local/share/  
17 -  
18 -* Create a folder for libs in the solr home:  
19 -sudo mkdir /usr/local/share/solr/lib/  
20 -  
21 -* Copy Solr libs to libs folder:  
22 -sudo cp apache-solr-3.3.0/dist/*.jar /usr/local/share/solr/lib/  
23 -  
24 -* Copy Solr distribution to solr home:  
25 -sudo cp apache-solr-3.3.0/dist/apache-solr-3.3.0.war /usr/local/share/solr/  
26 -  
27 -* Link the JDBC Postgres drivers into the Solr installation:  
28 -sudo ln -s /usr/share/java/postgresql-jdbc3-8.4.jar /usr/local/share/solr/lib/  
29 -  
30 -* Link configurations to /etc  
31 -sudo ln -s /usr/local/share/solr/conf/ /etc/solr  
32 -  
33 -* Copy the configuration files from this folder into /etc/solr/  
34 -  
35 -* Link the solr-tomcat.xml file in the Tomcat configuration:  
36 -sudo ln -s /etc/solr/solr-tomcat.xml /etc/tomcat6/Catalina/localhost/solr.xml  
37 -  
38 -* Check data-config.xml to make sure all the information needed to connect to the databases is correct  
39 -  
40 -* Create a dataimport.properties file in /etc/solr and give tomcat6 write access to it:  
41 -sudo touch /etc/solr/dataimport.properties  
42 -sudo chown tomcat6:tomcat6 /etc/solr/dataimport.properties  
43 -  
44 -* Restart tomcat:  
45 -sudo /etc/init.d/tomcat6 restart  
46 -  
solr-conf/data-config.xml
@@ -1,365 +0,0 @@ @@ -1,365 +0,0 @@
1 -<dataConfig>  
2 - <dataSource name="trac"  
3 - type="JdbcDataSource"  
4 - driver="org.postgresql.Driver"  
5 - url="jdbc:postgresql://bdinterlegis.interlegis.leg.br/trac_colab"  
6 - user="colab" />  
7 - <dataSource name="colab"  
8 - type="JdbcDataSource"  
9 - driver="org.postgresql.Driver"  
10 - url="jdbc:postgresql://bdinterlegis.interlegis.leg.br/colab"  
11 - user="colab" />  
12 -  
13 - <document>  
14 -  
15 - <entity name="wiki"  
16 - dataSource="trac"  
17 - transformer="TemplateTransformer,DateFormatTransformer"  
18 - query="SELECT  
19 - name,  
20 - TIMESTAMP WITH TIME ZONE 'epoch' + (max(time)/1000000) * INTERVAL '1s' AS modified,  
21 - max(version) AS version  
22 - FROM wiki GROUP BY name"  
23 - deltaQuery="  
24 - SELECT DISTINCT  
25 - name  
26 - FROM  
27 - wiki  
28 - WHERE  
29 - time > (EXTRACT(  
30 - epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.wiki.last_index_time}'  
31 - ) * 1000000)"  
32 - deltaImportQuery="  
33 - SELECT  
34 - name,  
35 - max(version) AS version  
36 - FROM  
37 - wiki  
38 - WHERE  
39 - name = '${dataimporter.delta.id}'  
40 - GROUP BY name">  
41 -  
42 - <entity name="wiki_creation"  
43 - dataSource="trac"  
44 - query="SELECT  
45 - author AS Creator,  
46 - TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS created  
47 - FROM  
48 - wiki  
49 - WHERE  
50 - name = '${wiki.name}'  
51 - AND version = 1" />  
52 -  
53 - <entity name="wiki_modification"  
54 - dataSource="trac"  
55 - query="SELECT  
56 - author AS last_author,  
57 - TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS modified  
58 - FROM  
59 - wiki  
60 - WHERE  
61 - name = '${wiki.name}'  
62 - AND version = '${wiki.version}'" />  
63 -  
64 - <entity name="wiki_collaborators"  
65 - dataSource="trac"  
66 - query="SELECT DISTINCT  
67 - author AS collaborator  
68 - FROM  
69 - wiki  
70 - WHERE  
71 - name = '${wiki.name}'  
72 - AND author != ''" />  
73 -  
74 - <entity name="content"  
75 - dataSource="trac"  
76 - query="SELECT  
77 - text AS content  
78 - FROM  
79 - wiki  
80 - WHERE  
81 - name = '${wiki.name}'  
82 - AND version = '${wiki.version}'" />  
83 -  
84 - <field column="UID" template="WIKI_${wiki.name}" />  
85 - <field column="getId" template="${wiki.name}" />  
86 - <field column="Type" template="wiki" />  
87 - <field column="Title" template="${wiki.name}" />  
88 - <field column="created" name="created"  
89 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
90 - <field column="modified" name="modified"  
91 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
92 - <field column="path_string" template="/wiki/${wiki.name}" />  
93 - </entity>  
94 -  
95 - <entity name="ticket"  
96 - dataSource="trac"  
97 - transformer="TemplateTransformer,DateFormatTransformer"  
98 - pk="id"  
99 - deltaQuery="  
100 - SELECT  
101 - id  
102 - FROM  
103 - ticket  
104 - WHERE  
105 - time > (EXTRACT(  
106 - epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.ticket.last_index_time}'  
107 - ) * 1000000)"  
108 - query="SELECT  
109 - id,  
110 - summary,  
111 - description AS Description,  
112 - milestone,  
113 - priority,  
114 - component,  
115 - version,  
116 - severity,  
117 - reporter,  
118 - owner,  
119 - status,  
120 - TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000)* INTERVAL '1s' AS created,  
121 - TIMESTAMP WITH TIME ZONE 'epoch' + (changetime/1000000) * INTERVAL '1s' AS modified  
122 - FROM  
123 - ticket">  
124 -  
125 - <entity name="ticket_collaborator"  
126 - dataSource="trac"  
127 - query="SELECT  
128 - reporter AS collaborator  
129 - FROM  
130 - ticket  
131 - WHERE  
132 - id = ${ticket.id}  
133 -  
134 - UNION  
135 -  
136 - SELECT  
137 - owner AS collaborator  
138 - FROM  
139 - ticket  
140 - WHERE  
141 - id = ${ticket.id}  
142 -  
143 - UNION  
144 -  
145 - SELECT DISTINCT  
146 - author AS collaborator  
147 - FROM  
148 - ticket_change  
149 - WHERE  
150 - ticket = ${ticket.id}" />  
151 -  
152 - <entity name="ticket_keywords"  
153 - dataSource="trac"  
154 - query="SELECT DISTINCT  
155 - REGEXP_SPLIT_TO_TABLE(keywords, ',|\\s') AS keyword  
156 - FROM  
157 - ticket  
158 - WHERE  
159 - id = ${ticket.id} AND  
160 - keywords != ''" />  
161 -  
162 - <entity name="ticket_modification"  
163 - dataSource="trac"  
164 - query="SELECT DISTINCT  
165 - author AS last_author  
166 - FROM  
167 - ticket_change  
168 - WHERE  
169 - ticket = ${ticket.id} AND  
170 - time = (SELECT max(time)  
171 - FROM ticket_change  
172 - WHERE ticket = ${ticket.id});" />  
173 -  
174 - <entity name="ticket_comments"  
175 - dataSource="trac"  
176 - query="SELECT  
177 - newvalue AS comment  
178 - FROM  
179 - ticket_change  
180 - WHERE  
181 - ticket = ${ticket.id}  
182 - AND field = 'comment'" />  
183 -  
184 - <field column="UID" template="TICKET_${ticket.id}" />  
185 - <field column="getId" template="${ticket.id}" />  
186 - <field column="Type" template="ticket" />  
187 - <field column="path_string" template="/ticket/${ticket.id}" />  
188 - <field column="Title"  
189 - template="#${ticket.id} (${ticket.status}) - ${ticket.summary}" />  
190 - <field column="Creator" template="${ticket.reporter}" />  
191 - <field column="created" name="created"  
192 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
193 - <field column="modified" name="modified"  
194 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
195 - </entity>  
196 -  
197 - <entity name="changeset"  
198 - dataSource="trac"  
199 - transformer="TemplateTransformer,DateFormatTransformer"  
200 - pk="rev"  
201 - deltaQuery="  
202 - SELECT  
203 - rev  
204 - FROM  
205 - revision  
206 - WHERE  
207 - time > (EXTRACT(  
208 - epoch FROM TIMESTAMP WITH TIME ZONE '${dataimporter.changeset.last_index_time}'  
209 - ) * 1000000)"  
210 -  
211 - query="SELECT  
212 - rev AS revision,  
213 - author AS Creator,  
214 - author AS collaborator,  
215 - repos.value AS repos_name,  
216 - TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS created,  
217 - TIMESTAMP WITH TIME ZONE 'epoch' + (time/1000000) * INTERVAL '1s' AS modified,  
218 - message  
219 - FROM  
220 - revision AS rev JOIN  
221 - repository AS repos  
222 - ON rev.repos = repos.id AND  
223 - repos.name = 'name' AND repos.value != ''">  
224 -  
225 - <field column="UID" template="CHANGESET_${changeset.revision}" />  
226 - <field column="getId" template="${changeset.revision}" />  
227 - <field column="Type" template="changeset" />  
228 - <field column="path_string"  
229 - template="/changeset/${changeset.revision}/${changeset.repos_name}"  
230 - />  
231 - <field column="Title"  
232 - template="[${changeset.revision}] - ${changeset.message}" />  
233 - <field column="created" name="created"  
234 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
235 - <field column="modified" name="modified"  
236 - dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
237 - </entity>  
238 -  
239 - <entity name="thread"  
240 - dataSource="colab"  
241 - transformer="TemplateTransformer,DateFormatTransformer"  
242 - deltaQuery="  
243 - SELECT  
244 - thread_id AS id  
245 - FROM  
246 - super_archives_message  
247 - GROUP BY  
248 - thread_id  
249 - HAVING  
250 - max(received_time) > '${dataimporter.thread.last_index_time}'"  
251 - deltaImportQuery="SELECT  
252 - sam.thread_id AS id,  
253 - sat.subject_token AS name,  
254 - sat.latest_message_id,  
255 - saml.name AS mailinglist,  
256 - array_to_string(array_agg(sam.body), ' ') AS content  
257 - FROM  
258 - super_archives_message AS sam  
259 - JOIN super_archives_thread AS sat  
260 - ON sat.id = sam.thread_id  
261 - JOIN super_archives_mailinglist AS saml  
262 - ON sat.mailinglist_id = saml.id  
263 - WHERE  
264 - sat.id = '${dataimporter.delta.id}'  
265 - GROUP BY  
266 - sam.thread_id,  
267 - sat.subject_token,  
268 - sat.latest_message_id,  
269 - saml.name"  
270 -  
271 - query="SELECT  
272 - sam.thread_id AS id,  
273 - sat.subject_token AS name,  
274 - sat.latest_message_id,  
275 - saml.name AS mailinglist,  
276 - array_to_string(array_agg(sam.body), ' ') AS content  
277 - FROM  
278 - super_archives_message AS sam  
279 - JOIN super_archives_thread AS sat  
280 - ON sat.id = sam.thread_id  
281 - JOIN super_archives_mailinglist AS saml  
282 - ON sat.mailinglist_id = saml.id  
283 - WHERE  
284 - sat.spam IS NOT True  
285 - GROUP BY  
286 - sam.thread_id,  
287 - sat.subject_token,  
288 - sat.latest_message_id,  
289 - saml.name">  
290 -  
291 - <!--  
292 - Check about "DISTINCT ON" here:  
293 - http://archives.postgresql.org/pgsql-general/2002-06/msg01330.php  
294 - -->  
295 - <entity name="first_message"  
296 - dataSource="colab"  
297 - transformer="TemplateTransformer"  
298 - query="SELECT DISTINCT ON (sam.thread_id)  
299 - sam.received_time AS created,  
300 - sam.subject_clean AS subject,  
301 - saea.real_name AS creator_real_name,  
302 - saea.md5 AS creator_email_md5,  
303 - au.username AS Creator  
304 - FROM  
305 - super_archives_message AS sam  
306 - JOIN super_archives_emailaddress AS saea  
307 - ON sam.from_address_id = saea.id  
308 - LEFT JOIN auth_user AS au  
309 - ON au.id = saea.user_id  
310 - WHERE  
311 - sam.thread_id = ${thread.id}  
312 - ORDER BY  
313 - sam.thread_id,  
314 - sam.received_time">  
315 - <field column="Title" template="${first_message.subject}" />  
316 - <field column="creator_profile_uri"  
317 - template="/user/hash/${first_message.creator_email_md5}" />  
318 - </entity>  
319 -  
320 - <entity name="latest_message"  
321 - dataSource="colab"  
322 - query="SELECT  
323 - sam.body AS Description,  
324 - sam.received_time AS modified,  
325 - au.username AS last_author  
326 - FROM  
327 - super_archives_message AS sam  
328 - JOIN super_archives_emailaddress AS saea  
329 - ON sam.from_address_id = saea.id  
330 - LEFT JOIN auth_user AS au  
331 - ON au.id = saea.user_id  
332 - WHERE  
333 - sam.id = ${thread.latest_message_id}" />  
334 -  
335 - <entity name="thread_collaborators"  
336 - dataSource="colab"  
337 - query="SELECT DISTINCT  
338 - au.username AS collaborator  
339 - FROM  
340 - super_archives_message AS sam  
341 - JOIN super_archives_emailaddress AS saea  
342 - ON sam.from_address_id = saea.id  
343 - JOIN auth_user AS au  
344 - ON au.id = saea.user_id  
345 - WHERE  
346 - thread_id = ${thread.id}" />  
347 -  
348 - <field column="UID" template="THREAD_${thread.id}" />  
349 - <field column="getId" template="${thread.name}" />  
350 - <field column="Type" template="thread" />  
351 - <field column="path_string"  
352 - template="/archives/thread/${thread.mailinglist}/${thread.name}" />  
353 - <field column="created" name="created"  
354 - dateTimeFormat="yyyy-MM-dd HH:mm:ss" /> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
355 - <field column="modified" name="modified"  
356 - dateTimeFormat="yyyy-MM-dd HH:mm:ss" /> <!-- HH = 24-hour clock; the 12-hour "hh" parses 12:xx as 00:xx -->  
357 - </entity>  
358 - </document>  
359 -  
360 -</dataConfig>  
361 -  
362 -<!--  
363 -vim: ts=2 sw=2 ss=2 expandtab:  
364 --->  
365 -  
solr-conf/schema.xml
@@ -1,614 +0,0 @@ @@ -1,614 +0,0 @@
1 -<?xml version="1.0" encoding="UTF-8" ?>  
2 -<!--  
3 - Licensed to the Apache Software Foundation (ASF) under one or more  
4 - contributor license agreements. See the NOTICE file distributed with  
5 - this work for additional information regarding copyright ownership.  
6 - The ASF licenses this file to You under the Apache License, Version 2.0  
7 - (the "License"); you may not use this file except in compliance with  
8 - the License. You may obtain a copy of the License at  
9 -  
10 - http://www.apache.org/licenses/LICENSE-2.0  
11 -  
12 - Unless required by applicable law or agreed to in writing, software  
13 - distributed under the License is distributed on an "AS IS" BASIS,  
14 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
15 - See the License for the specific language governing permissions and  
16 - limitations under the License.  
17 --->  
18 -  
19 -<!--  
20 - This is the Solr schema file. This file should be named "schema.xml" and  
21 - should be in the conf directory under the solr home  
22 - (i.e. ./solr/conf/schema.xml by default)  
23 - or located where the classloader for the Solr webapp can find it.  
24 -  
25 - This example schema is the recommended starting point for users.  
26 - It should be kept correct and concise, usable out-of-the-box.  
27 -  
28 - For more information, on how to customize this file, please see  
29 - http://wiki.apache.org/solr/SchemaXml  
30 --->  
31 -  
32 -<schema name="solr-instance" version="1.4">  
33 - <!-- attribute "name" is the name of this schema and is only used for display purposes.  
34 - Applications should change this to reflect the nature of the search collection.  
35 - version="1.4" is Solr's version number for the schema syntax and semantics. It should  
36 - not normally be changed by applications.  
37 - 1.0: multiValued attribute did not exist, all fields are multiValued by nature  
38 - 1.1: multiValued attribute introduced, false by default  
39 - 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.  
40 - 1.3: removed optional field compress feature  
41 - 1.4: default auto-phrase (QueryParser feature) to off  
42 - -->  
43 -  
44 - <types>  
45 - <!-- field type definitions. The "name" attribute is  
46 - just a label to be used by field definitions. The "class"  
47 - attribute and any other attributes determine the real  
48 - behavior of the fieldType.  
49 - Class names starting with "solr" refer to java classes in the  
50 - org.apache.solr.analysis package.  
51 - -->  
52 -  
53 - <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->  
54 - <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>  
55 -  
56 - <!-- boolean type: "true" or "false" -->  
57 - <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>  
58 - <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->  
59 - <fieldType name="binary" class="solr.BinaryField"/>  
60 -  
61 - <!-- The optional sortMissingLast and sortMissingFirst attributes are  
62 - currently supported on types that are sorted internally as strings.  
63 - This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"  
64 - - If sortMissingLast="true", then a sort on this field will cause documents  
65 - without the field to come after documents with the field,  
66 - regardless of the requested sort order (asc or desc).  
67 - - If sortMissingFirst="true", then a sort on this field will cause documents  
68 - without the field to come before documents with the field,  
69 - regardless of the requested sort order.  
70 - - If sortMissingLast="false" and sortMissingFirst="false" (the default),  
71 - then default lucene sorting will be used which places docs without the  
72 - field first in an ascending sort and last in a descending sort.  
73 - -->  
74 -  
75 - <!--  
76 - Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.  
77 - -->  
78 - <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>  
79 - <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>  
80 - <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>  
81 - <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>  
82 -  
83 - <!-- BBB support for existing schemas based on collective.solr -->  
84 - <fieldType name="integer" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>  
85 -  
86 - <!--  
87 - Numeric field types that index each value at various levels of precision  
88 - to accelerate range queries when the number of values between the range  
89 - endpoints is large. See the javadoc for NumericRangeQuery for internal  
90 - implementation details.  
91 -  
92 - Smaller precisionStep values (specified in bits) will lead to more tokens  
93 - indexed per value, slightly larger index size, and faster range queries.  
94 - A precisionStep of 0 disables indexing at different precision levels.  
95 - -->  
96 - <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>  
97 - <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>  
98 - <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>  
99 - <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>  
100 -  
101 - <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and  
102 - is a more restricted form of the canonical representation of dateTime  
103 - http://www.w3.org/TR/xmlschema-2/#dateTime  
104 - The trailing "Z" designates UTC time and is mandatory.  
105 - Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z  
106 - All other components are mandatory.  
107 -  
108 - Expressions can also be used to denote calculations that should be  
109 - performed relative to "NOW" to determine the value, ie...  
110 -  
111 - NOW/HOUR  
112 - ... Round to the start of the current hour  
113 - NOW-1DAY  
114 - ... Exactly 1 day prior to now  
115 - NOW/DAY+6MONTHS+3DAYS  
116 - ... 6 months and 3 days in the future from the start of  
117 - the current day  
118 -  
119 - Consult the DateField javadocs for more information.  
120 -  
121 - Note: For faster range queries, consider the tdate type  
122 - -->  
123 - <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>  
124 -  
125 - <!-- A Trie based date field for faster date range queries and date faceting. -->  
126 - <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>  
127 -  
128 - <!-- The "RandomSortField" is not used to store or search any  
129 - data. You can declare fields of this type in your schema  
130 - to generate pseudo-random orderings of your docs for sorting  
131 - purposes. The ordering is generated based on the field name  
132 - and the version of the index. As long as the index version  
133 - remains unchanged, and the same field name is reused,  
134 - the ordering of the docs will be consistent.  
135 - If you want different pseudo-random orderings of documents,  
136 - for the same version of the index, use a dynamicField and  
137 - change the name.  
138 - -->  
139 - <fieldType name="random" class="solr.RandomSortField" indexed="true" />  
140 -  
141 - <!-- solr.TextField allows the specification of custom text analyzers  
142 - specified as a tokenizer and a list of token filters. Different  
143 - analyzers may be specified for indexing and querying.  
144 -  
145 - The optional positionIncrementGap puts space between multiple fields of  
146 - this type on the same document, with the purpose of preventing false phrase  
147 - matching across fields.  
148 -  
149 - For more info on customizing your analyzer chain, please see  
150 - http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters  
151 - -->  
152 -  
153 - <!-- One can also specify an existing Analyzer class that has a  
154 - default constructor via the class attribute on the analyzer element  
155 - <fieldType name="text_greek" class="solr.TextField">  
156 - <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>  
157 - </fieldType>  
158 - -->  
159 -  
160 - <!-- Brazilian Portuguese text. BrazilianAnalyzer already lower-cases and stems,  
161 - and an analyzer selected via "class" cannot be combined with nested  
162 - tokenizer/filter factories, so none are listed here. -->  
163 - <fieldType name="text_ptbr" class="solr.TextField">  
164 - <analyzer class="org.apache.lucene.analysis.br.BrazilianAnalyzer"/>  
165 - </fieldType>  
166 -  
167 - <!-- A text field that only splits on whitespace for exact matching of words -->  
168 - <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">  
169 - <analyzer>  
170 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
171 - </analyzer>  
172 - </fieldType>  
173 -  
174 - <!-- A general text field that has reasonable, generic  
175 - cross-language defaults: it tokenizes with StandardTokenizer,  
176 - removes stop words from case-insensitive "stopwords.txt"  
177 - (empty by default), and down cases. At query time only, it  
178 - also applies synonyms. -->  
179 - <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">  
180 - <analyzer type="index">  
181 - <tokenizer class="solr.StandardTokenizerFactory"/>  
182 - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />  
183 - <!-- in this example, we will only use synonyms at query time  
184 - <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>  
185 - -->  
186 - <filter class="solr.LowerCaseFilterFactory"/>  
187 - </analyzer>  
188 - <analyzer type="query">  
189 - <tokenizer class="solr.StandardTokenizerFactory"/>  
190 - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />  
191 - <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>  
192 - <filter class="solr.LowerCaseFilterFactory"/>  
193 - </analyzer>  
194 - </fieldType>  
195 -  
196 - <!-- A text field with defaults appropriate for English: it  
197 - tokenizes with StandardTokenizer, removes English stop words  
198 - (stopwords_en.txt), down cases, protects words from protwords.txt, and  
199 - finally applies Porter's stemming. The query time analyzer  
200 - also applies synonyms from synonyms.txt. -->  
201 - <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">  
202 - <analyzer type="index">  
203 - <tokenizer class="solr.StandardTokenizerFactory"/>  
204 - <!-- in this example, we will only use synonyms at query time  
205 - <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>  
206 - -->  
207 - <!-- Case insensitive stop word removal.  
208 - add enablePositionIncrements=true in both the index and query  
209 - analyzers to leave a 'gap' for more accurate phrase queries.  
210 - -->  
211 - <filter class="solr.StopFilterFactory"  
212 - ignoreCase="true"  
213 - words="stopwords_en.txt"  
214 - enablePositionIncrements="true"  
215 - />  
216 - <filter class="solr.LowerCaseFilterFactory"/>  
217 - <filter class="solr.EnglishPossessiveFilterFactory"/>  
218 - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>  
219 - <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:  
220 - <filter class="solr.EnglishMinimalStemFilterFactory"/>  
221 - -->  
222 - <filter class="solr.PorterStemFilterFactory"/>  
223 - </analyzer>  
224 - <analyzer type="query">  
225 - <tokenizer class="solr.StandardTokenizerFactory"/>  
226 - <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>  
227 - <filter class="solr.StopFilterFactory"  
228 - ignoreCase="true"  
229 - words="stopwords_en.txt"  
230 - enablePositionIncrements="true"  
231 - />  
232 - <filter class="solr.LowerCaseFilterFactory"/>  
233 - <filter class="solr.EnglishPossessiveFilterFactory"/>  
234 - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>  
235 - <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:  
236 - <filter class="solr.EnglishMinimalStemFilterFactory"/>  
237 - -->  
238 - <filter class="solr.PorterStemFilterFactory"/>  
239 - </analyzer>  
240 - </fieldType>  
241 -  
242 - <!-- A text field with defaults appropriate for English, plus  
243 - aggressive word-splitting and autophrase features enabled.  
244 - This field is just like text_en, except it adds  
245 - WordDelimiterFilter to enable splitting and matching of  
246 - words on case-change, alpha numeric boundaries, and  
247 - non-alphanumeric chars. This means certain compound word  
248 - cases will work, for example query "wi fi" will match  
249 - document "WiFi" or "wi-fi". However, other cases will still  
250 - not match, for example if the query is "wifi" and the  
251 - document is "wi fi" or if the query is "wi-fi" and the  
252 - document is "wifi".  
253 - -->  
254 - <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">  
255 - <analyzer type="index">  
256 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
257 - <!-- in this example, we will only use synonyms at query time  
258 - <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>  
259 - -->  
260 - <!-- Case insensitive stop word removal.  
261 - add enablePositionIncrements=true in both the index and query  
262 - analyzers to leave a 'gap' for more accurate phrase queries.  
263 - -->  
264 - <filter class="solr.StopFilterFactory"  
265 - ignoreCase="true"  
266 - words="stopwords_en.txt"  
267 - enablePositionIncrements="true"  
268 - />  
269 - <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>  
270 - <filter class="solr.LowerCaseFilterFactory"/>  
271 - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>  
272 - <filter class="solr.PorterStemFilterFactory"/>  
273 - </analyzer>  
274 - <analyzer type="query">  
275 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
276 - <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>  
277 - <filter class="solr.StopFilterFactory"  
278 - ignoreCase="true"  
279 - words="stopwords_en.txt"  
280 - enablePositionIncrements="true"  
281 - />  
282 - <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>  
283 - <filter class="solr.LowerCaseFilterFactory"/>  
284 - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>  
285 - <filter class="solr.PorterStemFilterFactory"/>  
286 - </analyzer>  
287 - </fieldType>  
288 -  
289 - <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,  
290 - but may be good for SKUs. Can insert dashes in the wrong place and still match. -->  
291 - <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">  
292 - <analyzer>  
293 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
294 - <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>  
295 - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>  
296 - <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>  
297 - <filter class="solr.LowerCaseFilterFactory"/>  
298 - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>  
299 - <filter class="solr.EnglishMinimalStemFilterFactory"/>  
300 - <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes  
301 - possible with WordDelimiterFilter in conjunction with stemming. -->  
302 - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>  
303 - </analyzer>  
304 - </fieldType>  
305 -  
306 - <!-- Just like text_general except it reverses the characters of  
307 - each token, to enable more efficient leading wildcard queries. -->  
308 - <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">  
309 - <analyzer type="index">  
310 - <tokenizer class="solr.StandardTokenizerFactory"/>  
311 - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />  
312 - <filter class="solr.LowerCaseFilterFactory"/>  
313 - <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"  
314 - maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>  
315 - </analyzer>  
316 - <analyzer type="query">  
317 - <tokenizer class="solr.StandardTokenizerFactory"/>  
318 - <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>  
319 - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />  
320 - <filter class="solr.LowerCaseFilterFactory"/>  
321 - </analyzer>  
322 - </fieldType>  
323 -  
324 - <!-- charFilter + WhitespaceTokenizer -->  
325 - <!--  
326 - <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >  
327 - <analyzer>  
328 - <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>  
329 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
330 - </analyzer>  
331 - </fieldType>  
332 - -->  
333 -  
334 - <!-- This is an example of using the KeywordTokenizer along  
335 - With various TokenFilterFactories to produce a sortable field  
336 - that does not include some properties of the source text  
337 - -->  
338 - <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">  
339 - <analyzer>  
340 - <!-- KeywordTokenizer does no actual tokenizing, so the entire  
341 - input string is preserved as a single token  
342 - -->  
343 - <tokenizer class="solr.KeywordTokenizerFactory"/>  
344 - <!-- The LowerCase TokenFilter does what you expect, which can be  
345 - useful when you want your sorting to be case insensitive  
346 - -->  
347 - <filter class="solr.LowerCaseFilterFactory" />  
348 - <!-- The TrimFilter removes any leading or trailing whitespace -->  
349 - <filter class="solr.TrimFilterFactory" />  
350 - <!-- The PatternReplaceFilter gives you the flexibility to use  
351 - Java Regular expression to replace any sequence of characters  
352 - matching a pattern with an arbitrary replacement string,  
353 - which may include back references to portions of the original  
354 - string matched by the pattern.  
355 -  
356 - See the Java Regular Expression documentation for more  
357 - information on pattern and replacement string syntax.  
358 -  
359 - http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html  
360 - -->  
361 - <filter class="solr.PatternReplaceFilterFactory"  
362 - pattern="([^a-z])" replacement="" replace="all"  
363 - />  
364 - </analyzer>  
365 - </fieldType>  
366 -  
367 - <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >  
368 - <analyzer>  
369 - <tokenizer class="solr.StandardTokenizerFactory"/>  
370 - <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>  
371 - </analyzer>  
372 - </fieldtype>  
373 -  
374 - <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >  
375 - <analyzer>  
376 - <tokenizer class="solr.WhitespaceTokenizerFactory"/>  
377 - <!--  
378 - The DelimitedPayloadTokenFilter can put payloads on tokens... for example,  
379 - a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f  
380 - Attributes of the DelimitedPayloadTokenFilterFactory :  
381 - "delimiter" - a one character delimiter. Default is | (pipe)  
382 - "encoder" - how to encode the following value into a payload  
383 - float -> org.apache.lucene.analysis.payloads.FloatEncoder,  
384 - integer -> o.a.l.a.p.IntegerEncoder  
385 - identity -> o.a.l.a.p.IdentityEncoder  
386 - Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.  
387 - -->  
388 - <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>  
389 - </analyzer>  
390 - </fieldtype>  
391 -  
392 - <!-- lowercases the entire field value, keeping it as a single token. -->  
393 - <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">  
394 - <analyzer>  
395 - <tokenizer class="solr.KeywordTokenizerFactory"/>  
396 - <filter class="solr.LowerCaseFilterFactory" />  
397 - </analyzer>  
398 - </fieldType>  
399 -  
400 - <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">  
401 - <analyzer>  
402 - <tokenizer class="solr.PathHierarchyTokenizerFactory"/>  
403 - </analyzer>  
404 - </fieldType>  
405 -  
406 - <!-- since fields of this type are by default not stored or indexed,  
407 - any data added to them will be ignored outright. -->  
408 - <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />  
409 -  
410 - <!-- This point type indexes the coordinates as separate fields (subFields)  
411 - If subFieldType is defined, it references a type, and a dynamic field  
412 - definition is created matching *___<typename>. Alternately, if  
413 - subFieldSuffix is defined, that is used to create the subFields.  
414 - Example: if subFieldType="double", then the coordinates would be  
415 - indexed in fields myloc_0___double,myloc_1___double.  
416 - Example: if subFieldSuffix="_d" then the coordinates would be indexed  
417 - in fields myloc_0_d,myloc_1_d  
418 - The subFields are an implementation detail of the fieldType, and end  
419 - users normally should not need to know about them.  
420 - -->  
421 - <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>  
422 -  
423 - <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->  
424 - <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>  
425 -  
426 - <!--  
427 - A Geohash is a compact representation of a latitude longitude pair in a single field.  
428 - See http://wiki.apache.org/solr/SpatialSearch  
429 - -->  
430 - <fieldtype name="geohash" class="solr.GeoHashField"/>  
431 -  
432 -  
433 - </types>  
434 -  
435 -  
436 - <fields>  
437 - <!-- Valid attributes for fields:  
438 - name: mandatory - the name for the field  
439 - type: mandatory - the name of a previously defined type from the  
440 - <types> section  
441 - indexed: true if this field should be indexed (searchable or sortable)  
442 - stored: true if this field should be retrievable  
443 - multiValued: true if this field may contain multiple values per document  
444 - omitNorms: (expert) set to true to omit the norms associated with  
445 - this field (this disables length normalization and index-time  
446 - boosting for the field, and saves some memory). Only full-text  
447 - fields or fields that need an index-time boost need norms.  
448 - termVectors: [false] set to true to store the term vector for a  
449 - given field.  
450 - When using MoreLikeThis, fields used for similarity should be  
451 - stored for best performance.  
452 - termPositions: Store position information with the term vector.  
453 - This will increase storage costs.  
454 - termOffsets: Store offset information with the term vector. This  
455 - will increase storage costs.  
456 - default: a value that should be used if no value is specified  
457 - when adding a document.  
458 - -->  
459 -  
460 - <!-- Base fields (all should be indexed and stored)-->  
461 - <field name="UID" type="string" indexed="true"  
462 - stored="true" required="true" multiValued="false"/>  
463 - <field name="getId" type="string" indexed="false"  
464 - stored="true" required="true" multiValued="false"/>  
465 - <field name="Type" type="string" indexed="true"  
466 - stored="true" required="true" multiValued="false"/>  
467 - <field name="Title" type="text_ptbr" indexed="true"  
468 - stored="true" required="true" multiValued="false"/>  
469 - <field name="Description" type="text_ptbr" indexed="true"  
470 - stored="true" required="false" multiValued="false"/>  
471 - <field name="Creator" type="string" indexed="true"  
472 - stored="true" required="false" multiValued="false"/>  
473 - <field name="last_author" type="string" indexed="true"  
474 - stored="true" required="false" multiValued="false"/>  
475 - <field name="created" type="date" indexed="true"  
476 - stored="true" required="false" multiValued="false"/>  
477 - <field name="modified" type="date" indexed="true"  
478 - stored="true" required="false" multiValued="false"/>  
479 - <field name="mailinglist" type="string" indexed="true"  
480 - stored="true" required="false" multiValued="false"/>  
481 - <field name="creator_real_name" type="string" indexed="true"  
482 - stored="true" required="false" multiValued="false"/>  
483 - <field name="creator_profile_uri" type="string" indexed="true"  
484 - stored="true" required="false" multiValued="false"/>  
485 -  
486 - <!-- All next fields should NOT be stored -->  
487 - <field name="name" type="string" indexed="true"  
488 - stored="false" required="false" multiValued="false"/>  
489 - <field name="comment" type="text_ptbr" indexed="true"  
490 - stored="false" required="false" multiValued="true" />  
491 - <field name="content" type="text_ptbr" indexed="true"  
492 - stored="false" required="false" multiValued="false"/>  
493 - <field name="keyword" type="text_ptbr" indexed="true"  
494 - stored="false" required="false" multiValued="true"/>  
495 - <field name="milestone" type="string" indexed="true"  
496 - stored="false" required="false" multiValued="false"/>  
497 - <field name="priority" type="string" indexed="true"  
498 - stored="false" required="false" multiValued="false"/>  
499 - <field name="component" type="string" indexed="true"  
500 - stored="false" required="false" multiValued="false"/>  
501 - <field name="version" type="string" indexed="true"  
502 - stored="false" required="false" multiValued="false"/>  
503 - <field name="severity" type="string" indexed="true"  
504 - stored="false" required="false" multiValued="false"/>  
505 - <field name="reporter" type="string" indexed="true"  
506 - stored="false" required="false" multiValued="false"/>  
507 - <field name="owner" type="string" indexed="true"  
508 - stored="false" required="false" multiValued="false"/>  
509 - <field name="status" type="string" indexed="true"  
510 - stored="false" required="false" multiValued="false"/>  
511 - <field name="revision" type="int" indexed="true"  
512 - stored="false" required="false" multiValued="false"/>  
513 - <field name="subject" type="text_ptbr" indexed="true"  
514 - stored="false" required="false" multiValued="false"/>  
515 - <field name="path_string" type="string" indexed="false"  
516 - stored="true" required="false" multiValued="false"/>  
517 -  
518 - <!-- Plone only fields -->  
519 - <field name="review_state" type="string" indexed="true"  
520 - stored="true" required="false" multiValued="false"/>  
521 - <field name="effective" type="date" indexed="true"  
522 - stored="true" required="false" multiValued="false"/>  
523 - <field name="expires" type="date" indexed="true"  
524 - stored="true" required="false" multiValued="false"/>  
525 - <field name="getIcon" type="string" indexed="false"  
526 - stored="true" required="false" multiValued="false"/>  
527 - <field name="getRemoteUrl" type="string" indexed="false"  
528 - stored="true" required="false" multiValued="false"/>  
529 - <field name="exclude_from_nav" type="boolean" indexed="false"  
530 - stored="true" required="false" multiValued="false"/>  
531 - <field name="portal_type" type="string" indexed="true"  
532 - stored="true" required="false" multiValued="false"/>  
533 - <field name="is_folderish" type="boolean" indexed="true"  
534 - stored="true" required="false" multiValued="false"/>  
535 - <field name="Language" type="string" indexed="true"  
536 - stored="true" required="false" multiValued="false"/>  
537 - <field name="Date" type="date" indexed="true"  
538 - stored="true" required="false" multiValued="false"/>  
539 - <field name="allowedRolesAndUsers" type="string" indexed="true"  
540 - stored="false" required="false" multiValued="true"/>  
541 - <field name="object_provides" type="string" indexed="true"  
542 - stored="false" required="false" multiValued="true"/>  
543 - <field name="path_depth" type="integer" indexed="true"  
544 - stored="false" required="false" multiValued="false"/>  
545 - <field name="path_parents" type="string" indexed="true"  
546 - stored="false" required="false" multiValued="true"/>  
547 - <field name="SearchableText" type="text_general" indexed="true"  
548 - stored="false" required="false" multiValued="false"/>  
549 - <field name="searchwords" type="string" indexed="true"  
550 - stored="false" required="false" multiValued="true"/>  
551 - <field name="showinsearch" type="boolean" indexed="true"  
552 - stored="false" required="false" multiValued="false"/>  
553 -  
554 - <dynamicField name="tika_*" type="ignored" />  
555 -  
556 - <field name="collaborator" type="string" indexed="true"  
557 - stored="false" required="false" multiValued="true" />  
558 -  
559 - <!-- catchall field, containing all other searchable text fields (implemented  
560 - via copyField further on in this schema) -->  
561 - <field name="default" type="text_general" indexed="true"  
562 - stored="false" required="false" multiValued="true"/>  
563 - </fields>  
564 -  
565 - <!-- Field to use to determine and enforce document uniqueness.  
566 - Unless this field is marked with required="false", it will be a required field  
567 - -->  
568 - <uniqueKey>UID</uniqueKey>  
569 -  
570 - <!-- field for the QueryParser to use when an explicit fieldname is absent -->  
571 - <defaultSearchField>default</defaultSearchField>  
572 -  
573 - <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->  
574 - <solrQueryParser defaultOperator="OR"/>  
575 -  
576 - <!-- copyField commands copy one field to another at the time a document  
577 - is added to the index. It's used either to index the same field differently,  
578 - or to add multiple fields to the same field for easier/faster searching. -->  
579 - <copyField source="Description" dest="default"/>  
580 - <copyField source="SearchableText" dest="default"/>  
581 - <copyField source="Title" dest="default"/>  
582 - <copyField source="content" dest="default" />  
583 - <copyField source="Creator" dest="default" />  
584 - <copyField source="collaborator" dest="default" />  
585 - <copyField source="comment" dest="default" />  
586 - <copyField source="keyword" dest="default" />  
587 -  
588 - <!-- Above, multiple source fields are copied to the [default] field.  
589 - Another way to map multiple source fields to the same  
590 - destination field is to use the dynamic field syntax.  
591 - copyField also supports a maxChars to copy setting. -->  
592 -  
593 - <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->  
594 -  
595 - <!-- copy name to alphaNameSort, a field designed for sorting by name -->  
596 - <!-- <copyField source="name" dest="alphaNameSort"/> -->  
597 -  
598 - <!-- Similarity is the scoring routine for each document vs. a query.  
599 - A custom similarity may be specified here, but the default is fine  
600 - for most applications. -->  
601 - <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->  
602 - <!-- ... OR ...  
603 - Specify a SimilarityFactory class name implementation  
604 - allowing parameters to be used.  
605 - -->  
606 - <!--  
607 - <similarity class="com.example.solr.CustomSimilarityFactory">  
608 - <str name="paramkey">param value</str>  
609 - </similarity>  
610 - -->  
611 -  
612 -  
613 -</schema>  
614 -  
solr-conf/solr-tomcat.xml
@@ -1,12 +0,0 @@ @@ -1,12 +0,0 @@
1 -<!--  
2 - Context configuration file for the Solr Web App  
3 --->  
4 -  
5 -<Context path="/solr" docBase="/usr/local/share/solr/apache-solr-3.3.0.war"  
6 - debug="0" privileged="true" allowLinking="true" crossContext="true">  
7 - <!-- make symlinks work in Tomcat -->  
8 - <Resources className="org.apache.naming.resources.FileDirContext" allowLinking="true" />  
9 -  
10 - <Environment name="solr/home" type="java.lang.String" value="/usr/local/share/solr" override="true" />  
11 - </Context>  
12 -  
solr-conf/solrconfig.xml
@@ -1,1535 +0,0 @@ @@ -1,1535 +0,0 @@
1 -<?xml version="1.0" encoding="UTF-8" ?>  
2 -<!--  
3 - Licensed to the Apache Software Foundation (ASF) under one or more  
4 - contributor license agreements. See the NOTICE file distributed with  
5 - this work for additional information regarding copyright ownership.  
6 - The ASF licenses this file to You under the Apache License, Version 2.0  
7 - (the "License"); you may not use this file except in compliance with  
8 - the License. You may obtain a copy of the License at  
9 -  
10 - http://www.apache.org/licenses/LICENSE-2.0  
11 -  
12 - Unless required by applicable law or agreed to in writing, software  
13 - distributed under the License is distributed on an "AS IS" BASIS,  
14 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
15 - See the License for the specific language governing permissions and  
16 - limitations under the License.  
17 --->  
18 -  
19 -<!--  
20 - For more details about configurations options that may appear in  
21 - this file, see http://wiki.apache.org/solr/SolrConfigXml.  
22 --->  
23 -<config>  
24 - <!-- In all configuration below, a prefix of "solr." for class names  
25 - is an alias that causes solr to search appropriate packages,  
26 - including org.apache.solr.(search|update|request|core|analysis)  
27 -  
28 - You may also specify a fully qualified Java classname if you  
29 - have your own custom plugins.  
30 - -->  
31 -  
32 - <!-- Set this to 'false' if you want solr to continue working after  
33 - it has encountered a severe configuration error. In a  
34 - production environment, you may want solr to keep working even  
35 - if one handler is mis-configured.  
36 -  
37 - You may also set this to false by setting the system  
38 - property:  
39 -  
40 - -Dsolr.abortOnConfigurationError=false  
41 - -->  
42 - <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>  
43 -  
44 - <!-- Controls what version of Lucene various components of Solr  
45 - adhere to. Generally, you want to use the latest version to  
46 - get all bug fixes and improvements. It is highly recommended  
47 - that you fully re-index after changing this setting as it can  
48 - affect both how text is indexed and queried.  
49 - -->  
50 - <luceneMatchVersion>LUCENE_33</luceneMatchVersion>  
51 -  
52 - <!-- lib directives can be used to instruct Solr to load any Jars  
53 - identified and use them to resolve any "plugins" specified in  
54 - your solrconfig.xml or schema.xml (ie: Analyzers, Request  
55 - Handlers, etc...).  
56 -  
57 - All directories and paths are resolved relative to the  
58 - instanceDir.  
59 -  
60 - If a "./lib" directory exists in your instanceDir, all files  
61 - found in it are included as if you had used the following  
62 - syntax...  
63 -  
64 - <lib dir="./lib" />  
65 - -->  
66 - <!-- A dir option by itself adds any files found in the directory to  
67 - the classpath, this is useful for including all jars in a  
68 - directory.  
69 - -->  
70 - <lib dir="../../contrib/extraction/lib" />  
71 - <!-- When a regex is specified in addition to a directory, only the  
72 - files in that directory which completely match the regex  
73 - (anchored on both ends) will be included.  
74 - -->  
75 - <lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />  
76 - <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />  
77 - <lib dir="../../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />  
78 -  
79 - <!-- If a dir option (with or without a regex) is used and nothing  
80 - is found that matches, it will be ignored  
81 - -->  
82 - <lib dir="../../contrib/clustering/lib/" />  
83 - <lib dir="/total/crap/dir/ignored" />  
84 - <!-- an exact path can be used to specify a specific file. This  
85 - will cause a serious error to be logged if it can't be loaded.  
86 - -->  
87 - <!--  
88 - <lib path="../a-jar-that-does-not-exist.jar" />  
89 - -->  
90 -  
91 - <!-- Data Directory  
92 -  
93 - Used to specify an alternate directory to hold all index data  
94 - other than the default ./data under the Solr home. If  
95 - replication is in use, this should match the replication  
96 - configuration.  
97 - -->  
98 - <dataDir>${solr.data.dir:/var/local/lib/solr/data}</dataDir>  
99 -  
100 -  
101 - <!-- The DirectoryFactory to use for indexes.  
102 -  
103 - solr.StandardDirectoryFactory, the default, is filesystem  
104 - based. solr.RAMDirectoryFactory is memory based, not  
105 - persistent, and doesn't work with replication.  
106 - -->  
107 - <directoryFactory name="DirectoryFactory"  
108 - class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>  
109 -  
110 -  
111 - <!-- Index Defaults  
112 -  
113 - Values here affect all index writers and act as a default  
114 - unless overridden.  
115 -  
116 - WARNING: See also the <mainIndex> section below for parameters  
117 - that override these defaults for Solr's main Lucene index.  
118 - -->  
119 - <indexDefaults>  
120 -  
121 - <useCompoundFile>false</useCompoundFile>  
122 -  
123 - <mergeFactor>10</mergeFactor>  
124 - <!-- Sets the amount of RAM that may be used by Lucene indexing  
125 - for buffering added documents and deletions before they are  
126 - flushed to the Directory. -->  
127 - <ramBufferSizeMB>32</ramBufferSizeMB>  
128 - <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then  
129 - Lucene will flush based on whichever limit is hit first.  
130 - -->  
131 - <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->  
132 -  
133 - <maxFieldLength>10000</maxFieldLength>  
134 - <writeLockTimeout>1000</writeLockTimeout>  
135 - <commitLockTimeout>10000</commitLockTimeout>  
136 -  
137 - <!-- Expert: Merge Policy  
138 -  
139 - The Merge Policy in Lucene controls how merging is handled by  
140 - Lucene. The default in Solr 3.3 is TieredMergePolicy.  
141 -  
142 - The default in 2.3 was the LogByteSizeMergePolicy,  
143 - previous versions used LogDocMergePolicy.  
144 -  
145 - LogByteSizeMergePolicy chooses segments to merge based on  
146 - their size. The Lucene 2.2 default, LogDocMergePolicy chose  
147 - when to merge based on number of documents  
148 -  
149 - Other implementations of MergePolicy must have a no-argument  
150 - constructor  
151 - -->  
152 - <!--  
153 - <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"/>  
154 - -->  
155 -  
156 - <!-- Expert: Merge Scheduler  
157 -  
158 - The Merge Scheduler in Lucene controls how merges are  
159 - performed. The ConcurrentMergeScheduler (Lucene 2.3 default)  
160 - can perform merges in the background using separate threads.  
161 - The SerialMergeScheduler (Lucene 2.2 default) does not.  
162 - -->  
163 - <!--  
164 - <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>  
165 - -->  
166 -  
167 - <!-- LockFactory  
168 -  
169 - This option specifies which Lucene LockFactory implementation  
170 - to use.  
171 -  
172 - single = SingleInstanceLockFactory - suggested for a  
173 - read-only index or when there is no possibility of  
174 - another process trying to modify the index.  
175 - native = NativeFSLockFactory - uses OS native file locking.  
176 - Do not use when multiple solr webapps in the same  
177 - JVM are attempting to share a single index.  
178 - simple = SimpleFSLockFactory - uses a plain file for locking  
179 -  
180 - (For backwards compatibility with Solr 1.2, 'simple' is the  
181 - default if not specified.)  
182 -  
183 - More details on the nuances of each LockFactory...  
184 - http://wiki.apache.org/lucene-java/AvailableLockFactories  
185 - -->  
186 - <lockType>native</lockType>  
187 -  
188 - <!-- Expert: Controls how often Lucene loads terms into memory  
189 - Default is 128 and is likely good for most everyone.  
190 - -->  
191 - <!-- <termIndexInterval>256</termIndexInterval> -->  
192 - </indexDefaults>  
193 -  
194 - <!-- Main Index  
195 -  
196 - Values here override the values in the <indexDefaults> section  
197 - for the main on disk index.  
198 - -->  
199 - <mainIndex>  
200 -  
201 - <useCompoundFile>false</useCompoundFile>  
202 - <ramBufferSizeMB>32</ramBufferSizeMB>  
203 - <mergeFactor>10</mergeFactor>  
204 -  
205 - <!-- Unlock On Startup  
206 -  
207 - If true, unlock any held write or commit locks on startup.  
208 - This defeats the locking mechanism that allows multiple  
209 - processes to safely access a lucene index, and should be used  
210 - with care.  
211 -  
212 - This is not needed if lock type is 'none' or 'single'  
213 - -->  
214 - <unlockOnStartup>false</unlockOnStartup>  
215 -  
216 - <!-- If true, IndexReaders will be reopened (often more efficient)  
217 - instead of closed and then opened.  
218 - -->  
219 - <reopenReaders>true</reopenReaders>  
220 -  
221 - <!-- Commit Deletion Policy  
222 -  
223 - Custom deletion policies can be specified here. The class must  
224 - implement org.apache.lucene.index.IndexDeletionPolicy.  
225 -  
226 - http://lucene.apache.org/java/2_9_1/api/all/org/apache/lucene/index/IndexDeletionPolicy.html  
227 -  
228 - The standard Solr IndexDeletionPolicy implementation supports  
229 - deleting index commit points on number of commits, age of  
230 - commit point and optimized status.  
231 -  
232 - The latest commit point should always be preserved regardless  
233 - of the criteria.  
234 - -->  
235 - <deletionPolicy class="solr.SolrDeletionPolicy">  
236 - <!-- The number of commit points to be kept -->  
237 - <str name="maxCommitsToKeep">1</str>  
238 - <!-- The number of optimized commit points to be kept -->  
239 - <str name="maxOptimizedCommitsToKeep">0</str>  
240 - <!--  
241 - Delete all commit points once they have reached the given age.  
242 - Supports DateMathParser syntax e.g.  
243 - -->  
244 - <!--  
245 - <str name="maxCommitAge">30MINUTES</str>  
246 - <str name="maxCommitAge">1DAY</str>  
247 - -->  
248 - </deletionPolicy>  
249 -  
250 - <!-- Lucene Infostream  
251 -  
252 - To aid in advanced debugging, Lucene provides an "InfoStream"  
253 - of detailed information when indexing.  
254 -  
255 - Setting the value to true will instruct the underlying Lucene  
256 - IndexWriter to write its debugging info to the specified file  
257 - -->  
258 - <infoStream file="INFOSTREAM.txt">false</infoStream>  
259 -  
260 - </mainIndex>  
261 -  
262 - <!-- JMX  
263 -  
264 - This example enables JMX if and only if an existing MBeanServer  
265 - is found, use this if you want to configure JMX through JVM  
266 - parameters. Remove this to disable exposing Solr configuration  
267 - and statistics to JMX.  
268 -  
269 - For more details see http://wiki.apache.org/solr/SolrJmx  
270 - -->  
271 - <jmx />  
272 - <!-- If you want to connect to a particular server, specify the  
273 - agentId  
274 - -->  
275 - <!-- <jmx agentId="myAgent" /> -->  
276 - <!-- If you want to start a new MBeanServer, specify the serviceUrl -->  
277 - <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>  
278 - -->  
279 -  
280 - <!-- The default high-performance update handler -->  
281 - <updateHandler class="solr.DirectUpdateHandler2">  
282 -  
283 - <!-- AutoCommit  
284 -  
285 - Perform a <commit/> automatically under certain conditions.  
286 - Instead of enabling autoCommit, consider using "commitWithin"  
287 - when adding documents.  
288 -  
289 - http://wiki.apache.org/solr/UpdateXmlMessages  
290 -  
291 - maxDocs - Maximum number of documents to add since the last  
292 - commit before automatically triggering a new commit.  
293 -  
294 - maxTime - Maximum amount of time that is allowed to pass  
295 - since a document was added before automatically  
296 - triggering a new commit.  
297 - -->  
298 - <!--  
299 - <autoCommit>  
300 - <maxDocs>10000</maxDocs>  
301 - <maxTime>1000</maxTime>  
302 - </autoCommit>  
303 - -->  
304 -  
305 - <!-- Update Related Event Listeners  
306 -  
307 - Various IndexWriter related events can trigger Listeners to  
308 - take actions.  
309 -  
310 - postCommit - fired after every commit or optimize command  
311 - postOptimize - fired after every optimize command  
312 - -->  
313 - <!-- The RunExecutableListener executes an external command from a  
314 - hook such as postCommit or postOptimize.  
315 -  
316 - exe - the name of the executable to run  
317 - dir - dir to use as the current working directory. (default=".")  
318 - wait - the calling thread waits until the executable returns.  
319 - (default="true")  
320 - args - the arguments to pass to the program. (default is none)  
321 - env - environment variables to set. (default is none)  
322 - -->  
323 - <!-- This example shows how RunExecutableListener could be used  
324 - with the script based replication...  
325 - http://wiki.apache.org/solr/CollectionDistribution  
326 - -->  
327 - <!--  
328 - <listener event="postCommit" class="solr.RunExecutableListener">  
329 - <str name="exe">solr/bin/snapshooter</str>  
330 - <str name="dir">.</str>  
331 - <bool name="wait">true</bool>  
332 - <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>  
333 - <arr name="env"> <str>MYVAR=val1</str> </arr>  
334 - </listener>  
335 - -->  
336 - </updateHandler>  
337 -  
338 - <!-- IndexReaderFactory  
339 -  
340 - Use the following format to specify a custom IndexReaderFactory,  
341 - which allows for alternate IndexReader implementations.  
342 -  
343 - ** Experimental Feature **  
344 -  
345 - Please note - Using a custom IndexReaderFactory may prevent  
346 - certain other features from working. The API to  
347 - IndexReaderFactory may change without warning or may even be  
348 - removed from future releases if the problems cannot be  
349 - resolved.  
350 -  
351 -  
352 - ** Features that may not work with custom IndexReaderFactory **  
353 -  
354 - The ReplicationHandler assumes a disk-resident index. Using a  
355 - custom IndexReader implementation may cause incompatibility  
356 - with ReplicationHandler and may cause replication to not work  
357 - correctly. See SOLR-1366 for details.  
358 -  
359 - -->  
360 - <!--  
361 - <indexReaderFactory name="IndexReaderFactory" class="package.class">  
362 - <str name="someArg">Some Value</str>  
363 - </indexReaderFactory >  
364 - -->  
365 - <!-- By explicitly declaring the Factory, the termIndexDivisor can  
366 - be specified.  
367 - -->  
368 - <!--  
369 - <indexReaderFactory name="IndexReaderFactory"  
370 - class="solr.StandardIndexReaderFactory">  
371 - <int name="setTermIndexDivisor">12</int>  
372 - </indexReaderFactory >  
373 - -->  
374 -  
375 -  
376 - <query>  
377 - <!-- Max Boolean Clauses  
378 -  
379 - Maximum number of clauses in each BooleanQuery, an exception  
380 - is thrown if exceeded.  
381 -  
382 - ** WARNING **  
383 -  
384 - This option actually modifies a global Lucene property that  
385 - will affect all SolrCores. If multiple solrconfig.xml files  
386 - disagree on this property, the value at any given moment will  
387 - be based on the last SolrCore to be initialized.  
388 -  
389 - -->  
390 - <maxBooleanClauses>1024</maxBooleanClauses>  
391 -  
392 -  
393 - <!-- Solr Internal Query Caches  
394 -  
395 - There are two implementations of cache available for Solr,  
396 - LRUCache, based on a synchronized LinkedHashMap, and  
397 - FastLRUCache, based on a ConcurrentHashMap.  
398 -  
399 - FastLRUCache has faster gets and slower puts in single  
400 - threaded operation and thus is generally faster than LRUCache  
401 - when the hit ratio of the cache is high (> 75%), and may be  
402 - faster under other scenarios on multi-cpu systems.  
403 - -->  
404 -  
405 - <!-- Filter Cache  
406 -  
407 - Cache used by SolrIndexSearcher for filters (DocSets),  
408 - unordered sets of *all* documents that match a query. When a  
409 - new searcher is opened, its caches may be prepopulated or  
410 - "autowarmed" using data from caches in the old searcher.  
411 - autowarmCount is the number of items to prepopulate. For  
412 - LRUCache, the autowarmed items will be the most recently  
413 - accessed items.  
414 -  
415 - Parameters:  
416 - class - the SolrCache implementation  
417 - (LRUCache or FastLRUCache)  
418 - size - the maximum number of entries in the cache  
419 - initialSize - the initial capacity (number of entries) of  
420 - the cache. (see java.util.HashMap)  
421 - autowarmCount - the number of entries to prepopulate from  
422 - an old cache.  
423 - -->  
424 - <filterCache class="solr.FastLRUCache"  
425 - size="512"  
426 - initialSize="512"  
427 - autowarmCount="0"/>  
428 -  
429 - <!-- Query Result Cache  
430 -  
431 - Caches results of searches - ordered lists of document ids  
432 - (DocList) based on a query, a sort, and the range of documents requested.  
433 - -->  
434 - <queryResultCache class="solr.LRUCache"  
435 - size="512"  
436 - initialSize="512"  
437 - autowarmCount="0"/>  
438 -  
439 - <!-- Document Cache  
440 -  
441 - Caches Lucene Document objects (the stored fields for each  
442 - document). Since Lucene internal document ids are transient,  
443 - this cache will not be autowarmed.  
444 - -->  
445 - <documentCache class="solr.LRUCache"  
446 - size="512"  
447 - initialSize="512"  
448 - autowarmCount="0"/>  
449 -  
450 - <!-- Field Value Cache  
451 -  
452 - Cache used to hold field values that are quickly accessible  
453 - by document id. The fieldValueCache is created by default  
454 - even if not configured here.  
455 - -->  
456 - <!--  
457 - <fieldValueCache class="solr.FastLRUCache"  
458 - size="512"  
459 - autowarmCount="128"  
460 - showItems="32" />  
461 - -->  
462 -  
463 - <!-- Custom Cache  
464 -  
465 - Example of a generic cache. These caches may be accessed by  
466 - name through SolrIndexSearcher.getCache(),cacheLookup(), and  
467 - cacheInsert(). The purpose is to enable easy caching of  
468 - user/application level data. The regenerator argument should  
469 - be specified as an implementation of solr.CacheRegenerator  
470 - if autowarming is desired.  
471 - -->  
472 - <!--  
473 - <cache name="myUserCache"  
474 - class="solr.LRUCache"  
475 - size="4096"  
476 - initialSize="1024"  
477 - autowarmCount="1024"  
478 - regenerator="com.mycompany.MyRegenerator"  
479 - />  
480 - -->  
481 -  
482 -  
483 - <!-- Lazy Field Loading  
484 -  
485 - If true, stored fields that are not requested will be loaded  
486 - lazily. This can result in a significant speed improvement  
487 - if the usual case is to not load all stored fields,  
488 - especially if the skipped fields are large compressed text  
489 - fields.  
490 - -->  
491 - <enableLazyFieldLoading>true</enableLazyFieldLoading>  
492 -  
493 - <!-- Use Filter For Sorted Query  
494 -  
495 - A possible optimization that attempts to use a filter to  
496 - satisfy a search. If the requested sort does not include  
497 - score, then the filterCache will be checked for a filter  
498 - matching the query. If found, the filter will be used as the  
499 - source of document ids, and then the sort will be applied to  
500 - that.  
501 -  
502 - For most situations, this will not be useful unless you  
503 - frequently get the same search repeatedly with different sort  
504 - options, and none of them ever use "score"  
505 - -->  
506 - <!--  
507 - <useFilterForSortedQuery>true</useFilterForSortedQuery>  
508 - -->  
509 -  
510 - <!-- Result Window Size  
511 -  
512 - An optimization for use with the queryResultCache. When a search  
513 - is requested, a superset of the requested number of document ids  
514 - are collected. For example, if a search for a particular query  
515 - requests matching documents 10 through 19, and queryResultWindowSize is 50,  
516 - then documents 0 through 49 will be collected and cached. Any further  
517 - requests in that range can be satisfied via the cache.  
518 - -->  
519 - <queryResultWindowSize>20</queryResultWindowSize>  
520 -  
521 - <!-- Maximum number of documents to cache for any entry in the  
522 - queryResultCache.  
523 - -->  
524 - <queryResultMaxDocsCached>200</queryResultMaxDocsCached>  
525 -  
526 - <!-- Query Related Event Listeners  
527 -  
528 - Various IndexSearcher related events can trigger Listeners to  
529 - take actions.  
530 -  
531 - newSearcher - fired whenever a new searcher is being prepared  
532 - and there is a current searcher handling requests (aka  
533 - registered). It can be used to prime certain caches to  
534 - prevent long request times for certain requests.  
535 -  
536 - firstSearcher - fired whenever a new searcher is being  
537 - prepared but there is no current registered searcher to handle  
538 - requests or to gain autowarming data from.  
539 -  
540 -  
541 - -->  
542 - <!-- QuerySenderListener takes an array of NamedList and executes a  
543 - local query request for each NamedList in sequence.  
544 - -->  
545 - <listener event="newSearcher" class="solr.QuerySenderListener">  
546 - <arr name="queries">  
547 - <!--  
548 - <lst><str name="q">solr</str><str name="sort">price asc</str></lst>  
549 - <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>  
550 - -->  
551 - </arr>  
552 - </listener>  
553 - <listener event="firstSearcher" class="solr.QuerySenderListener">  
554 - <arr name="queries">  
555 - <lst>  
556 - <str name="q">static firstSearcher warming in solrconfig.xml</str>  
557 - </lst>  
558 - </arr>  
559 - </listener>  
560 -  
561 - <!-- Use Cold Searcher  
562 -  
563 - If a search request comes in and there is no current  
564 - registered searcher, then immediately register the still  
565 - warming searcher and use it. If "false" then all requests  
566 - will block until the first searcher is done warming.  
567 - -->  
568 - <useColdSearcher>false</useColdSearcher>  
569 -  
570 - <!-- Max Warming Searchers  
571 -  
572 - Maximum number of searchers that may be warming in the  
573 - background concurrently. An error is returned if this limit  
574 - is exceeded.  
575 -  
576 - Recommend values of 1-2 for read-only slaves, higher for  
577 - masters w/o cache warming.  
578 - -->  
579 - <maxWarmingSearchers>2</maxWarmingSearchers>  
580 -  
581 - </query>  
582 -  
583 -  
584 - <!-- Request Dispatcher  
585 -  
586 - This section contains instructions for how the SolrDispatchFilter  
587 - should behave when processing requests for this SolrCore.  
588 -  
589 - handleSelect affects the behavior of requests such as /select?qt=XXX  
590 -  
591 - handleSelect="true" will cause the SolrDispatchFilter to process  
592 - the request and will result in consistent error handling and  
593 - formatting for all types of requests.  
594 -  
595 - handleSelect="false" will cause the SolrDispatchFilter to  
596 - ignore "/select" requests and fallback to using the legacy  
597 - SolrServlet and its Solr 1.1 style error formatting  
598 - -->  
599 - <requestDispatcher handleSelect="true" >  
600 - <!-- Request Parsing  
601 -  
602 - These settings indicate how Solr Requests may be parsed, and  
603 - what restrictions may be placed on the ContentStreams from  
604 - those requests  
605 -  
606 - enableRemoteStreaming - enables use of the stream.file  
607 - and stream.url parameters for specifying remote streams.  
608 -  
609 - multipartUploadLimitInKB - specifies the max size of  
610 - Multipart File Uploads that Solr will allow in a Request.  
611 -  
612 - *** WARNING ***  
613 - The settings below authorize Solr to fetch remote files. You  
614 - should make sure your system has some authentication before  
615 - using enableRemoteStreaming="true"  
616 -  
617 - -->  
618 - <requestParsers enableRemoteStreaming="true"  
619 - multipartUploadLimitInKB="2048000" />  
620 -  
621 - <!-- HTTP Caching  
622 -  
623 - Set HTTP caching related parameters (for proxy caches and clients).  
624 -  
625 - The options below instruct Solr not to output any HTTP Caching  
626 - related headers  
627 - -->  
628 - <httpCaching never304="true" />  
629 - <!-- If you include a <cacheControl> directive, it will be used to  
630 - generate a Cache-Control header (as well as an Expires header  
631 - if the value contains "max-age=")  
632 -  
633 - By default, no Cache-Control header is generated.  
634 -  
635 - You can use the <cacheControl> option even if you have set  
636 - never304="true"  
637 - -->  
638 - <!--  
639 - <httpCaching never304="true" >  
640 - <cacheControl>max-age=30, public</cacheControl>  
641 - </httpCaching>  
642 - -->  
643 - <!-- To enable Solr to respond with automatically generated HTTP  
644 - Caching headers, and to respond to Cache Validation requests  
645 - correctly, set the value of never304="false"  
646 -  
647 - This will cause Solr to generate Last-Modified and ETag  
648 - headers based on the properties of the Index.  
649 -  
650 - The following options can also be specified to affect the  
651 - values of these headers...  
652 -  
653 - lastModFrom - the default value is "openTime" which means the  
654 - Last-Modified value (and validation against If-Modified-Since  
655 - requests) will all be relative to when the current Searcher  
656 - was opened. You can change it to lastModFrom="dirLastMod" if  
657 - you want the value to exactly correspond to when the physical  
658 - index was last modified.  
659 -  
660 - etagSeed="..." is an option you can change to force the ETag  
661 - header (and validation against If-None-Match requests) to be  
662 - different even if the index has not changed (ie: when making  
663 - significant changes to your config file)  
664 -  
665 - (lastModFrom and etagSeed are both ignored if you use  
666 - the never304="true" option)  
667 - -->  
668 - <!--  
669 - <httpCaching lastModFrom="openTime"  
670 - etagSeed="Solr">  
671 - <cacheControl>max-age=30, public</cacheControl>  
672 - </httpCaching>  
673 - -->  
674 - </requestDispatcher>  
675 -  
676 - <!-- Request Handlers  
677 -  
678 - http://wiki.apache.org/solr/SolrRequestHandler  
679 -  
680 - incoming queries will be dispatched to the correct handler  
681 - based on the path or the qt (query type) param.  
682 -  
683 - Names starting with a '/' are accessed with a path equal to  
684 - the registered name. Names without a leading '/' are accessed  
685 - with: http://host/app/[core/]select?qt=name  
686 -  
687 - If a /select request is processed without a qt param  
688 - specified, the requestHandler that declares default="true" will  
689 - be used.  
690 -  
691 - If a Request Handler is declared with startup="lazy", then it will  
692 - not be initialized until the first request that uses it.  
693 -  
694 - -->  
695 - <!-- SearchHandler  
696 -  
697 - http://wiki.apache.org/solr/SearchHandler  
698 -  
699 - For processing Search Queries, the primary Request Handler  
700 - provided with Solr is "SearchHandler". It delegates to a sequence  
701 - of SearchComponents (see below) and supports distributed  
702 - queries across multiple shards  
703 - -->  
704 - <requestHandler name="search" class="solr.SearchHandler" default="true">  
705 - <!-- default values for query parameters can be specified, these  
706 - will be overridden by parameters in the request  
707 - -->  
708 - <lst name="defaults">  
709 - <str name="echoParams">explicit</str>  
710 - <int name="rows">10</int>  
711 - </lst>  
712 - <!-- In addition to defaults, "appends" params can be specified  
713 - to identify values which should be appended to the list of  
714 - multi-val params from the query (or the existing "defaults").  
715 - -->  
716 - <!-- In this example, the param "fq=instock:true" would be appended to  
717 - any query time fq params the user may specify, as a mechanism for  
718 - partitioning the index, independent of any user selected filtering  
719 - that may also be desired (perhaps as a result of faceted searching).  
720 -  
721 - NOTE: there is *absolutely* nothing a client can do to prevent these  
722 - "appends" values from being used, so don't use this mechanism  
723 - unless you are sure you always want it.  
724 - -->  
725 - <!--  
726 - <lst name="appends">  
727 - <str name="fq">inStock:true</str>  
728 - </lst>  
729 - -->  
730 - <!-- "invariants" are a way of letting the Solr maintainer lock down  
731 - the options available to Solr clients. Any params values  
732 - specified here are used regardless of what values may be specified  
733 - in either the query, the "defaults", or the "appends" params.  
734 -  
735 - In this example, the facet.field and facet.query params would  
736 - be fixed, limiting the facets clients can use. Faceting is  
737 - not turned on by default - but if the client does specify  
738 - facet=true in the request, these are the only facets they  
739 - will be able to see counts for; regardless of what other  
740 - facet.field or facet.query params they may specify.  
741 -  
742 - NOTE: there is *absolutely* nothing a client can do to prevent these  
743 - "invariants" values from being used, so don't use this mechanism  
744 - unless you are sure you always want it.  
745 - -->  
746 - <!--  
747 - <lst name="invariants">  
748 - <str name="facet.field">cat</str>  
749 - <str name="facet.field">manu_exact</str>  
750 - <str name="facet.query">price:[* TO 500]</str>  
751 - <str name="facet.query">price:[500 TO *]</str>  
752 - </lst>  
753 - -->  
754 - <!-- If the default list of SearchComponents is not desired, that  
755 - list can either be overridden completely, or components can be  
756 - prepended or appended to the default list. (see below)  
757 - -->  
758 - <!--  
759 - <arr name="components">  
760 - <str>nameOfCustomComponent1</str>  
761 - <str>nameOfCustomComponent2</str>  
762 - </arr>  
763 - -->  
764 - </requestHandler>  
765 -  
766 - <!-- A Robust Example  
767 -  
768 - This example SearchHandler declaration shows off usage of the  
769 - SearchHandler with many defaults declared  
770 -  
771 - Note that multiple instances of the same Request Handler  
772 - (SearchHandler) can be registered multiple times with different  
773 - names (and different init parameters)  
774 - -->  
775 - <requestHandler name="/browse" class="solr.SearchHandler">  
776 - <lst name="defaults">  
777 - <str name="echoParams">explicit</str>  
778 -  
779 - <!-- VelocityResponseWriter settings -->  
780 - <str name="wt">velocity</str>  
781 -  
782 - <str name="v.template">browse</str>  
783 - <str name="v.layout">layout</str>  
784 - <str name="title">Solritas</str>  
785 -  
786 - <str name="defType">edismax</str>  
787 - <str name="q.alt">*:*</str>  
788 - <str name="rows">10</str>  
789 - <str name="fl">*,score</str>  
790 - <str name="mlt.qf">  
791 - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4  
792 - </str>  
793 - <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>  
794 - <int name="mlt.count">3</int>  
795 -  
796 - <str name="qf">  
797 - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4  
798 - </str>  
799 -  
800 - <str name="facet">on</str>  
801 - <str name="facet.field">cat</str>  
802 - <str name="facet.field">manu_exact</str>  
803 - <str name="facet.query">ipod</str>  
804 - <str name="facet.query">GB</str>  
805 - <str name="facet.mincount">1</str>  
806 - <str name="facet.pivot">cat,inStock</str>  
807 - <str name="facet.range">price</str>  
808 - <int name="f.price.facet.range.start">0</int>  
809 - <int name="f.price.facet.range.end">600</int>  
810 - <int name="f.price.facet.range.gap">50</int>  
811 - <str name="f.price.facet.range.other">after</str>  
812 - <str name="facet.range">manufacturedate_dt</str>  
813 - <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>  
814 - <str name="f.manufacturedate_dt.facet.range.end">NOW</str>  
815 - <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>  
816 - <str name="f.manufacturedate_dt.facet.range.other">before</str>  
817 - <str name="f.manufacturedate_dt.facet.range.other">after</str>  
818 -  
819 -  
820 - <!-- Highlighting defaults -->  
821 - <str name="hl">on</str>  
822 - <str name="hl.fl">text features name</str>  
823 - <str name="f.name.hl.fragsize">0</str>  
824 - <str name="f.name.hl.alternateField">name</str>  
825 - </lst>  
826 - <arr name="last-components">  
827 - <str>spellcheck</str>  
828 - </arr>  
829 - <!--  
830 - <str name="url-scheme">httpx</str>  
831 - -->  
832 - </requestHandler>  
833 -  
834 - <!-- XML Update Request Handler.  
835 -  
836 - http://wiki.apache.org/solr/UpdateXmlMessages  
837 -  
838 - The canonical Request Handler for Modifying the Index through  
839 - commands specified using XML.  
840 -  
841 - Note: Since solr1.1 requestHandlers require a valid content  
842 - type header if posted in the body. For example, curl now  
843 - requires: -H 'Content-type:text/xml; charset=utf-8'  
844 - -->  
845 - <requestHandler name="/update"  
846 - class="solr.XmlUpdateRequestHandler">  
847 - <!-- See below for information on defining  
848 - updateRequestProcessorChains that can be used by name  
849 - on each Update Request  
850 - -->  
851 - <!--  
852 - <lst name="defaults">  
853 - <str name="update.chain">dedupe</str>  
854 - </lst>  
855 - -->  
856 - </requestHandler>  
857 - <!-- Binary Update Request Handler  
858 - http://wiki.apache.org/solr/javabin  
859 - -->  
860 - <requestHandler name="/update/javabin"  
861 - class="solr.BinaryUpdateRequestHandler" />  
862 -  
863 - <!-- CSV Update Request Handler  
864 - http://wiki.apache.org/solr/UpdateCSV  
865 - -->  
866 - <requestHandler name="/update/csv"  
867 - class="solr.CSVRequestHandler"  
868 - startup="lazy" />  
869 -  
870 - <!-- JSON Update Request Handler  
871 - http://wiki.apache.org/solr/UpdateJSON  
872 - -->  
873 - <requestHandler name="/update/json"  
874 - class="solr.JsonUpdateRequestHandler"  
875 - startup="lazy" />  
876 -  
877 - <!-- Solr Cell Update Request Handler  
878 -  
879 - http://wiki.apache.org/solr/ExtractingRequestHandler  
880 -  
881 - -->  
882 - <requestHandler name="/update/extract"  
883 - startup="lazy"  
884 - class="solr.extraction.ExtractingRequestHandler" >  
885 - <lst name="defaults">  
886 - <!-- All the main content goes into "text"... if you need to return  
887 - the extracted text or do highlighting, use a stored field. -->  
888 - <str name="fmap.content">text</str>  
889 - <str name="lowernames">true</str>  
890 - <str name="uprefix">ignored_</str>  
891 -  
892 - <!-- capture link hrefs but ignore div attributes -->  
893 - <str name="captureAttr">true</str>  
894 - <str name="fmap.a">links</str>  
895 - <str name="fmap.div">ignored_</str>  
896 - </lst>  
897 - </requestHandler>  
898 -  
899 - <!-- Field Analysis Request Handler  
900 -  
901 - RequestHandler that provides much the same functionality as  
902 - analysis.jsp. Provides the ability to specify multiple field  
903 - types and field names in the same request and outputs  
904 - index-time and query-time analysis for each of them.  
905 -  
906 - Request parameters are:  
907 - analysis.fieldname - field name whose analyzers are to be used  
908 -  
909 - analysis.fieldtype - field type whose analyzers are to be used  
910 - analysis.fieldvalue - text for index-time analysis  
911 - q (or analysis.q) - text for query time analysis  
912 - analysis.showmatch (true|false) - When set to true and when  
913 - query analysis is performed, the produced tokens of the  
914 - field value analysis will be marked as "matched" for every  
915 - token that is produced by the query analysis  
916 - -->  
917 - <requestHandler name="/analysis/field"  
918 - startup="lazy"  
919 - class="solr.FieldAnalysisRequestHandler" />  
920 -  
921 -  
922 - <!-- Document Analysis Handler  
923 -  
924 - http://wiki.apache.org/solr/AnalysisRequestHandler  
925 -  
926 - An analysis handler that provides a breakdown of the analysis  
927 - process of provided documents. This handler expects a (single)  
928 - content stream with the following format:  
929 -  
930 - <docs>  
931 - <doc>  
932 - <field name="id">1</field>  
933 - <field name="name">The Name</field>  
934 - <field name="text">The Text Value</field>  
935 - </doc>  
936 - <doc>...</doc>  
937 - <doc>...</doc>  
938 - ...  
939 - </docs>  
940 -  
941 - Note: Each document must contain a field which serves as the  
942 - unique key. This key is used in the returned response to associate  
943 - an analysis breakdown to the analyzed document.  
944 -  
945 - Like the FieldAnalysisRequestHandler, this handler also supports  
946 - query analysis by sending either an "analysis.query" or "q"  
947 - request parameter that holds the query text to be analyzed. It  
948 - also supports the "analysis.showmatch" parameter which when set to  
949 - true, all field tokens that match the query tokens will be marked  
950 - as a "match".  
951 - -->  
952 - <requestHandler name="/analysis/document"  
953 - class="solr.DocumentAnalysisRequestHandler"  
954 - startup="lazy" />  
955 -  
956 - <!-- Admin Handlers  
957 -  
958 - Admin Handlers - This will register all the standard admin  
959 - RequestHandlers.  
960 - -->  
961 - <requestHandler name="/admin/"  
962 - class="solr.admin.AdminHandlers" />  
963 - <!-- This single handler is equivalent to the following... -->  
964 - <!--  
965 - <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />  
966 - <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />  
967 - <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />  
968 - <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />  
969 - <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />  
970 - <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />  
971 - -->  
972 - <!-- If you wish to hide files under ${solr.home}/conf, explicitly  
973 - register the ShowFileRequestHandler using:  
974 - -->  
975 - <!--  
976 - <requestHandler name="/admin/file"  
977 - class="solr.admin.ShowFileRequestHandler" >  
978 - <lst name="invariants">  
979 - <str name="hidden">synonyms.txt</str>  
980 - <str name="hidden">anotherfile.txt</str>  
981 - </lst>  
982 - </requestHandler>  
983 - -->  
984 -  
985 - <!-- ping/healthcheck -->  
986 - <requestHandler name="/admin/ping" class="solr.PingRequestHandler">  
987 - <lst name="defaults">  
988 - <str name="qt">search</str>  
989 - <str name="q">solrpingquery</str>  
990 - <str name="echoParams">all</str>  
991 - </lst>  
992 - </requestHandler>  
993 -  
994 - <!-- Echo the request contents back to the client -->  
995 - <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >  
996 - <lst name="defaults">  
997 - <str name="echoParams">explicit</str>  
998 - <str name="echoHandler">true</str>  
999 - </lst>  
1000 - </requestHandler>  
1001 -  
1002 - <!-- Solr Replication  
1003 -  
1004 - The SolrReplicationHandler supports replicating indexes from a  
1005 - "master" used for indexing and "slaves" used for queries.  
1006 -  
1007 - http://wiki.apache.org/solr/SolrReplication  
1008 -  
1009 - In the example below, remove the <lst name="master"> section if  
1010 - this is just a slave and remove the <lst name="slave"> section  
1011 - if this is just a master.  
1012 - -->  
1013 - <!--  
1014 - <requestHandler name="/replication" class="solr.ReplicationHandler" >  
1015 - <lst name="master">  
1016 - <str name="replicateAfter">commit</str>  
1017 - <str name="replicateAfter">startup</str>  
1018 - <str name="confFiles">schema.xml,stopwords.txt</str>  
1019 - </lst>  
1020 - <lst name="slave">  
1021 - <str name="masterUrl">http://localhost:8983/solr/replication</str>  
1022 - <str name="pollInterval">00:00:60</str>  
1023 - </lst>  
1024 - </requestHandler>  
1025 - -->  
1026 -  
1027 - <!-- Search Components  
1028 -  
1029 - Search components are registered to SolrCore and used by  
1030 - instances of SearchHandler (which can access them by name)  
1031 -  
1032 - By default, the following components are available:  
1033 -  
1034 - <searchComponent name="query" class="solr.QueryComponent" />  
1035 - <searchComponent name="facet" class="solr.FacetComponent" />  
1036 - <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />  
1037 - <searchComponent name="highlight" class="solr.HighlightComponent" />  
1038 - <searchComponent name="stats" class="solr.StatsComponent" />  
1039 - <searchComponent name="debug" class="solr.DebugComponent" />  
1040 -  
1041 - Default configuration in a requestHandler would look like:  
1042 -  
1043 - <arr name="components">  
1044 - <str>query</str>  
1045 - <str>facet</str>  
1046 - <str>mlt</str>  
1047 - <str>highlight</str>  
1048 - <str>stats</str>  
1049 - <str>debug</str>  
1050 - </arr>  
1051 -  
1052 - If you register a searchComponent to one of the standard names,  
1053 - that will be used instead of the default.  
1054 -  
1055 - To insert components before or after the 'standard' components, use:  
1056 -  
1057 - <arr name="first-components">  
1058 - <str>myFirstComponentName</str>  
1059 - </arr>  
1060 -  
1061 - <arr name="last-components">  
1062 - <str>myLastComponentName</str>  
1063 - </arr>  
1064 -  
1065 - NOTE: The component registered with the name "debug" will  
1066 - always be executed after the "last-components"  
1067 -  
1068 - -->  
1069 -  
1070 - <!-- Spell Check  
1071 -  
1072 - The spell check component can return a list of alternative spelling  
1073 - suggestions.  
1074 -  
1075 - http://wiki.apache.org/solr/SpellCheckComponent  
1076 - -->  
1077 - <searchComponent name="spellcheck" class="solr.SpellCheckComponent">  
1078 -  
1079 - <str name="queryAnalyzerFieldType">textSpell</str>  
1080 -  
1081 - <!-- Multiple "Spell Checkers" can be declared and used by this  
1082 - component  
1083 - -->  
1084 -  
1085 - <!-- a spellchecker built from a field of the main index, and  
1086 - written to disk  
1087 - -->  
1088 - <lst name="spellchecker">  
1089 - <str name="name">default</str>  
1090 - <str name="field">name</str>  
1091 - <str name="spellcheckIndexDir">spellchecker</str>  
1092 - <!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary  
1093 - <float name="thresholdTokenFrequency">.01</float>  
1094 - -->  
1095 - </lst>  
1096 -  
1097 - <!-- a spellchecker that uses a different distance measure -->  
1098 - <!--  
1099 - <lst name="spellchecker">  
1100 - <str name="name">jarowinkler</str>  
1101 - <str name="field">spell</str>  
1102 - <str name="distanceMeasure">  
1103 - org.apache.lucene.search.spell.JaroWinklerDistance  
1104 - </str>  
1105 - <str name="spellcheckIndexDir">spellcheckerJaro</str>  
1106 - </lst>  
1107 - -->  
1108 -  
1109 - <!-- a spellchecker that uses an alternate comparator  
1110 -  
1111 - comparatorClass can be one of:  
1112 - 1. score (default)  
1113 - 2. freq (Frequency first, then score)  
1114 - 3. A fully qualified class name  
1115 - -->  
1116 - <!--  
1117 - <lst name="spellchecker">  
1118 - <str name="name">freq</str>  
1119 - <str name="field">lowerfilt</str>  
1120 - <str name="spellcheckIndexDir">spellcheckerFreq</str>  
1121 - <str name="comparatorClass">freq</str>  
1122 - <str name="buildOnCommit">true</str></lst>  
1123 - -->  
1124 -  
1125 - <!-- A spellchecker that reads the list of words from a file -->  
1126 - <!--  
1127 - <lst name="spellchecker">  
1128 - <str name="classname">solr.FileBasedSpellChecker</str>  
1129 - <str name="name">file</str>  
1130 - <str name="sourceLocation">spellings.txt</str>  
1131 - <str name="characterEncoding">UTF-8</str>  
1132 - <str name="spellcheckIndexDir">spellcheckerFile</str>  
1133 - </lst>  
1134 - -->  
1135 - </searchComponent>  
1136 -  
1137 - <!-- A request handler for demonstrating the spellcheck component.  
1138 -  
1139 - NOTE: This is purely as an example. The whole purpose of the  
1140 - SpellCheckComponent is to hook it into the request handler that  
1141 - handles your normal user queries so that a separate request is  
1142 - not needed to get suggestions.  
1143 -  
1144 - IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS  
1145 - NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!  
1146 -  
1147 - See http://wiki.apache.org/solr/SpellCheckComponent for details  
1148 - on the request parameters.  
1149 - -->  
1150 - <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">  
1151 - <lst name="defaults">  
1152 - <str name="spellcheck.onlyMorePopular">false</str>  
1153 - <str name="spellcheck.extendedResults">false</str>  
1154 - <str name="spellcheck.count">1</str>  
1155 - </lst>  
1156 - <arr name="last-components">  
1157 - <str>spellcheck</str>  
1158 - </arr>  
1159 - </requestHandler>  
1160 -  
1161 - <!-- Term Vector Component  
1162 -  
1163 - http://wiki.apache.org/solr/TermVectorComponent  
1164 - -->  
1165 - <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>  
1166 -  
1167 - <!-- A request handler for demonstrating the term vector component  
1168 -  
1169 - This is purely as an example.  
1170 -  
1171 - In reality you will likely want to add the component to your  
1172 - already specified request handlers.  
1173 - -->  
1174 - <requestHandler name="tvrh" class="solr.SearchHandler" startup="lazy">  
1175 - <lst name="defaults">  
1176 - <bool name="tv">true</bool>  
1177 - </lst>  
1178 - <arr name="last-components">  
1179 - <str>tvComponent</str>  
1180 - </arr>  
1181 - </requestHandler>  
1182 -  
1183 - <!-- Clustering Component  
1184 -  
1185 - http://wiki.apache.org/solr/ClusteringComponent  
1186 -  
1187 - This relies on third party jars which are not included in the  
1188 - release. To use this component (and the "/clustering" handler)  
1189 - those jars will need to be downloaded, and you'll need to set  
1190 - the solr.clustering.enabled system property when running solr...  
1191 -  
1192 - java -Dsolr.clustering.enabled=true -jar start.jar  
1193 - -->  
1194 - <searchComponent name="clustering"  
1195 - enable="${solr.clustering.enabled:false}"  
1196 - class="solr.clustering.ClusteringComponent" >  
1197 - <!-- Declare an engine -->  
1198 - <lst name="engine">  
1199 - <!-- The name, only one can be named "default" -->  
1200 - <str name="name">default</str>  
1201 -  
1202 - <!-- Class name of Carrot2 clustering algorithm.  
1203 -  
1204 - Currently available algorithms are:  
1205 -  
1206 - * org.carrot2.clustering.lingo.LingoClusteringAlgorithm  
1207 - * org.carrot2.clustering.stc.STCClusteringAlgorithm  
1208 - * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm  
1209 -  
1210 - See http://project.carrot2.org/algorithms.html for the  
1211 - algorithm's characteristics.  
1212 - -->  
1213 - <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>  
1214 -  
1215 - <!-- Overriding values for Carrot2 default algorithm attributes.  
1216 -  
1217 - For a description of all available attributes, see:  
1218 - http://download.carrot2.org/stable/manual/#chapter.components.  
1219 - Use attribute key as name attribute of str elements  
1220 - below. These can be further overridden for individual  
1221 - requests by specifying attribute key as request parameter  
1222 - name and attribute value as parameter value.  
1223 - -->  
1224 - <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>  
1225 -  
1226 - <!-- Location of Carrot2 lexical resources.  
1227 -  
1228 - A directory from which to load Carrot2-specific stop words  
1229 - and stop labels. Absolute or relative to Solr config directory.  
1230 - If a specific resource (e.g. stopwords.en) is present in the  
1231 - specified dir, it will completely override the corresponding  
1232 - default one that ships with Carrot2.  
1233 -  
1234 - For an overview of Carrot2 lexical resources, see:  
1235 - http://download.carrot2.org/head/manual/#chapter.lexical-resources  
1236 - -->  
1237 - <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>  
1238 -  
1239 - <!-- The language to assume for the documents.  
1240 -  
1241 - For a list of allowed values, see:  
1242 - http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage  
1243 - -->  
1244 - <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>  
1245 - </lst>  
1246 - <lst name="engine">  
1247 - <str name="name">stc</str>  
1248 - <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>  
1249 - </lst>  
1250 - </searchComponent>  
1251 -  
1252 - <!-- A request handler for demonstrating the clustering component  
1253 -  
1254 - This is purely as an example.  
1255 -  
1256 - In reality you will likely want to add the component to your  
1257 - already specified request handlers.  
1258 - -->  
1259 - <requestHandler name="/clustering"  
1260 - startup="lazy"  
1261 - enable="${solr.clustering.enabled:false}"  
1262 - class="solr.SearchHandler">  
1263 - <lst name="defaults">  
1264 - <bool name="clustering">true</bool>  
1265 - <str name="clustering.engine">default</str>  
1266 - <bool name="clustering.results">true</bool>  
1267 - <!-- The title field -->  
1268 - <str name="carrot.title">name</str>  
1269 - <str name="carrot.url">id</str>  
1270 - <!-- The field to cluster on -->  
1271 - <str name="carrot.snippet">features</str>  
1272 - <!-- produce summaries -->  
1273 - <bool name="carrot.produceSummary">true</bool>  
1274 - <!-- the maximum number of labels per cluster -->  
1275 - <!--<int name="carrot.numDescriptions">5</int>-->  
1276 - <!-- produce sub clusters -->  
1277 - <bool name="carrot.outputSubClusters">false</bool>  
1278 -  
1279 - <str name="defType">edismax</str>  
1280 - <str name="qf">  
1281 - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4  
1282 - </str>  
1283 - <str name="q.alt">*:*</str>  
1284 - <str name="rows">10</str>  
1285 - <str name="fl">*,score</str>  
1286 - </lst>  
1287 - <arr name="last-components">  
1288 - <str>clustering</str>  
1289 - </arr>  
1290 - </requestHandler>  
1291 -  
1292 - <!-- Terms Component  
1293 -  
1294 - http://wiki.apache.org/solr/TermsComponent  
1295 -  
1296 - A component to return terms and document frequency of those  
1297 - terms  
1298 - -->  
1299 - <searchComponent name="terms" class="solr.TermsComponent"/>  
1300 -  
1301 - <!-- A request handler for demonstrating the terms component -->  
1302 - <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">  
1303 - <lst name="defaults">  
1304 - <bool name="terms">true</bool>  
1305 - </lst>  
1306 - <arr name="components">  
1307 - <str>terms</str>  
1308 - </arr>  
1309 - </requestHandler>  
1310 -  
1311 -  
1312 - <!-- Query Elevation Component  
1313 -  
1314 - http://wiki.apache.org/solr/QueryElevationComponent  
1315 -  
1316 - A search component that enables you to configure the top  
1317 - results for a given query regardless of the normal Lucene  
1318 - scoring.  
1319 - -->  
1320 - <searchComponent name="elevator" class="solr.QueryElevationComponent" >  
1321 - <!-- pick a fieldType to analyze queries -->  
1322 - <str name="queryFieldType">string</str>  
1323 - <str name="config-file">elevate.xml</str>  
1324 - </searchComponent>  
1325 -  
1326 - <!-- A request handler for demonstrating the elevator component -->  
1327 - <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">  
1328 - <lst name="defaults">  
1329 - <str name="echoParams">explicit</str>  
1330 - </lst>  
1331 - <arr name="last-components">  
1332 - <str>elevator</str>  
1333 - </arr>  
1334 - </requestHandler>  
1335 -  
1336 - <!-- Highlighting Component  
1337 -  
1338 - http://wiki.apache.org/solr/HighlightingParameters  
1339 - -->  
1340 - <searchComponent class="solr.HighlightComponent" name="highlight">  
1341 - <highlighting>  
1342 - <!-- Configure the standard fragmenter -->  
1343 - <!-- This could most likely be commented out in the "default" case -->  
1344 - <fragmenter name="gap"  
1345 - default="true"  
1346 - class="solr.highlight.GapFragmenter">  
1347 - <lst name="defaults">  
1348 - <int name="hl.fragsize">100</int>  
1349 - </lst>  
1350 - </fragmenter>  
1351 -  
1352 - <!-- A regular-expression-based fragmenter  
1353 - (for sentence extraction)  
1354 - -->  
1355 - <fragmenter name="regex"  
1356 - class="solr.highlight.RegexFragmenter">  
1357 - <lst name="defaults">  
1358 - <!-- slightly smaller fragsizes work better because of slop -->  
1359 - <int name="hl.fragsize">70</int>  
1360 - <!-- allow 50% slop on fragment sizes -->  
1361 - <float name="hl.regex.slop">0.5</float>  
1362 - <!-- a basic sentence pattern -->  
1363 - <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>  
1364 - </lst>  
1365 - </fragmenter>  
1366 -  
1367 - <!-- Configure the standard formatter -->  
1368 - <formatter name="html"  
1369 - default="true"  
1370 - class="solr.highlight.HtmlFormatter">  
1371 - <lst name="defaults">  
1372 - <str name="hl.simple.pre"><![CDATA[<em>]]></str>  
1373 - <str name="hl.simple.post"><![CDATA[</em>]]></str>  
1374 - </lst>  
1375 - </formatter>  
1376 -  
1377 - <!-- Configure the standard encoder -->  
1378 - <encoder name="html"  
1379 - class="solr.highlight.HtmlEncoder" />  
1380 -  
1381 - <!-- Configure the standard fragListBuilder -->  
1382 - <fragListBuilder name="simple"  
1383 - default="true"  
1384 - class="solr.highlight.SimpleFragListBuilder"/>  
1385 -  
1386 - <!-- Configure the single fragListBuilder -->  
1387 - <fragListBuilder name="single"  
1388 - class="solr.highlight.SingleFragListBuilder"/>  
1389 -  
1390 - <!-- default tag FragmentsBuilder -->  
1391 - <fragmentsBuilder name="default"  
1392 - default="true"  
1393 - class="solr.highlight.ScoreOrderFragmentsBuilder">  
1394 - <!--  
1395 - <lst name="defaults">  
1396 - <str name="hl.multiValuedSeparatorChar">/</str>  
1397 - </lst>  
1398 - -->  
1399 - </fragmentsBuilder>  
1400 -  
1401 - <!-- multi-colored tag FragmentsBuilder -->  
1402 - <fragmentsBuilder name="colored"  
1403 - class="solr.highlight.ScoreOrderFragmentsBuilder">  
1404 - <lst name="defaults">  
1405 - <str name="hl.tag.pre"><![CDATA[  
1406 - <b style="background:yellow">,<b style="background:lawgreen">,  
1407 - <b style="background:aquamarine">,<b style="background:magenta">,  
1408 - <b style="background:palegreen">,<b style="background:coral">,  
1409 - <b style="background:wheat">,<b style="background:khaki">,  
1410 - <b style="background:lime">,<b style="background:deepskyblue">]]></str>  
1411 - <str name="hl.tag.post"><![CDATA[</b>]]></str>  
1412 - </lst>  
1413 - </fragmentsBuilder>  
1414 - </highlighting>  
1415 - </searchComponent>  
1416 -  
1417 - <!-- Update Processors  
1418 -  
1419 - Chains of Update Processor Factories for dealing with Update  
1420 - Requests can be declared, and then used by name in Update  
1421 - Request Processors  
1422 -  
1423 - http://wiki.apache.org/solr/UpdateRequestProcessor  
1424 -  
1425 - -->  
1426 - <!-- Deduplication  
1427 -  
1428 - An example dedup update processor that creates the "id" field  
1429 - on the fly based on the hash code of some other fields. This  
1430 - example has overwriteDupes set to false since we are using the  
1431 - id field as the signatureField and Solr will maintain  
1432 - uniqueness based on that anyway.  
1433 -  
1434 - -->  
1435 - <!--  
1436 - <updateRequestProcessorChain name="dedupe">  
1437 - <processor class="solr.processor.SignatureUpdateProcessorFactory">  
1438 - <bool name="enabled">true</bool>  
1439 - <str name="signatureField">id</str>  
1440 - <bool name="overwriteDupes">false</bool>  
1441 - <str name="fields">name,features,cat</str>  
1442 - <str name="signatureClass">solr.processor.Lookup3Signature</str>  
1443 - </processor>  
1444 - <processor class="solr.LogUpdateProcessorFactory" />  
1445 - <processor class="solr.RunUpdateProcessorFactory" />  
1446 - </updateRequestProcessorChain>  
1447 - -->  
1448 -  
1449 - <!-- Response Writers  
1450 -  
1451 - http://wiki.apache.org/solr/QueryResponseWriter  
1452 -  
1453 - Request responses will be written using the writer specified by  
1454 - the 'wt' request parameter matching the name of a registered  
1455 - writer.  
1456 -  
1457 - The writer declared with default="true" will be used if 'wt' is  
1458 - not specified in the request.  
1459 - -->  
1460 - <!-- The following response writers are implicitly configured unless  
1461 - overridden...  
1462 - -->  
1463 - <!--  
1464 - <queryResponseWriter name="xml"  
1465 - default="true"  
1466 - class="solr.XMLResponseWriter" />  
1467 - <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>  
1468 - <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>  
1469 - <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>  
1470 - <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>  
1471 - <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>  
1472 - <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"/>  
1473 - <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>  
1474 - -->  
1475 - <!--  
1476 - Custom response writers can be declared as needed...  
1477 - -->  
1478 - <!--  
1479 - <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>  
1480 - -->  
1481 -  
1482 - <!-- XSLT response writer transforms the XML output using any XSLT file found  
1483 - in Solr's conf/xslt directory. XSLT files are re-checked for changes  
1484 - every xsltCacheLifetimeSeconds seconds.  
1485 - -->  
1486 - <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">  
1487 - <int name="xsltCacheLifetimeSeconds">5</int>  
1488 - </queryResponseWriter>  
1489 -  
1490 - <!-- Query Parsers  
1491 -  
1492 - http://wiki.apache.org/solr/SolrQuerySyntax  
1493 -  
1494 - Multiple QParserPlugins can be registered by name, and then  
1495 - used in either the "defType" param for the QueryComponent (used  
1496 - by SearchHandler) or in LocalParams  
1497 - -->  
1498 - <!-- example of registering a query parser -->  
1499 - <!--  
1500 - <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>  
1501 - -->  
1502 -  
1503 - <!-- Function Parsers  
1504 -  
1505 - http://wiki.apache.org/solr/FunctionQuery  
1506 -  
1507 - Multiple ValueSourceParsers can be registered by name, and then  
1508 - used as function names when using the "func" QParser.  
1509 - -->  
1510 - <!-- example of registering a custom function parser -->  
1511 - <!--  
1512 - <valueSourceParser name="myfunc"  
1513 - class="com.mycompany.MyValueSourceParser" />  
1514 - -->  
1515 -  
1516 - <!-- Legacy config for the admin interface -->  
1517 - <admin>  
1518 - <defaultQuery>*:*</defaultQuery>  
1519 -  
1520 - <!-- configure a healthcheck file for servers behind a  
1521 - loadbalancer  
1522 - -->  
1523 - <!--  
1524 - <healthcheck type="file">server-enabled</healthcheck>  
1525 - -->  
1526 - </admin>  
1527 -  
1528 - <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">  
1529 - <lst name="defaults">  
1530 - <str name="config">/etc/solr/data-config.xml</str>  
1531 - </lst>  
1532 - </requestHandler>  
1533 -  
1534 -  
1535 -</config>