Commit eae469285098a323fd86383f6fa0f3b4d21c6b19
0 parents
Exists in
master
and in
1 other branch
Commit inicial do projeto
Showing
7 changed files
with
8917 additions
and
0 deletions
Show diff stats
1 | +++ a/Dockerfile | |
... | ... | @@ -0,0 +1,111 @@ |
1 | +########################################################### | |
2 | +# Dockerfile de construção do container Solr | |
3 | +# utilizado pelo SEI e pelo SIP | |
4 | +# | |
5 | +# Baseado no CentOS 6.X | |
6 | +############################################################ | |
7 | + | |
8 | +# Base image: CentOS 6.x | |
9 | +FROM centos:centos6 | |
10 | + | |
11 | +# Author and maintainer of the container | |
12 | +MAINTAINER MPOG\Guilherme Andrade Del Cantoni | |
13 | + | |
14 | +# Bring the installed packages up to date; the yum cache is dropped in the same layer so it is not stored in the image | |
15 | +RUN yum upgrade -y && yum clean all | |
16 | + | |
17 | +################## START OF INSTALLATION ###################### | |
18 | +# Solr installation following the recommendations described in the SEI documentation | |
19 | +# Ref: http://<PROJECT WIKI>#<SOLR INSTALLATION SECTION> | |
20 | + | |
21 | +# Install the Java JDK (yum cache is removed in the same layer) | |
22 | +RUN yum install -y java-1.7.0-openjdk-devel && yum clean all | |
23 | + | |
24 | +# Install the utilities used during provisioning (yum cache is removed in the same layer) | |
25 | +RUN yum install -y curl tar wget which && yum clean all | |
26 | + | |
27 | +# Dedicated system account for the Solr service | |
28 | +RUN useradd --system --shell /bin/bash solr | |
29 | + | |
30 | +# Download, unpack and delete the Solr 4.9 tarball in one layer so the archive is never stored in the image | |
31 | +RUN wget -q -O /opt/solr-4.9.0.tgz http://archive.apache.org/dist/lucene/solr/4.9.0/solr-4.9.0.tgz && \ | |
32 | + tar -xzf /opt/solr-4.9.0.tgz -C /opt && \ | |
33 | + rm /opt/solr-4.9.0.tgz && \ | |
34 | + chown -R solr:solr /opt/solr-4.9.0 | |
35 | + | |
36 | +# Append the default settings to /etc/default/jetty; $JAVA_OPTIONS is inside double quotes, so the build shell expands it when this instruction runs | |
37 | +RUN { echo "JAVA_OPTIONS='-Dsolr.solr.home=/opt/solr-4.9.0/example/solr $JAVA_OPTIONS'"; \ | |
38 | + echo "JETTY_HOME=/opt/solr-4.9.0/example"; \ | |
39 | + echo "JETTY_USER=solr"; } | tee -a /etc/default/jetty | |
40 | + | |
41 | +# Install the Jetty start script as the "solr" init script; download and chmod share one layer so the file is stored only once | |
42 | +RUN wget -q -O /etc/init.d/solr http://dev.eclipse.org/svnroot/rt/org.eclipse.jetty/jetty/trunk/jetty-distribution/src/main/resources/bin/jetty.sh && \ | |
43 | + chmod +x /etc/init.d/solr | |
44 | + | |
45 | +# Create the index storage directories and link each one into the Solr home | |
46 | +RUN mkdir /var/opt/sei && \ | |
47 | + for core in sei-protocolos sei-bases-conhecimento sei-publicacoes; do \ | |
48 | + mkdir "/var/opt/sei/$core" && ln -s "/var/opt/sei/$core" /opt/solr-4.9.0/example/solr/ || exit 1; \ | |
49 | + done | |
50 | + | |
51 | +# Seed every index directory with a copy of the collection1 configuration shipped with Solr | |
52 | +RUN for core in sei-protocolos sei-bases-conhecimento sei-publicacoes; do \ | |
53 | + cp -R /opt/solr-4.9.0/example/solr/collection1/conf "/var/opt/sei/$core/" || exit 1; \ | |
54 | + done | |
55 | + | |
56 | +# Remove the stock schema.xml and solrconfig.xml from each copied conf directory | |
57 | +RUN for core in sei-protocolos \ | |
58 | + sei-bases-conhecimento \ | |
59 | + sei-publicacoes; do \ | |
60 | + rm "/var/opt/sei/$core/conf/schema.xml" \ | |
61 | + "/var/opt/sei/$core/conf/solrconfig.xml" || exit 1; \ | |
62 | + done | |
63 | + | |
64 | +# SEI index configuration: plain local files, so COPY is used instead of ADD | |
65 | +COPY index/sei-protocolos-schema.xml /var/opt/sei/sei-protocolos/conf/sei-protocolos-schema.xml | |
66 | +COPY index/sei-bases-conhecimento-schema.xml /var/opt/sei/sei-bases-conhecimento/conf/sei-bases-conhecimento-schema.xml | |
67 | +COPY index/sei-publicacoes-schema.xml /var/opt/sei/sei-publicacoes/conf/sei-publicacoes-schema.xml | |
68 | +COPY index/sei-protocolos-config.xml /var/opt/sei/sei-protocolos/conf/sei-protocolos-config.xml | |
69 | +COPY index/sei-bases-conhecimento-config.xml /var/opt/sei/sei-bases-conhecimento/conf/sei-bases-conhecimento-config.xml | |
70 | +COPY index/sei-publicacoes-config.xml /var/opt/sei/sei-publicacoes/conf/sei-publicacoes-config.xml | |
71 | + | |
72 | +# Create the content (data) directory of each index | |
73 | +RUN mkdir /var/opt/sei/sei-protocolos/conteudo \ | |
74 | + /var/opt/sei/sei-bases-conhecimento/conteudo \ | |
75 | + /var/opt/sei/sei-publicacoes/conteudo | |
76 | + | |
77 | +# Link the Solr contrib, dist and example/lib directories into the Protocolos index directory | |
78 | +RUN for dir in contrib dist; do \ | |
79 | + ln -s "/opt/solr-4.9.0/$dir/" "/var/opt/sei/sei-protocolos/$dir" || exit 1; \ | |
80 | + done && ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-protocolos/lib | |
81 | + | |
82 | +# Link the Solr contrib, dist and example/lib directories into the Base de Conhecimento index directory | |
83 | +RUN for dir in contrib dist; do \ | |
84 | + ln -s "/opt/solr-4.9.0/$dir/" "/var/opt/sei/sei-bases-conhecimento/$dir" || exit 1; \ | |
85 | + done && ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-bases-conhecimento/lib | |
86 | + | |
87 | +# Link the Solr contrib, dist and example/lib directories into the Publicações index directory | |
88 | +RUN for dir in contrib dist; do \ | |
89 | + ln -s "/opt/solr-4.9.0/$dir/" "/var/opt/sei/sei-publicacoes/$dir" || exit 1; \ | |
90 | + done && ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-publicacoes/lib | |
91 | + | |
92 | +# Create the log directory and hand the Solr installation, index and log trees to the solr user | |
93 | +RUN mkdir /var/log/solr && \ | |
94 | + chown -R solr:solr /opt/solr-4.9.0 /var/opt/sei /var/log/solr | |
95 | + | |
96 | +# Create the SEI cores through the CoreAdmin API. URLs are quoted so the shell does not split them at '&'; -f fails the build on an HTTP error. NOTE(review): core names now follow the instance directory names - confirm against the names the SEI application is configured to query | |
97 | +RUN service solr start && sleep 6 && \ | |
98 | + curl -fsS "http://localhost:8983/solr/admin/cores?action=CREATE&name=sei-protocolos&instanceDir=/var/opt/sei/sei-protocolos&config=sei-protocolos-config.xml&schema=sei-protocolos-schema.xml&dataDir=/var/opt/sei/sei-protocolos/conteudo" && \ | |
99 | + curl -fsS "http://localhost:8983/solr/admin/cores?action=CREATE&name=sei-bases-conhecimento&instanceDir=/var/opt/sei/sei-bases-conhecimento&config=sei-bases-conhecimento-config.xml&schema=sei-bases-conhecimento-schema.xml&dataDir=/var/opt/sei/sei-bases-conhecimento/conteudo" && \ | |
100 | + curl -fsS "http://localhost:8983/solr/admin/cores?action=CREATE&name=sei-publicacoes&instanceDir=/var/opt/sei/sei-publicacoes&config=sei-publicacoes-config.xml&schema=sei-publicacoes-schema.xml&dataDir=/var/opt/sei/sei-publicacoes/conteudo" | |
101 | + | |
102 | +#RUN service solr start && sleep 6 && curl http://localhost/solr/admin/cores?action=RELOAD | |
103 | + | |
104 | +##################### END OF INSTALLATION ##################### | |
105 | +WORKDIR /opt/solr-4.9.0/example | |
106 | + | |
107 | +# Expose the port Solr is reached on | |
108 | +EXPOSE 8983 | |
109 | + | |
110 | +# Default command: start the Jetty launcher (start.jar) from the working directory set above | |
111 | +CMD ["java", "-jar", "start.jar"] | |
0 | 112 | \ No newline at end of file | ... | ... |
1 | +++ a/index/sei-bases-conhecimento-config.xml | |
... | ... | @@ -0,0 +1,1773 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + For more details about configurations options that may appear in | |
21 | + this file, see http://wiki.apache.org/solr/SolrConfigXml. | |
22 | +--> | |
23 | +<config> | |
24 | + <!-- In all configuration below, a prefix of "solr." for class names | |
25 | + is an alias that causes solr to search appropriate packages, | |
26 | + including org.apache.solr.(search|update|request|core|analysis) | |
27 | + | |
28 | + You may also specify a fully qualified Java classname if you | |
29 | + have your own custom plugins. | |
30 | + --> | |
31 | + | |
32 | + <!-- Controls what version of Lucene various components of Solr | |
33 | + adhere to. Generally, you want to use the latest version to | |
34 | + get all bug fixes and improvements. It is highly recommended | |
35 | + that you fully re-index after changing this setting as it can | |
36 | + affect both how text is indexed and queried. | |
37 | + --> | |
38 | + <luceneMatchVersion>LUCENE_40</luceneMatchVersion> | |
39 | + | |
40 | + <!-- <lib/> directives can be used to instruct Solr to load any Jars | |
41 | + identified and use them to resolve any "plugins" specified in | |
42 | + your solrconfig.xml or schema.xml (ie: Analyzers, Request | |
43 | + Handlers, etc...). | |
44 | + | |
45 | + All directories and paths are resolved relative to the | |
46 | + instanceDir. | |
47 | + | |
48 | + Please note that <lib/> directives are processed in the order | |
49 | + that they appear in your solrconfig.xml file, and are "stacked" | |
50 | + on top of each other when building a ClassLoader - so if you have | |
51 | + plugin jars with dependencies on other jars, the "lower level" | |
52 | + dependency jars should be loaded first. | |
53 | + | |
54 | + If a "./lib" directory exists in your instanceDir, all files | |
55 | + found in it are included as if you had used the following | |
56 | + syntax... | |
57 | + | |
58 | +--> | |
59 | + <lib dir="./lib" /> | |
60 | + | |
61 | + | |
62 | + <!-- A 'dir' option by itself adds any files found in the directory | |
63 | + to the classpath, this is useful for including all jars in a | |
64 | + directory. | |
65 | + | |
66 | + When a 'regex' is specified in addition to a 'dir', only the | |
67 | + files in that directory which completely match the regex | |
68 | + (anchored on both ends) will be included. | |
69 | + | |
70 | + The entries below load some solr-contribs along with their | |
71 | + external dependencies. The "apache-" prefix is optional in each regex so that both apache-solr-*.jar and solr-*.jar file names in ./dist match. | |
72 | + --> | |
73 | + <lib dir="./contrib/extraction/lib" regex=".*\.jar" /> | |
74 | + <lib dir="./dist/" regex="(apache-)?solr-cell-\d.*\.jar" /> | |
75 | + | |
76 | + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" /> | |
77 | + <lib dir="./dist/" regex="(apache-)?solr-clustering-\d.*\.jar" /> | |
78 | + | |
79 | + <lib dir="./contrib/langid/lib/" regex=".*\.jar" /> | |
80 | + <lib dir="./dist/" regex="(apache-)?solr-langid-\d.*\.jar" /> | |
81 | + | |
82 | + <lib dir="./contrib/velocity/lib" regex=".*\.jar" /> | |
83 | + <lib dir="./dist/" regex="(apache-)?solr-velocity-\d.*\.jar" /> | |
84 | + | |
85 | + <!-- If a 'dir' option (with or without a regex) is used and nothing | |
86 | + is found that matches, it will be ignored | |
87 | + --> | |
88 | + <lib dir="/total/crap/dir/ignored" /> | |
89 | + | |
90 | + <!-- an exact 'path' can be used instead of a 'dir' to specify a | |
91 | + specific jar file. This will cause a serious error to be logged | |
92 | + if it can't be loaded. | |
93 | + --> | |
94 | + <!-- | |
95 | + <lib path="../a-jar-that-does-not-exist.jar" /> | |
96 | + --> | |
97 | + | |
98 | + <!-- Data Directory | |
99 | + | |
100 | + Used to specify an alternate directory to hold all index data | |
101 | + other than the default ./data under the Solr home. If | |
102 | + replication is in use, this should match the replication | |
103 | + configuration. | |
104 | + --> | |
105 | + <dataDir>${solr.data.dir:}</dataDir> | |
106 | + | |
107 | + | |
108 | + <!-- The DirectoryFactory to use for indexes. | |
109 | + | |
110 | + solr.StandardDirectoryFactory is filesystem | |
111 | + based and tries to pick the best implementation for the current | |
112 | + JVM and platform. solr.NRTCachingDirectoryFactory, the default, | |
113 | + wraps solr.StandardDirectoryFactory and caches small files in memory | |
114 | + for better NRT performance. | |
115 | + | |
116 | + One can force a particular implementation via solr.MMapDirectoryFactory, | |
117 | + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. | |
118 | + | |
119 | + solr.RAMDirectoryFactory is memory based, not | |
120 | + persistent, and doesn't work with replication. | |
121 | + --> | |
122 | + <directoryFactory name="DirectoryFactory" | |
123 | + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> | |
124 | + | |
125 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
126 | + Index Config - These settings control low-level behavior of indexing | |
127 | + Most example settings here show the default value, but are commented | |
128 | + out, to more easily see where customizations have been made. | |
129 | + | |
130 | + Note: This replaces <indexDefaults> and <mainIndex> from older versions | |
131 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
132 | + <indexConfig> | |
133 | + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a | |
134 | + LimitTokenCountFilterFactory in your fieldType definition. E.g. | |
135 | + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> | |
136 | + --> | |
137 | + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> | |
138 | + <!-- <writeLockTimeout>1000</writeLockTimeout> --> | |
139 | + | |
140 | + <!-- Expert: Enabling compound file will use fewer files for the index, | |
141 | + using fewer file descriptors at the expense of decreased performance. | |
142 | + Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> | |
143 | + <!-- <useCompoundFile>false</useCompoundFile> --> | |
144 | + | |
145 | + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene | |
146 | + indexing for buffering added documents and deletions before they are | |
147 | + flushed to the Directory. | |
148 | + maxBufferedDocs sets a limit on the number of documents buffered | |
149 | + before flushing. | |
150 | + If both ramBufferSizeMB and maxBufferedDocs is set, then | |
151 | + Lucene will flush based on whichever limit is hit first. --> | |
152 | + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> --> | |
153 | + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> | |
154 | + | |
155 | + <!-- Expert: Merge Policy | |
156 | + The Merge Policy in Lucene controls how merging of segments is done. | |
157 | + The default since Solr/Lucene 3.3 is TieredMergePolicy. | |
158 | + The default since Lucene 2.3 was the LogByteSizeMergePolicy, | |
159 | + Even older versions of Lucene used LogDocMergePolicy. | |
160 | + --> | |
161 | + <!-- | |
162 | + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"> | |
163 | + <int name="maxMergeAtOnce">10</int> | |
164 | + <int name="segmentsPerTier">10</int> | |
165 | + </mergePolicy> | |
166 | + --> | |
167 | + | |
168 | + <!-- Merge Factor | |
169 | + The merge factor controls how many segments will get merged at a time. | |
170 | + For TieredMergePolicy, mergeFactor is a convenience parameter which | |
171 | + will set both MaxMergeAtOnce and SegmentsPerTier at once. | |
172 | + For LogByteSizeMergePolicy, mergeFactor decides how many new segments | |
173 | + will be allowed before they are merged into one. | |
174 | + Default is 10 for both merge policies. | |
175 | + --> | |
176 | + <!-- | |
177 | + <mergeFactor>10</mergeFactor> | |
178 | + --> | |
179 | + | |
180 | + <!-- Expert: Merge Scheduler | |
181 | + The Merge Scheduler in Lucene controls how merges are | |
182 | + performed. The ConcurrentMergeScheduler (Lucene 2.3 default) | |
183 | + can perform merges in the background using separate threads. | |
184 | + The SerialMergeScheduler (Lucene 2.2 default) does not. | |
185 | + --> | |
186 | + <!-- | |
187 | + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> | |
188 | + --> | |
189 | + | |
190 | + <!-- LockFactory | |
191 | + | |
192 | + This option specifies which Lucene LockFactory implementation | |
193 | + to use. | |
194 | + | |
195 | + single = SingleInstanceLockFactory - suggested for a | |
196 | + read-only index or when there is no possibility of | |
197 | + another process trying to modify the index. | |
198 | + native = NativeFSLockFactory - uses OS native file locking. | |
199 | + Do not use when multiple solr webapps in the same | |
200 | + JVM are attempting to share a single index. | |
201 | + simple = SimpleFSLockFactory - uses a plain file for locking | |
202 | + | |
203 | + Defaults: 'native' is default for Solr3.6 and later, otherwise | |
204 | + 'simple' is the default | |
205 | + | |
206 | + More details on the nuances of each LockFactory... | |
207 | + http://wiki.apache.org/lucene-java/AvailableLockFactories | |
208 | + --> | |
209 | + <!-- <lockType>native</lockType> --> | |
210 | + | |
211 | + <!-- Unlock On Startup | |
212 | + | |
213 | + If true, unlock any held write or commit locks on startup. | |
214 | + This defeats the locking mechanism that allows multiple | |
215 | + processes to safely access a lucene index, and should be used | |
216 | + with care. Default is "false". | |
217 | + | |
218 | + This is not needed if lock type is 'none' or 'single' | |
219 | + --> | |
220 | + <!-- | |
221 | + <unlockOnStartup>false</unlockOnStartup> | |
222 | + --> | |
223 | + | |
224 | + <!-- Expert: Controls how often Lucene loads terms into memory | |
225 | + Default is 128 and is likely good for most everyone. | |
226 | + --> | |
227 | + <!-- <termIndexInterval>128</termIndexInterval> --> | |
228 | + | |
229 | + <!-- If true, IndexReaders will be reopened (often more efficient) | |
230 | + instead of closed and then opened. Default: true | |
231 | + --> | |
232 | + <!-- | |
233 | + <reopenReaders>true</reopenReaders> | |
234 | + --> | |
235 | + | |
236 | + <!-- Commit Deletion Policy | |
237 | + | |
238 | + Custom deletion policies can be specified here. The class must | |
239 | + implement org.apache.lucene.index.IndexDeletionPolicy. | |
240 | + | |
241 | + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html | |
242 | + | |
243 | + The default Solr IndexDeletionPolicy implementation supports | |
244 | + deleting index commit points on number of commits, age of | |
245 | + commit point and optimized status. | |
246 | + | |
247 | + The latest commit point should always be preserved regardless | |
248 | + of the criteria. | |
249 | + --> | |
250 | + <!-- | |
251 | + <deletionPolicy class="solr.SolrDeletionPolicy"> | |
252 | + --> | |
253 | + <!-- The number of commit points to be kept --> | |
254 | + <!-- <str name="maxCommitsToKeep">1</str> --> | |
255 | + <!-- The number of optimized commit points to be kept --> | |
256 | + <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> | |
257 | + <!-- | |
258 | + Delete all commit points once they have reached the given age. | |
259 | + Supports DateMathParser syntax e.g. | |
260 | + --> | |
261 | + <!-- | |
262 | + <str name="maxCommitAge">30MINUTES</str> | |
263 | + <str name="maxCommitAge">1DAY</str> | |
264 | + --> | |
265 | + <!-- | |
266 | + </deletionPolicy> | |
267 | + --> | |
268 | + | |
269 | + <!-- Lucene Infostream | |
270 | + | |
271 | + To aid in advanced debugging, Lucene provides an "InfoStream" | |
272 | + of detailed information when indexing. | |
273 | + | |
274 | + Setting the value to true will instruct the underlying Lucene | |
275 | + IndexWriter to write its debugging info to the specified file | |
276 | + --> | |
277 | + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> | |
278 | + </indexConfig> | |
279 | + | |
280 | + | |
281 | + <!-- JMX | |
282 | + | |
283 | + This example enables JMX if and only if an existing MBeanServer | |
284 | + is found, use this if you want to configure JMX through JVM | |
285 | + parameters. Remove this to disable exposing Solr configuration | |
286 | + and statistics to JMX. | |
287 | + | |
288 | + For more details see http://wiki.apache.org/solr/SolrJmx | |
289 | + --> | |
290 | + <jmx /> | |
291 | + <!-- If you want to connect to a particular server, specify the | |
292 | + agentId | |
293 | + --> | |
294 | + <!-- <jmx agentId="myAgent" /> --> | |
295 | + <!-- If you want to start a new MBeanServer, specify the serviceUrl --> | |
296 | + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> | |
297 | + --> | |
298 | + | |
299 | + <!-- The default high-performance update handler --> | |
300 | + <updateHandler class="solr.DirectUpdateHandler2"> | |
301 | + | |
302 | + <!-- AutoCommit | |
303 | + | |
304 | + Perform a hard commit automatically under certain conditions. | |
305 | + Instead of enabling autoCommit, consider using "commitWithin" | |
306 | + when adding documents. | |
307 | + | |
308 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
309 | + | |
310 | + maxDocs - Maximum number of documents to add since the last | |
311 | + commit before automatically triggering a new commit. | |
312 | + | |
313 | + maxTime - Maximum amount of time in ms that is allowed to pass | |
314 | + since a document was added before automatically | |
315 | + triggering a new commit. | |
316 | + openSearcher - if false, the commit causes recent index changes | |
317 | + to be flushed to stable storage, but does not cause a new | |
318 | + searcher to be opened to make those changes visible. | |
319 | + --> | |
320 | + <autoCommit> | |
321 | + <maxTime>300000</maxTime> | |
322 | + <openSearcher>false</openSearcher> | |
323 | + </autoCommit> | |
324 | + | |
325 | + <!-- softAutoCommit is like autoCommit except it causes a | |
326 | + 'soft' commit which only ensures that changes are visible | |
327 | + but does not ensure that data is synced to disk. This is | |
328 | + faster and more near-realtime friendly than a hard commit. | |
329 | + --> | |
330 | + <!-- | |
331 | + <autoSoftCommit> | |
332 | + <maxTime>1000</maxTime> | |
333 | + </autoSoftCommit> | |
334 | + --> | |
335 | + | |
336 | + <!-- Update Related Event Listeners | |
337 | + | |
338 | + Various IndexWriter related events can trigger Listeners to | |
339 | + take actions. | |
340 | + | |
341 | + postCommit - fired after every commit or optimize command | |
342 | + postOptimize - fired after every optimize command | |
343 | + --> | |
344 | + <!-- The RunExecutableListener executes an external command from a | |
345 | + hook such as postCommit or postOptimize. | |
346 | + | |
347 | + exe - the name of the executable to run | |
348 | + dir - dir to use as the current working directory. (default=".") | |
349 | + wait - the calling thread waits until the executable returns. | |
350 | + (default="true") | |
351 | + args - the arguments to pass to the program. (default is none) | |
352 | + env - environment variables to set. (default is none) | |
353 | + --> | |
354 | + <!-- This example shows how RunExecutableListener could be used | |
355 | + with the script based replication... | |
356 | + http://wiki.apache.org/solr/CollectionDistribution | |
357 | + --> | |
358 | + <!-- | |
359 | + <listener event="postCommit" class="solr.RunExecutableListener"> | |
360 | + <str name="exe">solr/bin/snapshooter</str> | |
361 | + <str name="dir">.</str> | |
362 | + <bool name="wait">true</bool> | |
363 | + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> | |
364 | + <arr name="env"> <str>MYVAR=val1</str> </arr> | |
365 | + </listener> | |
366 | + --> | |
367 | + | |
368 | + <!-- Enables a transaction log, currently used for real-time get. | |
369 | + "dir" - the target directory for transaction logs, defaults to the | |
370 | + solr data directory. --> | |
371 | + <updateLog> | |
372 | + <str name="dir">${solr.data.dir:}</str> | |
373 | + </updateLog> | |
374 | + | |
375 | + | |
376 | + </updateHandler> | |
377 | + | |
378 | + <!-- IndexReaderFactory | |
379 | + | |
380 | + Use the following format to specify a custom IndexReaderFactory, | |
381 | + which allows for alternate IndexReader implementations. | |
382 | + | |
383 | + ** Experimental Feature ** | |
384 | + | |
385 | + Please note - Using a custom IndexReaderFactory may prevent | |
386 | + certain other features from working. The API to | |
387 | + IndexReaderFactory may change without warning or may even be | |
388 | + removed from future releases if the problems cannot be | |
389 | + resolved. | |
390 | + | |
391 | + | |
392 | + ** Features that may not work with custom IndexReaderFactory ** | |
393 | + | |
394 | + The ReplicationHandler assumes a disk-resident index. Using a | |
395 | + custom IndexReader implementation may cause incompatibility | |
396 | + with ReplicationHandler and may cause replication to not work | |
397 | + correctly. See SOLR-1366 for details. | |
398 | + | |
399 | + --> | |
400 | + <!-- | |
401 | + <indexReaderFactory name="IndexReaderFactory" class="package.class"> | |
402 | + <str name="someArg">Some Value</str> | |
403 | + </indexReaderFactory > | |
404 | + --> | |
405 | + <!-- By explicitly declaring the Factory, the termIndexDivisor can | |
406 | + be specified. | |
407 | + --> | |
408 | + <!-- | |
409 | + <indexReaderFactory name="IndexReaderFactory" | |
410 | + class="solr.StandardIndexReaderFactory"> | |
411 | + <int name="setTermIndexDivisor">12</int> | |
412 | + </indexReaderFactory > | |
413 | + --> | |
414 | + | |
415 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
416 | + Query section - these settings control query time things like caches | |
417 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
418 | + <query> | |
419 | + <!-- Max Boolean Clauses | |
420 | + | |
421 | + Maximum number of clauses in each BooleanQuery, an exception | |
422 | + is thrown if exceeded. | |
423 | + | |
424 | + ** WARNING ** | |
425 | + | |
426 | + This option actually modifies a global Lucene property that | |
427 | + will affect all SolrCores. If multiple solrconfig.xml files | |
428 | + disagree on this property, the value at any given moment will | |
429 | + be based on the last SolrCore to be initialized. | |
430 | + | |
431 | + --> | |
432 | + <maxBooleanClauses>1024</maxBooleanClauses> | |
433 | + | |
434 | + | |
435 | + <!-- Solr Internal Query Caches | |
436 | + | |
437 | + There are two implementations of cache available for Solr, | |
438 | + LRUCache, based on a synchronized LinkedHashMap, and | |
439 | + FastLRUCache, based on a ConcurrentHashMap. | |
440 | + | |
441 | + FastLRUCache has faster gets and slower puts in single | |
442 | + threaded operation and thus is generally faster than LRUCache | |
443 | + when the hit ratio of the cache is high (> 75%), and may be | |
444 | + faster under other scenarios on multi-cpu systems. | |
445 | + --> | |
446 | + | |
447 | + <!-- Filter Cache | |
448 | + | |
449 | + Cache used by SolrIndexSearcher for filters (DocSets), | |
450 | + unordered sets of *all* documents that match a query. When a | |
451 | + new searcher is opened, its caches may be prepopulated or | |
452 | + "autowarmed" using data from caches in the old searcher. | |
453 | + autowarmCount is the number of items to prepopulate. For | |
454 | + LRUCache, the autowarmed items will be the most recently | |
455 | + accessed items. | |
456 | + | |
457 | + Parameters: | |
458 | + class - the SolrCache implementation to use | |
459 | + (LRUCache or FastLRUCache) | |
460 | + size - the maximum number of entries in the cache | |
461 | + initialSize - the initial capacity (number of entries) of | |
462 | + the cache. (see java.util.HashMap) | |
463 | + autowarmCount - the number of entries to prepopulate from | |
464 | + an old cache. | |
465 | + --> | |
466 | + <filterCache class="solr.FastLRUCache" | |
467 | + size="512" | |
468 | + initialSize="512" | |
469 | + autowarmCount="0"/> | |
470 | + | |
471 | + <!-- Query Result Cache | |
472 | + | |
473 | + Caches results of searches - ordered lists of document ids | |
474 | + (DocList) based on a query, a sort, and the range of documents requested. | |
475 | + --> | |
476 | + <queryResultCache class="solr.LRUCache" | |
477 | + size="512" | |
478 | + initialSize="512" | |
479 | + autowarmCount="0"/> | |
480 | + | |
481 | + <!-- Document Cache | |
482 | + | |
483 | + Caches Lucene Document objects (the stored fields for each | |
484 | + document). Since Lucene internal document ids are transient, | |
485 | + this cache will not be autowarmed. | |
486 | + --> | |
487 | + <documentCache class="solr.LRUCache" | |
488 | + size="512" | |
489 | + initialSize="512" | |
490 | + autowarmCount="0"/> | |
491 | + | |
492 | + <!-- Field Value Cache | |
493 | + | |
494 | + Cache used to hold field values that are quickly accessible | |
495 | + by document id. The fieldValueCache is created by default | |
496 | + even if not configured here. | |
497 | + --> | |
498 | + <!-- | |
499 | + <fieldValueCache class="solr.FastLRUCache" | |
500 | + size="512" | |
501 | + autowarmCount="128" | |
502 | + showItems="32" /> | |
503 | + --> | |
504 | + | |
505 | + <!-- Custom Cache | |
506 | + | |
507 | + Example of a generic cache. These caches may be accessed by | |
508 | + name through SolrIndexSearcher.getCache(),cacheLookup(), and | |
509 | + cacheInsert(). The purpose is to enable easy caching of | |
510 | + user/application level data. The regenerator argument should | |
511 | + be specified as an implementation of solr.CacheRegenerator | |
512 | + if autowarming is desired. | |
513 | + --> | |
514 | + <!-- | |
515 | + <cache name="myUserCache" | |
516 | + class="solr.LRUCache" | |
517 | + size="4096" | |
518 | + initialSize="1024" | |
519 | + autowarmCount="1024" | |
520 | + regenerator="com.mycompany.MyRegenerator" | |
521 | + /> | |
522 | + --> | |
523 | + | |
524 | + | |
525 | + <!-- Lazy Field Loading | |
526 | + | |
527 | + If true, stored fields that are not requested will be loaded | |
528 | + lazily. This can result in a significant speed improvement | |
529 | + if the usual case is to not load all stored fields, | |
530 | + especially if the skipped fields are large compressed text | |
531 | + fields. | |
532 | + --> | |
533 | + <enableLazyFieldLoading>true</enableLazyFieldLoading> | |
534 | + | |
535 | + <!-- Use Filter For Sorted Query | |
536 | + | |
537 | + A possible optimization that attempts to use a filter to | |
538 | + satisfy a search. If the requested sort does not include | |
539 | + score, then the filterCache will be checked for a filter | |
540 | + matching the query. If found, the filter will be used as the | |
541 | + source of document ids, and then the sort will be applied to | |
542 | + that. | |
543 | + | |
544 | + For most situations, this will not be useful unless you | |
545 | + frequently get the same search repeatedly with different sort | |
546 | + options, and none of them ever use "score" | |
547 | + --> | |
548 | + <!-- | |
549 | + <useFilterForSortedQuery>true</useFilterForSortedQuery> | |
550 | + --> | |
551 | + | |
552 | + <!-- Result Window Size | |
553 | + | |
554 | + An optimization for use with the queryResultCache. When a search | |
555 | + is requested, a superset of the requested number of document ids | |
556 | + are collected. For example, if a search for a particular query | |
557 | + requests matching documents 10 through 19, and queryWindowSize is 50, | |
558 | + then documents 0 through 49 will be collected and cached. Any further | |
559 | + requests in that range can be satisfied via the cache. | |
560 | + --> | |
561 | + <queryResultWindowSize>20</queryResultWindowSize> | |
562 | + | |
563 | + <!-- Maximum number of documents to cache for any entry in the | |
564 | + queryResultCache. | |
565 | + --> | |
566 | + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> | |
567 | + | |
568 | + <!-- Query Related Event Listeners | |
569 | + | |
570 | + Various IndexSearcher related events can trigger Listeners to | |
571 | + take actions. | |
572 | + | |
573 | + newSearcher - fired whenever a new searcher is being prepared | |
574 | + and there is a current searcher handling requests (aka | |
575 | + registered). It can be used to prime certain caches to | |
576 | + prevent long request times for certain requests. | |
577 | + | |
578 | + firstSearcher - fired whenever a new searcher is being | |
579 | + prepared but there is no current registered searcher to handle | |
580 | + requests or to gain autowarming data from. | |
581 | + | |
582 | + | |
583 | + --> | |
584 | + <!-- QuerySenderListener takes an array of NamedList and executes a | |
585 | + local query request for each NamedList in sequence. | |
586 | + --> | |
587 | + <listener event="newSearcher" class="solr.QuerySenderListener"> | |
588 | + <arr name="queries"> | |
589 | + <!-- | |
590 | + <lst><str name="q">solr</str><str name="sort">price asc</str></lst> | |
591 | + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> | |
592 | + --> | |
593 | + </arr> | |
594 | + </listener> | |
595 | + <listener event="firstSearcher" class="solr.QuerySenderListener"> | |
596 | + <arr name="queries"> | |
597 | + <lst> | |
598 | + <str name="q">static firstSearcher warming in solrconfig.xml</str> | |
599 | + </lst> | |
600 | + </arr> | |
601 | + </listener> | |
602 | + | |
603 | + <!-- Use Cold Searcher | |
604 | + | |
605 | + If a search request comes in and there is no current | |
606 | + registered searcher, then immediately register the still | |
607 | + warming searcher and use it. If "false" then all requests | |
608 | + will block until the first searcher is done warming. | |
609 | + --> | |
610 | + <useColdSearcher>false</useColdSearcher> | |
611 | + | |
612 | + <!-- Max Warming Searchers | |
613 | + | |
614 | + Maximum number of searchers that may be warming in the | |
615 | + background concurrently. An error is returned if this limit | |
616 | + is exceeded. | |
617 | + | |
618 | + Recommend values of 1-2 for read-only slaves, higher for | |
619 | + masters w/o cache warming. | |
620 | + --> | |
621 | + <maxWarmingSearchers>2</maxWarmingSearchers> | |
622 | + | |
623 | + </query> | |
624 | + | |
625 | + | |
626 | + <!-- Request Dispatcher | |
627 | + | |
628 | + This section contains instructions for how the SolrDispatchFilter | |
629 | + should behave when processing requests for this SolrCore. | |
630 | + | |
631 | + handleSelect is a legacy option that affects the behavior of requests | |
632 | + such as /select?qt=XXX | |
633 | + | |
634 | + handleSelect="true" will cause the SolrDispatchFilter to process | |
635 | + the request and dispatch the query to a handler specified by the | |
636 | + "qt" param, assuming "/select" isn't already registered. | |
637 | + | |
638 | + handleSelect="false" will cause the SolrDispatchFilter to | |
639 | + ignore "/select" requests, resulting in a 404 unless a handler | |
640 | + is explicitly registered with the name "/select" | |
641 | + | |
642 | + handleSelect="true" is not recommended for new users, but is the default | |
643 | + for backwards compatibility | |
644 | + --> | |
645 | + <requestDispatcher handleSelect="false" > | |
646 | + <!-- Request Parsing | |
647 | + | |
648 | + These settings indicate how Solr Requests may be parsed, and | |
649 | + what restrictions may be placed on the ContentStreams from | |
650 | + those requests | |
651 | + | |
652 | + enableRemoteStreaming - enables use of the stream.file | |
653 | + and stream.url parameters for specifying remote streams. | |
654 | + | |
655 | + multipartUploadLimitInKB - specifies the max size of | |
656 | + Multipart File Uploads that Solr will allow in a Request. | |
657 | + | |
658 | + *** WARNING *** | |
659 | + The settings below authorize Solr to fetch remote files. You | |
660 | + should make sure your system has some authentication before | |
661 | + using enableRemoteStreaming="true" | |
662 | + | |
663 | + --> | |
664 | + <requestParsers enableRemoteStreaming="true" | |
665 | + multipartUploadLimitInKB="2048000" /> | |
666 | + | |
667 | + <!-- HTTP Caching | |
668 | + | |
669 | + Set HTTP caching related parameters (for proxy caches and clients). | |
670 | + | |
671 | + The options below instruct Solr not to output any HTTP Caching | |
672 | + related headers | |
673 | + --> | |
674 | + <httpCaching never304="true" /> | |
675 | + <!-- If you include a <cacheControl> directive, it will be used to | |
676 | + generate a Cache-Control header (as well as an Expires header | |
677 | + if the value contains "max-age=") | |
678 | + | |
679 | + By default, no Cache-Control header is generated. | |
680 | + | |
681 | + You can use the <cacheControl> option even if you have set | |
682 | + never304="true" | |
683 | + --> | |
684 | + <!-- | |
685 | + <httpCaching never304="true" > | |
686 | + <cacheControl>max-age=30, public</cacheControl> | |
687 | + </httpCaching> | |
688 | + --> | |
689 | + <!-- To enable Solr to respond with automatically generated HTTP | |
690 | + Caching headers, and to respond to Cache Validation requests | |
691 | + correctly, set the value of never304="false" | |
692 | + | |
693 | + This will cause Solr to generate Last-Modified and ETag | |
694 | + headers based on the properties of the Index. | |
695 | + | |
696 | + The following options can also be specified to affect the | |
697 | + values of these headers... | |
698 | + | |
699 | + lastModifiedFrom - the default value is "openTime" which means the | |
700 | + Last-Modified value (and validation against If-Modified-Since | |
701 | + requests) will all be relative to when the current Searcher | |
702 | + was opened. You can change it to lastModifiedFrom="dirLastMod" if | |
703 | + you want the value to exactly correspond to when the physical | |
704 | + index was last modified. | |
705 | + | |
706 | + etagSeed="..." is an option you can change to force the ETag | |
707 | + header (and validation against If-None-Match requests) to be | |
708 | + different even if the index has not changed (ie: when making | |
709 | + significant changes to your config file) | |
710 | + | |
711 | + (lastModifiedFrom and etagSeed are both ignored if you use | |
712 | + the never304="true" option) | |
713 | + --> | |
714 | + <!-- | |
715 | + <httpCaching lastModifiedFrom="openTime" | |
716 | + etagSeed="Solr"> | |
717 | + <cacheControl>max-age=30, public</cacheControl> | |
718 | + </httpCaching> | |
719 | + --> | |
720 | + </requestDispatcher> | |
721 | + | |
722 | + <!-- Request Handlers | |
723 | + | |
724 | + http://wiki.apache.org/solr/SolrRequestHandler | |
725 | + | |
726 | + Incoming queries will be dispatched to a specific handler by name | |
727 | + based on the path specified in the request. | |
728 | + | |
729 | + Legacy behavior: If the request path uses "/select" but no Request | |
730 | + Handler has that name, and if handleSelect="true" has been specified in | |
731 | + the requestDispatcher, then the Request Handler is dispatched based on | |
732 | + the qt parameter. Handlers without a leading '/' are accessed this way | |
733 | + like so: http://host/app/[core/]select?qt=name If no qt is | |
734 | + given, then the requestHandler that declares default="true" will be | |
735 | + used or the one named "standard". | |
736 | + | |
737 | + If a Request Handler is declared with startup="lazy", then it will | |
738 | + not be initialized until the first request that uses it. | |
739 | + | |
740 | + --> | |
741 | + <!-- SearchHandler | |
742 | + | |
743 | + http://wiki.apache.org/solr/SearchHandler | |
744 | + | |
745 | + For processing Search Queries, the primary Request Handler | |
746 | + provided with Solr is "SearchHandler". It delegates to a sequence | |
747 | + of SearchComponents (see below) and supports distributed | |
748 | + queries across multiple shards | |
749 | + --> | |
750 | + <requestHandler name="/select" class="solr.SearchHandler"> | |
751 | + <!-- default values for query parameters can be specified, these | |
752 | + will be overridden by parameters in the request | |
753 | + --> | |
754 | + <lst name="defaults"> | |
755 | + <str name="echoParams">explicit</str> | |
756 | + <int name="rows">10</int> | |
757 | + <str name="df">text</str> | |
758 | + </lst> | |
759 | + <!-- In addition to defaults, "appends" params can be specified | |
760 | + to identify values which should be appended to the list of | |
761 | + multi-val params from the query (or the existing "defaults"). | |
762 | + --> | |
763 | + <!-- In this example, the param "fq=inStock:true" would be appended to | |
764 | + any query time fq params the user may specify, as a mechanism for | |
765 | + partitioning the index, independent of any user selected filtering | |
766 | + that may also be desired (perhaps as a result of faceted searching). | |
767 | + | |
768 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
769 | + "appends" values from being used, so don't use this mechanism | |
770 | + unless you are sure you always want it. | |
771 | + --> | |
772 | + <!-- | |
773 | + <lst name="appends"> | |
774 | + <str name="fq">inStock:true</str> | |
775 | + </lst> | |
776 | + --> | |
777 | + <!-- "invariants" are a way of letting the Solr maintainer lock down | |
778 | + the options available to Solr clients. Any params values | |
779 | + specified here are used regardless of what values may be specified | |
780 | + in either the query, the "defaults", or the "appends" params. | |
781 | + | |
782 | + In this example, the facet.field and facet.query params would | |
783 | + be fixed, limiting the facets clients can use. Faceting is | |
784 | + not turned on by default - but if the client does specify | |
785 | + facet=true in the request, these are the only facets they | |
786 | + will be able to see counts for; regardless of what other | |
787 | + facet.field or facet.query params they may specify. | |
788 | + | |
789 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
790 | + "invariants" values from being used, so don't use this mechanism | |
791 | + unless you are sure you always want it. | |
792 | + --> | |
793 | + <!-- | |
794 | + <lst name="invariants"> | |
795 | + <str name="facet.field">cat</str> | |
796 | + <str name="facet.field">manu_exact</str> | |
797 | + <str name="facet.query">price:[* TO 500]</str> | |
798 | + <str name="facet.query">price:[500 TO *]</str> | |
799 | + </lst> | |
800 | + --> | |
801 | + <!-- If the default list of SearchComponents is not desired, that | |
802 | + list can either be overridden completely, or components can be | |
803 | + prepended or appended to the default list. (see below) | |
804 | + --> | |
805 | + <!-- | |
806 | + <arr name="components"> | |
807 | + <str>nameOfCustomComponent1</str> | |
808 | + <str>nameOfCustomComponent2</str> | |
809 | + </arr> | |
810 | + --> | |
811 | + | |
812 | + </requestHandler> | |
813 | + | |
814 | + <!-- A request handler that returns indented JSON by default --> | |
815 | + <requestHandler name="/query" class="solr.SearchHandler"> | |
816 | + <lst name="defaults"> | |
817 | + <str name="echoParams">explicit</str> | |
818 | + <str name="wt">json</str> | |
819 | + <str name="indent">true</str> | |
820 | + <str name="df">text</str> | |
821 | + </lst> | |
822 | + </requestHandler> | |
823 | + | |
824 | + | |
825 | + <!-- realtime get handler, guaranteed to return the latest stored fields of | |
826 | + any document, without the need to commit or open a new searcher. The | |
827 | + current implementation relies on the updateLog feature being enabled. --> | |
828 | + <requestHandler name="/get" class="solr.RealTimeGetHandler"> | |
829 | + <lst name="defaults"> | |
830 | + <str name="omitHeader">true</str> | |
831 | + <str name="wt">json</str> | |
832 | + <str name="indent">true</str> | |
833 | + </lst> | |
834 | + </requestHandler> | |
835 | + | |
836 | + | |
837 | + <!-- A Robust Example | |
838 | + | |
839 | + This example SearchHandler declaration shows off usage of the | |
840 | + SearchHandler with many defaults declared | |
841 | + | |
842 | + Note that multiple instances of the same Request Handler | |
843 | + (SearchHandler) can be registered multiple times with different | |
844 | + names (and different init parameters) | |
845 | + --> | |
846 | + <requestHandler name="/browse" class="solr.SearchHandler"> | |
847 | + <lst name="defaults"> | |
848 | + <str name="echoParams">explicit</str> | |
849 | + | |
850 | + <!-- VelocityResponseWriter settings --> | |
851 | + <str name="wt">velocity</str> | |
852 | + <str name="v.template">browse</str> | |
853 | + <str name="v.layout">layout</str> | |
854 | + <str name="title">Solritas</str> | |
855 | + | |
856 | + <!-- Query settings --> | |
857 | + <str name="defType">edismax</str> | |
858 | + <str name="qf"> | |
859 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
860 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
861 | + </str> | |
862 | + <str name="df">text</str> | |
863 | + <str name="mm">100%</str> | |
864 | + <str name="q.alt">*:*</str> | |
865 | + <str name="rows">10</str> | |
866 | + <str name="fl">*,score</str> | |
867 | + | |
868 | + <str name="mlt.qf"> | |
869 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
870 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
871 | + </str> | |
872 | + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str> | |
873 | + <int name="mlt.count">3</int> | |
874 | + | |
875 | + <!-- Faceting defaults --> | |
876 | + <str name="facet">on</str> | |
877 | + <str name="facet.field">cat</str> | |
878 | + <str name="facet.field">manu_exact</str> | |
879 | + <str name="facet.field">content_type</str> | |
880 | + <str name="facet.field">author_s</str> | |
881 | + <str name="facet.query">ipod</str> | |
882 | + <str name="facet.query">GB</str> | |
883 | + <str name="facet.mincount">1</str> | |
884 | + <str name="facet.pivot">cat,inStock</str> | |
885 | + <str name="facet.range.other">after</str> | |
886 | + <str name="facet.range">price</str> | |
887 | + <int name="f.price.facet.range.start">0</int> | |
888 | + <int name="f.price.facet.range.end">600</int> | |
889 | + <int name="f.price.facet.range.gap">50</int> | |
890 | + <str name="facet.range">popularity</str> | |
891 | + <int name="f.popularity.facet.range.start">0</int> | |
892 | + <int name="f.popularity.facet.range.end">10</int> | |
893 | + <int name="f.popularity.facet.range.gap">3</int> | |
894 | + <str name="facet.range">manufacturedate_dt</str> | |
895 | + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str> | |
896 | + <str name="f.manufacturedate_dt.facet.range.end">NOW</str> | |
897 | + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str> | |
898 | + <str name="f.manufacturedate_dt.facet.range.other">before</str> | |
899 | + <str name="f.manufacturedate_dt.facet.range.other">after</str> | |
900 | + | |
901 | + <!-- Highlighting defaults --> | |
902 | + <str name="hl">on</str> | |
903 | + <str name="hl.fl">content</str> | |
904 | + <str name="hl.encoder">html</str> | |
905 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
906 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
907 | + <str name="f.title.hl.fragsize">0</str> | |
908 | + <str name="f.title.hl.alternateField">title</str> | |
909 | + <str name="f.name.hl.fragsize">0</str> | |
910 | + <str name="f.name.hl.alternateField">name</str> | |
911 | + <str name="f.content.hl.snippets">3</str> | |
912 | + <str name="f.content.hl.fragsize">1000</str> | |
913 | + <str name="f.content.hl.alternateField">content</str> | |
914 | + <str name="f.content.hl.maxAlternateFieldLength">250</str> | |
915 | + | |
916 | + | |
917 | + <!-- Spell checking defaults --> | |
918 | + <str name="spellcheck">on</str> | |
919 | + <str name="spellcheck.extendedResults">false</str> | |
920 | + <str name="spellcheck.count">5</str> | |
921 | + <str name="spellcheck.alternativeTermCount">2</str> | |
922 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
923 | + <str name="spellcheck.collate">true</str> | |
924 | + <str name="spellcheck.collateExtendedResults">true</str> | |
925 | + <str name="spellcheck.maxCollationTries">5</str> | |
926 | + <str name="spellcheck.maxCollations">3</str> | |
927 | + </lst> | |
928 | + | |
929 | + <!-- append spellchecking to our list of components --> | |
930 | + <arr name="last-components"> | |
931 | + <str>spellcheck</str> | |
932 | + </arr> | |
933 | + </requestHandler> | |
934 | + | |
935 | + | |
936 | + <!-- Update Request Handler. | |
937 | + | |
938 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
939 | + | |
940 | + The canonical Request Handler for Modifying the Index through | |
941 | + commands specified using XML, JSON, CSV, or JAVABIN | |
942 | + | |
943 | + Note: Since Solr 1.1, requestHandlers require a valid content | |
944 | + type header if posted in the body. For example, curl now | |
945 | + requires: -H 'Content-type:text/xml; charset=utf-8' | |
946 | + | |
947 | + To override the request content type and force a specific | |
948 | + Content-type, use the request parameter: | |
949 | + ?update.contentType=text/csv | |
950 | + | |
951 | + This handler will pick a response format to match the input | |
952 | + if the 'wt' parameter is not explicit | |
953 | + --> | |
954 | + <requestHandler name="/update" class="solr.UpdateRequestHandler"> | |
955 | + <!-- See below for information on defining | |
956 | + updateRequestProcessorChains that can be used by name | |
957 | + on each Update Request | |
958 | + --> | |
959 | + <!-- | |
960 | + <lst name="defaults"> | |
961 | + <str name="update.chain">dedupe</str> | |
962 | + </lst> | |
963 | + --> | |
964 | + </requestHandler> | |
965 | + | |
966 | + <!-- for back compat with clients using /update/json and /update/csv --> | |
967 | + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler"> | |
968 | + <lst name="defaults"> | |
969 | + <str name="stream.contentType">application/json</str> | |
970 | + </lst> | |
971 | + </requestHandler> | |
972 | + <requestHandler name="/update/csv" class="solr.CSVRequestHandler"> | |
973 | + <lst name="defaults"> | |
974 | + <str name="stream.contentType">application/csv</str> | |
975 | + </lst> | |
976 | + </requestHandler> | |
977 | + | |
978 | + <!-- Solr Cell Update Request Handler | |
979 | + | |
980 | + http://wiki.apache.org/solr/ExtractingRequestHandler | |
981 | + | |
982 | + --> | |
983 | + <requestHandler name="/update/extract" | |
984 | + startup="lazy" | |
985 | + class="solr.extraction.ExtractingRequestHandler" > | |
986 | + <lst name="defaults"> | |
987 | + <str name="lowernames">true</str> | |
988 | + <str name="uprefix">ignored_</str> | |
989 | + | |
990 | + <!-- capture link hrefs but ignore div attributes --> | |
991 | + <str name="captureAttr">true</str> | |
992 | + <str name="fmap.a">links</str> | |
993 | + <str name="fmap.div">ignored_</str> | |
994 | + </lst> | |
995 | + </requestHandler> | |
996 | + | |
997 | + | |
998 | + <!-- Field Analysis Request Handler | |
999 | + | |
1000 | + RequestHandler that provides much the same functionality as | |
1001 | + analysis.jsp. Provides the ability to specify multiple field | |
1002 | + types and field names in the same request and outputs | |
1003 | + index-time and query-time analysis for each of them. | |
1004 | + | |
1005 | + Request parameters are: | |
1006 | + analysis.fieldname - field name whose analyzers are to be used | |
1007 | + | |
1008 | + analysis.fieldtype - field type whose analyzers are to be used | |
1009 | + analysis.fieldvalue - text for index-time analysis | |
1010 | + q (or analysis.q) - text for query time analysis | |
1011 | + analysis.showmatch (true|false) - When set to true and when | |
1012 | + query analysis is performed, the produced tokens of the | |
1013 | + field value analysis will be marked as "matched" for every | |
1014 | + token that is produced by the query analysis | |
1015 | + --> | |
1016 | + <requestHandler name="/analysis/field" | |
1017 | + startup="lazy" | |
1018 | + class="solr.FieldAnalysisRequestHandler" /> | |
1019 | + | |
1020 | + | |
1021 | + <!-- Document Analysis Handler | |
1022 | + | |
1023 | + http://wiki.apache.org/solr/AnalysisRequestHandler | |
1024 | + | |
1025 | + An analysis handler that provides a breakdown of the analysis | |
1026 | + process of provided documents. This handler expects a (single) | |
1027 | + content stream with the following format: | |
1028 | + | |
1029 | + <docs> | |
1030 | + <doc> | |
1031 | + <field name="id">1</field> | |
1032 | + <field name="name">The Name</field> | |
1033 | + <field name="text">The Text Value</field> | |
1034 | + </doc> | |
1035 | + <doc>...</doc> | |
1036 | + <doc>...</doc> | |
1037 | + ... | |
1038 | + </docs> | |
1039 | + | |
1040 | + Note: Each document must contain a field which serves as the | |
1041 | + unique key. This key is used in the returned response to associate | |
1042 | + an analysis breakdown to the analyzed document. | |
1043 | + | |
1044 | + Like the FieldAnalysisRequestHandler, this handler also supports | |
1045 | + query analysis by sending either an "analysis.query" or "q" | |
1046 | + request parameter that holds the query text to be analyzed. It | |
1047 | + also supports the "analysis.showmatch" parameter which when set to | |
1048 | + true, all field tokens that match the query tokens will be marked | |
1049 | + as a "match". | |
1050 | + --> | |
1051 | + <requestHandler name="/analysis/document" | |
1052 | + class="solr.DocumentAnalysisRequestHandler" | |
1053 | + startup="lazy" /> | |
1054 | + | |
1055 | + <!-- Admin Handlers | |
1056 | + | |
1057 | + Admin Handlers - This will register all the standard admin | |
1058 | + RequestHandlers. | |
1059 | + --> | |
1060 | + <requestHandler name="/admin/" | |
1061 | + class="solr.admin.AdminHandlers" /> | |
1062 | + <!-- This single handler is equivalent to the following... --> | |
1063 | + <!-- | |
1064 | + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" /> | |
1065 | + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" /> | |
1066 | + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" /> | |
1067 | + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" /> | |
1068 | + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" /> | |
1069 | + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" /> | |
1070 | + --> | |
1071 | + <!-- If you wish to hide files under ${solr.home}/conf, explicitly | |
1072 | + register the ShowFileRequestHandler using: | |
1073 | + --> | |
1074 | + <!-- | |
1075 | + <requestHandler name="/admin/file" | |
1076 | + class="solr.admin.ShowFileRequestHandler" > | |
1077 | + <lst name="invariants"> | |
1078 | + <str name="hidden">synonyms.txt</str> | |
1079 | + <str name="hidden">anotherfile.txt</str> | |
1080 | + </lst> | |
1081 | + </requestHandler> | |
1082 | + --> | |
1083 | + | |
1084 | + <!-- ping/healthcheck --> | |
1085 | + <requestHandler name="/admin/ping" class="solr.PingRequestHandler"> | |
1086 | + <lst name="invariants"> | |
1087 | + <str name="q">solrpingquery</str> | |
1088 | + </lst> | |
1089 | + <lst name="defaults"> | |
1090 | + <str name="echoParams">all</str> | |
1091 | + </lst> | |
1092 | + <!-- An optional feature of the PingRequestHandler is to configure the | |
1093 | + handler with a "healthcheckFile" which can be used to enable/disable | |
1094 | + the PingRequestHandler. | |
1095 | + relative paths are resolved against the data dir | |
1096 | + --> | |
1097 | + <!-- <str name="healthcheckFile">server-enabled.txt</str> --> | |
1098 | + </requestHandler> | |
1099 | + | |
1100 | + <!-- Echo the request contents back to the client --> | |
1101 | + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > | |
1102 | + <lst name="defaults"> | |
1103 | + <str name="echoParams">explicit</str> | |
1104 | + <str name="echoHandler">true</str> | |
1105 | + </lst> | |
1106 | + </requestHandler> | |
1107 | + | |
1108 | + <!-- Solr Replication | |
1109 | + | |
1110 | + The SolrReplicationHandler supports replicating indexes from a | |
1111 | + "master" used for indexing and "slaves" used for queries. | |
1112 | + | |
1113 | + http://wiki.apache.org/solr/SolrReplication | |
1114 | + | |
1115 | + It is also necessary for SolrCloud to function (in Cloud mode, the | |
1116 | + replication handler is used to bulk transfer segments when nodes | |
1117 | + are added or need to recover). | |
1118 | + | |
1119 | + https://wiki.apache.org/solr/SolrCloud/ | |
1120 | + --> | |
1121 | + <requestHandler name="/replication" class="solr.ReplicationHandler" > | |
1122 | + <!-- | |
1123 | + To enable simple master/slave replication, uncomment one of the | |
1124 | + sections below, depending on whether this solr instance should be | |
1125 | + the "master" or a "slave". If this instance is a "slave" you will | |
1126 | + also need to fill in the masterUrl to point to a real machine. | |
1127 | + --> | |
1128 | + <!-- | |
1129 | + <lst name="master"> | |
1130 | + <str name="replicateAfter">commit</str> | |
1131 | + <str name="replicateAfter">startup</str> | |
1132 | + <str name="confFiles">schema.xml,stopwords.txt</str> | |
1133 | + </lst> | |
1134 | + --> | |
1135 | + <!-- | |
1136 | + <lst name="slave"> | |
1137 | + <str name="masterUrl">http://your-master-hostname:8983/solr</str> | |
1138 | + <str name="pollInterval">00:00:60</str> | |
1139 | + </lst> | |
1140 | + --> | |
1141 | + </requestHandler> | |
1142 | + | |
1143 | + <!-- Search Components | |
1144 | + | |
1145 | + Search components are registered to SolrCore and used by | |
1146 | + instances of SearchHandler (which can access them by name) | |
1147 | + | |
1148 | + By default, the following components are available: | |
1149 | + | |
1150 | + <searchComponent name="query" class="solr.QueryComponent" /> | |
1151 | + <searchComponent name="facet" class="solr.FacetComponent" /> | |
1152 | + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> | |
1153 | + <searchComponent name="highlight" class="solr.HighlightComponent" /> | |
1154 | + <searchComponent name="stats" class="solr.StatsComponent" /> | |
1155 | + <searchComponent name="debug" class="solr.DebugComponent" /> | |
1156 | + | |
1157 | + Default configuration in a requestHandler would look like: | |
1158 | + | |
1159 | + <arr name="components"> | |
1160 | + <str>query</str> | |
1161 | + <str>facet</str> | |
1162 | + <str>mlt</str> | |
1163 | + <str>highlight</str> | |
1164 | + <str>stats</str> | |
1165 | + <str>debug</str> | |
1166 | + </arr> | |
1167 | + | |
1168 | + If you register a searchComponent to one of the standard names, | |
1169 | + that will be used instead of the default. | |
1170 | + | |
1171 | + To insert components before or after the 'standard' components, use: | |
1172 | + | |
1173 | + <arr name="first-components"> | |
1174 | + <str>myFirstComponentName</str> | |
1175 | + </arr> | |
1176 | + | |
1177 | + <arr name="last-components"> | |
1178 | + <str>myLastComponentName</str> | |
1179 | + </arr> | |
1180 | + | |
1181 | + NOTE: The component registered with the name "debug" will | |
1182 | + always be executed after the "last-components" | |
1183 | + | |
1184 | + --> | |
1185 | + | |
1186 | + <!-- Spell Check | |
1187 | + | |
1188 | + The spell check component can return a list of alternative spelling | |
1189 | + suggestions. | |
1190 | + | |
1191 | + http://wiki.apache.org/solr/SpellCheckComponent | |
1192 | + --> | |
1193 | + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> | |
1194 | + | |
1195 | + <str name="queryAnalyzerFieldType">textSpell</str> | |
1196 | + | |
1197 | + <!-- Multiple "Spell Checkers" can be declared and used by this | |
1198 | + component | |
1199 | + --> | |
1200 | + | |
1201 | + <!-- a spellchecker built from a field of the main index --> | |
1202 | + <lst name="spellchecker"> | |
1203 | + <str name="name">default</str> | |
1204 | + <str name="field">name</str> | |
1205 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1206 | + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> | |
1207 | + <str name="distanceMeasure">internal</str> | |
1208 | + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> | |
1209 | + <float name="accuracy">0.5</float> | |
1210 | + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> | |
1211 | + <int name="maxEdits">2</int> | |
1212 | + <!-- the minimum shared prefix when enumerating terms --> | |
1213 | + <int name="minPrefix">1</int> | |
1214 | + <!-- maximum number of inspections per result. --> | |
1215 | + <int name="maxInspections">5</int> | |
1216 | + <!-- minimum length of a query term to be considered for correction --> | |
1217 | + <int name="minQueryLength">4</int> | |
1218 | + <!-- maximum threshold of documents a query term can appear in to be considered for correction --> | |
1219 | + <float name="maxQueryFrequency">0.01</float> | |
1220 | + <!-- uncomment this to require suggestions to occur in 1% of the documents | |
1221 | + <float name="thresholdTokenFrequency">.01</float> | |
1222 | + --> | |
1223 | + </lst> | |
1224 | + | |
1225 | + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> | |
1226 | + <lst name="spellchecker"> | |
1227 | + <str name="name">wordbreak</str> | |
1228 | + <str name="classname">solr.WordBreakSolrSpellChecker</str> | |
1229 | + <str name="field">name</str> | |
1230 | + <str name="combineWords">true</str> | |
1231 | + <str name="breakWords">true</str> | |
1232 | + <int name="maxChanges">10</int> | |
1233 | + </lst> | |
1234 | + | |
1235 | + <!-- a spellchecker that uses a different distance measure --> | |
1236 | + <!-- | |
1237 | + <lst name="spellchecker"> | |
1238 | + <str name="name">jarowinkler</str> | |
1239 | + <str name="field">spell</str> | |
1240 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1241 | + <str name="distanceMeasure"> | |
1242 | + org.apache.lucene.search.spell.JaroWinklerDistance | |
1243 | + </str> | |
1244 | + </lst> | |
1245 | + --> | |
1246 | + | |
1247 | + <!-- a spellchecker that uses an alternate comparator | |
1248 | + | |
1249 | + comparatorClass can be one of: | |
1250 | + 1. score (default) | |
1251 | + 2. freq (Frequency first, then score) | |
1252 | + 3. A fully qualified class name | |
1253 | + --> | |
1254 | + <!-- | |
1255 | + <lst name="spellchecker"> | |
1256 | + <str name="name">freq</str> | |
1257 | + <str name="field">lowerfilt</str> | |
1258 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1259 | + <str name="comparatorClass">freq</str> | |
1260 | + --> | |
1261 | + | |
1262 | + <!-- A spellchecker that reads the list of words from a file --> | |
1263 | + <!-- | |
1264 | + <lst name="spellchecker"> | |
1265 | + <str name="classname">solr.FileBasedSpellChecker</str> | |
1266 | + <str name="name">file</str> | |
1267 | + <str name="sourceLocation">spellings.txt</str> | |
1268 | + <str name="characterEncoding">UTF-8</str> | |
1269 | + <str name="spellcheckIndexDir">spellcheckerFile</str> | |
1270 | + </lst> | |
1271 | + --> | |
1272 | + </searchComponent> | |
1273 | + | |
1274 | + <!-- A request handler for demonstrating the spellcheck component. | |
1275 | + | |
1276 | + NOTE: This is purely as an example. The whole purpose of the | |
1277 | + SpellCheckComponent is to hook it into the request handler that | |
1278 | + handles your normal user queries so that a separate request is | |
1279 | + not needed to get suggestions. | |
1280 | + | |
1281 | + IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS | |
1282 | + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! | |
1283 | + | |
1284 | + See http://wiki.apache.org/solr/SpellCheckComponent for details | |
1285 | + on the request parameters. | |
1286 | + --> | |
1287 | + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> | |
1288 | + <lst name="defaults"> | |
1289 | + <str name="df">text</str> | |
1290 | + <!-- Solr will use suggestions from both the 'default' spellchecker | |
1291 | + and from the 'wordbreak' spellchecker and combine them. | |
1292 | + collations (re-written queries) can include a combination of | |
1293 | + corrections from both spellcheckers --> | |
1294 | + <str name="spellcheck.dictionary">default</str> | |
1295 | + <str name="spellcheck.dictionary">wordbreak</str> | |
1296 | + <str name="spellcheck">on</str> | |
1297 | + <str name="spellcheck.extendedResults">true</str> | |
1298 | + <str name="spellcheck.count">10</str> | |
1299 | + <str name="spellcheck.alternativeTermCount">5</str> | |
1300 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
1301 | + <str name="spellcheck.collate">true</str> | |
1302 | + <str name="spellcheck.collateExtendedResults">true</str> | |
1303 | + <str name="spellcheck.maxCollationTries">10</str> | |
1304 | + <str name="spellcheck.maxCollations">5</str> | |
1305 | + </lst> | |
1306 | + <arr name="last-components"> | |
1307 | + <str>spellcheck</str> | |
1308 | + </arr> | |
1309 | + </requestHandler> | |
1310 | + | |
1311 | + <!-- Term Vector Component | |
1312 | + | |
1313 | + http://wiki.apache.org/solr/TermVectorComponent | |
1314 | + --> | |
1315 | + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> | |
1316 | + | |
1317 | + <!-- A request handler for demonstrating the term vector component | |
1318 | + | |
1319 | + This is purely as an example. | |
1320 | + | |
1321 | + In reality you will likely want to add the component to your | |
1322 | + already specified request handlers. | |
1323 | + --> | |
1324 | + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> | |
1325 | + <lst name="defaults"> | |
1326 | + <str name="df">text</str> | |
1327 | + <bool name="tv">true</bool> | |
1328 | + </lst> | |
1329 | + <arr name="last-components"> | |
1330 | + <str>tvComponent</str> | |
1331 | + </arr> | |
1332 | + </requestHandler> | |
1333 | + | |
1334 | + <!-- Clustering Component | |
1335 | + | |
1336 | + http://wiki.apache.org/solr/ClusteringComponent | |
1337 | + | |
1338 | + You'll need to set the solr.clustering.enabled system property | |
1339 | + when running solr to run with clustering enabled: | |
1340 | + | |
1341 | + java -Dsolr.clustering.enabled=true -jar start.jar | |
1342 | + | |
1343 | + --> | |
1344 | + <searchComponent name="clustering" | |
1345 | + enable="${solr.clustering.enabled:false}" | |
1346 | + class="solr.clustering.ClusteringComponent" > | |
1347 | + <!-- Declare an engine --> | |
1348 | + <lst name="engine"> | |
1349 | + <!-- The name, only one can be named "default" --> | |
1350 | + <str name="name">default</str> | |
1351 | + | |
1352 | + <!-- Class name of Carrot2 clustering algorithm. | |
1353 | + | |
1354 | + Currently available algorithms are: | |
1355 | + | |
1356 | + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm | |
1357 | + * org.carrot2.clustering.stc.STCClusteringAlgorithm | |
1358 | + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm | |
1359 | + | |
1360 | + See http://project.carrot2.org/algorithms.html for the | |
1361 | + algorithm's characteristics. | |
1362 | + --> | |
1363 | + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str> | |
1364 | + | |
1365 | + <!-- Overriding values for Carrot2 default algorithm attributes. | |
1366 | + | |
1367 | + For a description of all available attributes, see: | |
1368 | + http://download.carrot2.org/stable/manual/#chapter.components. | |
1369 | + Use attribute key as name attribute of str elements | |
1370 | + below. These can be further overridden for individual | |
1371 | + requests by specifying attribute key as request parameter | |
1372 | + name and attribute value as parameter value. | |
1373 | + --> | |
1374 | + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str> | |
1375 | + | |
1376 | + <!-- Location of Carrot2 lexical resources. | |
1377 | + | |
1378 | + A directory from which to load Carrot2-specific stop words | |
1379 | + and stop labels. Absolute or relative to Solr config directory. | |
1380 | + If a specific resource (e.g. stopwords.en) is present in the | |
1381 | + specified dir, it will completely override the corresponding | |
1382 | + default one that ships with Carrot2. | |
1383 | + | |
1384 | + For an overview of Carrot2 lexical resources, see: | |
1385 | + http://download.carrot2.org/head/manual/#chapter.lexical-resources | |
1386 | + --> | |
1387 | + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str> | |
1388 | + | |
1389 | + <!-- The language to assume for the documents. | |
1390 | + | |
1391 | + For a list of allowed values, see: | |
1392 | + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage | |
1393 | + --> | |
1394 | + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str> | |
1395 | + </lst> | |
1396 | + <lst name="engine"> | |
1397 | + <str name="name">stc</str> | |
1398 | + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str> | |
1399 | + </lst> | |
1400 | + </searchComponent> | |
1401 | + | |
1402 | + <!-- A request handler for demonstrating the clustering component | |
1403 | + | |
1404 | + This is purely as an example. | |
1405 | + | |
1406 | + In reality you will likely want to add the component to your | |
1407 | + already specified request handlers. | |
1408 | + --> | |
1409 | + <requestHandler name="/clustering" | |
1410 | + startup="lazy" | |
1411 | + enable="${solr.clustering.enabled:false}" | |
1412 | + class="solr.SearchHandler"> | |
1413 | + <lst name="defaults"> | |
1414 | + <bool name="clustering">true</bool> | |
1415 | + <str name="clustering.engine">default</str> | |
1416 | + <bool name="clustering.results">true</bool> | |
1417 | + <!-- The title field --> | |
1418 | + <str name="carrot.title">name</str> | |
1419 | + <str name="carrot.url">id</str> | |
1420 | + <!-- The field to cluster on --> | |
1421 | + <str name="carrot.snippet">features</str> | |
1422 | + <!-- produce summaries --> | |
1423 | + <bool name="carrot.produceSummary">true</bool> | |
1424 | + <!-- the maximum number of labels per cluster --> | |
1425 | + <!--<int name="carrot.numDescriptions">5</int>--> | |
1426 | + <!-- produce sub clusters --> | |
1427 | + <bool name="carrot.outputSubClusters">false</bool> | |
1428 | + | |
1429 | + <str name="defType">edismax</str> | |
1430 | + <str name="qf"> | |
1431 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
1432 | + </str> | |
1433 | + <str name="q.alt">*:*</str> | |
1434 | + <str name="rows">10</str> | |
1435 | + <str name="fl">*,score</str> | |
1436 | + </lst> | |
1437 | + <arr name="last-components"> | |
1438 | + <str>clustering</str> | |
1439 | + </arr> | |
1440 | + </requestHandler> | |
1441 | + | |
1442 | + <!-- Terms Component | |
1443 | + | |
1444 | + http://wiki.apache.org/solr/TermsComponent | |
1445 | + | |
1446 | + A component to return terms and document frequency of those | |
1447 | + terms | |
1448 | + --> | |
1449 | + <searchComponent name="terms" class="solr.TermsComponent"/> | |
1450 | + | |
1451 | + <!-- A request handler for demonstrating the terms component --> | |
1452 | + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> | |
1453 | + <lst name="defaults"> | |
1454 | + <bool name="terms">true</bool> | |
1455 | + <bool name="distrib">false</bool> | |
1456 | + </lst> | |
1457 | + <arr name="components"> | |
1458 | + <str>terms</str> | |
1459 | + </arr> | |
1460 | + </requestHandler> | |
1461 | + | |
1462 | + | |
1463 | + <!-- Query Elevation Component | |
1464 | + | |
1465 | + http://wiki.apache.org/solr/QueryElevationComponent | |
1466 | + | |
1467 | + a search component that enables you to configure the top | |
1468 | + results for a given query regardless of the normal lucene | |
1469 | + scoring. | |
1470 | + --> | |
1471 | + <searchComponent name="elevator" class="solr.QueryElevationComponent" > | |
1472 | + <!-- pick a fieldType to analyze queries --> | |
1473 | + <str name="queryFieldType">string</str> | |
1474 | + <str name="config-file">elevate.xml</str> | |
1475 | + </searchComponent> | |
1476 | + | |
1477 | + <!-- A request handler for demonstrating the elevator component --> | |
1478 | + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> | |
1479 | + <lst name="defaults"> | |
1480 | + <str name="echoParams">explicit</str> | |
1481 | + <str name="df">text</str> | |
1482 | + </lst> | |
1483 | + <arr name="last-components"> | |
1484 | + <str>elevator</str> | |
1485 | + </arr> | |
1486 | + </requestHandler> | |
1487 | + | |
1488 | + <!-- Highlighting Component | |
1489 | + | |
1490 | + http://wiki.apache.org/solr/HighlightingParameters | |
1491 | + --> | |
1492 | + <searchComponent class="solr.HighlightComponent" name="highlight"> | |
1493 | + <highlighting> | |
1494 | + <!-- Configure the standard fragmenter --> | |
1495 | + <!-- This could most likely be commented out in the "default" case --> | |
1496 | + <fragmenter name="gap" | |
1497 | + default="true" | |
1498 | + class="solr.highlight.GapFragmenter"> | |
1499 | + <lst name="defaults"> | |
1500 | + <int name="hl.fragsize">250</int> | |
1501 | + </lst> | |
1502 | + </fragmenter> | |
1503 | + | |
1504 | + <!-- A regular-expression-based fragmenter | |
1505 | + (for sentence extraction) | |
1506 | + --> | |
1507 | + <fragmenter name="regex" | |
1508 | + class="solr.highlight.RegexFragmenter"> | |
1509 | + <lst name="defaults"> | |
1510 | + <!-- slightly smaller fragsizes work better because of slop --> | |
1511 | + <int name="hl.fragsize">250</int> | |
1512 | + <!-- allow 50% slop on fragment sizes --> | |
1513 | + <float name="hl.regex.slop">0.5</float> | |
1514 | + <!-- a basic sentence pattern --> | |
1515 | + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> | |
1516 | + </lst> | |
1517 | + </fragmenter> | |
1518 | + | |
1519 | + <!-- Configure the standard formatter --> | |
1520 | + <formatter name="html" | |
1521 | + default="true" | |
1522 | + class="solr.highlight.HtmlFormatter"> | |
1523 | + <lst name="defaults"> | |
1524 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
1525 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
1526 | + </lst> | |
1527 | + </formatter> | |
1528 | + | |
1529 | + <!-- Configure the standard encoder --> | |
1530 | + <encoder name="html" | |
1531 | + class="solr.highlight.HtmlEncoder" /> | |
1532 | + | |
1533 | + <!-- Configure the standard fragListBuilder --> | |
1534 | + <fragListBuilder name="simple" | |
1535 | + class="solr.highlight.SimpleFragListBuilder"/> | |
1536 | + | |
1537 | + <!-- Configure the single fragListBuilder --> | |
1538 | + <fragListBuilder name="single" | |
1539 | + class="solr.highlight.SingleFragListBuilder"/> | |
1540 | + | |
1541 | + <!-- Configure the weighted fragListBuilder --> | |
1542 | + <fragListBuilder name="weighted" | |
1543 | + default="true" | |
1544 | + class="solr.highlight.WeightedFragListBuilder"/> | |
1545 | + | |
1546 | + <!-- default tag FragmentsBuilder --> | |
1547 | + <fragmentsBuilder name="default" | |
1548 | + default="true" | |
1549 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1550 | + <!-- | |
1551 | + <lst name="defaults"> | |
1552 | + <str name="hl.multiValuedSeparatorChar">/</str> | |
1553 | + </lst> | |
1554 | + --> | |
1555 | + </fragmentsBuilder> | |
1556 | + | |
1557 | + <!-- multi-colored tag FragmentsBuilder: hl.tag.pre is a comma-separated list of opening tags, each using a CSS named color --> | |
1558 | + <fragmentsBuilder name="colored" | |
1559 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1560 | + <lst name="defaults"> | |
1561 | + <str name="hl.tag.pre"><![CDATA[ | |
1562 | + <b style="background:yellow">,<b style="background:lawngreen">, | |
1563 | + <b style="background:aquamarine">,<b style="background:magenta">, | |
1564 | + <b style="background:palegreen">,<b style="background:coral">, | |
1565 | + <b style="background:wheat">,<b style="background:khaki">, | |
1566 | + <b style="background:lime">,<b style="background:deepskyblue">]]></str> | |
1567 | + <str name="hl.tag.post"><![CDATA[</b>]]></str> | |
1568 | + </lst> | |
1569 | + </fragmentsBuilder> | |
1570 | + | |
1571 | + <boundaryScanner name="default" | |
1572 | + default="true" | |
1573 | + class="solr.highlight.SimpleBoundaryScanner"> | |
1574 | + <lst name="defaults"> | |
1575 | + <str name="hl.bs.maxScan">10</str> | |
1576 | + <str name="hl.bs.chars">.,!? | |
1577 | + | |
1578 | +</str> | |
1579 | + </lst> | |
1580 | + </boundaryScanner> | |
1581 | + | |
1582 | + <boundaryScanner name="breakIterator" | |
1583 | + class="solr.highlight.BreakIteratorBoundaryScanner"> | |
1584 | + <lst name="defaults"> | |
1585 | + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> | |
1586 | + <str name="hl.bs.type">WORD</str> | |
1587 | + <!-- language and country are used when constructing Locale object. --> | |
1588 | + <!-- And the Locale object will be used when getting instance of BreakIterator --> | |
1589 | + <str name="hl.bs.language">pt</str> | |
1590 | + <str name="hl.bs.country">BR</str> | |
1591 | + </lst> | |
1592 | + </boundaryScanner> | |
1593 | + </highlighting> | |
1594 | + </searchComponent> | |
1595 | + | |
1596 | + <!-- Update Processors | |
1597 | + | |
1598 | + Chains of Update Processor Factories for dealing with Update | |
1599 | + Requests can be declared, and then used by name in Update | |
1600 | + Request Processors | |
1601 | + | |
1602 | + http://wiki.apache.org/solr/UpdateRequestProcessor | |
1603 | + | |
1604 | + --> | |
1605 | + <!-- Deduplication | |
1606 | + | |
1607 | + An example dedup update processor that creates the "id" field | |
1608 | + on the fly based on the hash code of some other fields. This | |
1609 | + example has overwriteDupes set to false since we are using the | |
1610 | + id field as the signatureField and Solr will maintain | |
1611 | + uniqueness based on that anyway. | |
1612 | + | |
1613 | + --> | |
1614 | + <!-- | |
1615 | + <updateRequestProcessorChain name="dedupe"> | |
1616 | + <processor class="solr.processor.SignatureUpdateProcessorFactory"> | |
1617 | + <bool name="enabled">true</bool> | |
1618 | + <str name="signatureField">id</str> | |
1619 | + <bool name="overwriteDupes">false</bool> | |
1620 | + <str name="fields">name,features,cat</str> | |
1621 | + <str name="signatureClass">solr.processor.Lookup3Signature</str> | |
1622 | + </processor> | |
1623 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1624 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1625 | + </updateRequestProcessorChain> | |
1626 | + --> | |
1627 | + | |
1628 | + <!-- Language identification | |
1629 | + | |
1630 | + This example update chain identifies the language of the incoming | |
1631 | + documents using the langid contrib. The detected language is | |
1632 | + written to field language_s. No field name mapping is done. | |
1633 | + The fields used for detection are text, title, subject and description, | |
1634 | + making this example suitable for detecting languages from full-text | |
1635 | + rich documents injected via ExtractingRequestHandler. | |
1636 | + See more about langId at http://wiki.apache.org/solr/LanguageDetection | |
1637 | + --> | |
1638 | + <!-- | |
1639 | + <updateRequestProcessorChain name="langid"> | |
1640 | + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> | |
1641 | + <str name="langid.fl">text,title,subject,description</str> | |
1642 | + <str name="langid.langField">language_s</str> | |
1643 | + <str name="langid.fallback">en</str> | |
1644 | + </processor> | |
1645 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1646 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1647 | + </updateRequestProcessorChain> | |
1648 | + --> | |
1649 | + | |
1650 | + <!-- Script update processor | |
1651 | + | |
1652 | + This example hooks in an update processor implemented using JavaScript. | |
1653 | + | |
1654 | + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor | |
1655 | + --> | |
1656 | + <!-- | |
1657 | + <updateRequestProcessorChain name="script"> | |
1658 | + <processor class="solr.StatelessScriptUpdateProcessorFactory"> | |
1659 | + <str name="script">update-script.js</str> | |
1660 | + <lst name="params"> | |
1661 | + <str name="config_param">example config parameter</str> | |
1662 | + </lst> | |
1663 | + </processor> | |
1664 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1665 | + </updateRequestProcessorChain> | |
1666 | + --> | |
1667 | + | |
1668 | + <!-- Response Writers | |
1669 | + | |
1670 | + http://wiki.apache.org/solr/QueryResponseWriter | |
1671 | + | |
1672 | + Request responses will be written using the writer specified by | |
1673 | + the 'wt' request parameter matching the name of a registered | |
1674 | + writer. | |
1675 | + | |
1676 | + The "default" writer is the default and will be used if 'wt' is | |
1677 | + not specified in the request. | |
1678 | + --> | |
1679 | + <!-- The following response writers are implicitly configured unless | |
1680 | + overridden... | |
1681 | + --> | |
1682 | + <!-- | |
1683 | + <queryResponseWriter name="xml" | |
1684 | + default="true" | |
1685 | + class="solr.XMLResponseWriter" /> | |
1686 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> | |
1687 | + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> | |
1688 | + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> | |
1689 | + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> | |
1690 | + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> | |
1691 | + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> | |
1692 | + --> | |
1693 | + | |
1694 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> | |
1695 | + <!-- For the purposes of the tutorial, JSON responses are written as | |
1696 | + plain text so that they are easy to read in *any* browser. | |
1697 | + If you expect a MIME type of "application/json" just remove this override. | |
1698 | + --> | |
1699 | + <str name="content-type">text/plain; charset=UTF-8</str> | |
1700 | + </queryResponseWriter> | |
1701 | + | |
1702 | + <!-- | |
1703 | + Custom response writers can be declared as needed... | |
1704 | + --> | |
1705 | + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/> | |
1706 | + | |
1707 | + | |
1708 | + <!-- XSLT response writer transforms the XML output by any xslt file found | |
1709 | + in Solr's conf/xslt directory. Changes to xslt files are checked for | |
1710 | + every xsltCacheLifetimeSeconds. | |
1711 | + --> | |
1712 | + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> | |
1713 | + <int name="xsltCacheLifetimeSeconds">5</int> | |
1714 | + </queryResponseWriter> | |
1715 | + | |
1716 | + <!-- Query Parsers | |
1717 | + | |
1718 | + http://wiki.apache.org/solr/SolrQuerySyntax | |
1719 | + | |
1720 | + Multiple QParserPlugins can be registered by name, and then | |
1721 | + used in either the "defType" param for the QueryComponent (used | |
1722 | + by SearchHandler) or in LocalParams | |
1723 | + --> | |
1724 | + <!-- example of registering a query parser --> | |
1725 | + <!-- | |
1726 | + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> | |
1727 | + --> | |
1728 | + | |
1729 | + <!-- Function Parsers | |
1730 | + | |
1731 | + http://wiki.apache.org/solr/FunctionQuery | |
1732 | + | |
1733 | + Multiple ValueSourceParsers can be registered by name, and then | |
1734 | + used as function names when using the "func" QParser. | |
1735 | + --> | |
1736 | + <!-- example of registering a custom function parser --> | |
1737 | + <!-- | |
1738 | + <valueSourceParser name="myfunc" | |
1739 | + class="com.mycompany.MyValueSourceParser" /> | |
1740 | + --> | |
1741 | + | |
1742 | + | |
1743 | + <!-- Document Transformers | |
1744 | + http://wiki.apache.org/solr/DocTransformers | |
1745 | + --> | |
1746 | + <!-- | |
1747 | + Could be something like: | |
1748 | + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > | |
1749 | + <str name="connection">jdbc://....</str> | |
1750 | + </transformer> | |
1751 | + | |
1752 | + To add a constant value to all docs, use: | |
1753 | + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1754 | + <int name="value">5</int> | |
1755 | + </transformer> | |
1756 | + | |
1757 | + If you want the user to still be able to change it with _value:something_ use this: | |
1758 | + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1759 | + <double name="defaultValue">5</double> | |
1760 | + </transformer> | |
1761 | + | |
1762 | + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The | |
1763 | + EditorialMarkerFactory will do exactly that: | |
1764 | + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> | |
1765 | + --> | |
1766 | + | |
1767 | + | |
1768 | + <!-- Legacy config for the admin interface --> | |
1769 | + <admin> | |
1770 | + <defaultQuery>*:*</defaultQuery> | |
1771 | + </admin> | |
1772 | + | |
1773 | +</config> | ... | ... |
1 | +++ a/index/sei-bases-conhecimento-schema.xml | |
... | ... | @@ -0,0 +1,1147 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + This is the Solr schema file. This file should be named "schema.xml" and | |
21 | + should be in the conf directory under the solr home | |
22 | + (i.e. ./solr/conf/schema.xml by default) | |
23 | + or located where the classloader for the Solr webapp can find it. | |
24 | + | |
25 | + This example schema is the recommended starting point for users. | |
26 | + It should be kept correct and concise, usable out-of-the-box. | |
27 | + | |
28 | + For more information, on how to customize this file, please see | |
29 | + http://wiki.apache.org/solr/SchemaXml | |
30 | + | |
31 | + PERFORMANCE NOTE: this schema includes many optional features and should not | |
32 | + be used for benchmarking. To improve performance one could | |
33 | + - set stored="false" for all fields possible (esp large fields) when you | |
34 | + only need to search on the field but don't need to return the original | |
35 | + value. | |
36 | + - set indexed="false" if you don't need to search on the field, but only | |
37 | + return the field as a result of searching on other indexed fields. | |
38 | + - remove all unneeded copyField statements | |
39 | + - for best index size and searching performance, set "index" to false | |
40 | + for all general text fields, use copyField to copy them to the | |
41 | + catchall "text" field, and use that for searching. | |
42 | + - For maximum indexing performance, use the StreamingUpdateSolrServer | |
43 | + java client. | |
44 | + - Remember to run the JVM in server mode, and use a higher logging level | |
45 | + that avoids logging every request | |
46 | +--> | |
47 | + | |
48 | +<schema name="sei-protocolos" version="1.5"> | |
49 | + <!-- attribute "name" is the name of this schema and is only used for display purposes. | |
50 | + version="x.y" is Solr's version number for the schema syntax and | |
51 | + semantics. It should not normally be changed by applications. | |
52 | + | |
53 | + 1.0: multiValued attribute did not exist, all fields are multiValued | |
54 | + by nature | |
55 | + 1.1: multiValued attribute introduced, false by default | |
56 | + 1.2: omitTermFreqAndPositions attribute introduced, true by default | |
57 | + except for text fields. | |
58 | + 1.3: removed optional field compress feature | |
59 | + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser | |
60 | + behavior when a single string produces multiple tokens. Defaults | |
61 | + to off for version >= 1.4 | |
62 | + 1.5: omitNorms defaults to true for primitive field types | |
63 | + (int, float, boolean, string...) | |
64 | + --> | |
65 | + | |
66 | + <fields> | |
67 | + | |
68 | + <field name="id_base_conhecimento" type="string" indexed="false" stored="true" /> | |
69 | + <field name="id_documento_edoc" type="string" indexed="false" stored="true" /> | |
70 | + <field name="descricao" type="string" indexed="false" stored="true" /> | |
71 | + <field name="id_unidade" type="string" indexed="false" stored="true" /> | |
72 | + <field name="sigla_unidade" type="string" indexed="false" stored="true" /> | |
73 | + <field name="descricao_unidade" type="string" indexed="false" stored="true" /> | |
74 | + <field name="dta_geracao" type="date" indexed="true" stored="true" /> | |
75 | + <field name="id_anexo" type="string" indexed="false" stored="true" /> | |
76 | + <field name="nome_anexo" type="string" indexed="false" stored="true" /> | |
77 | + <field name="link_base_conhecimento" type="string" indexed="false" stored="true" /> | |
78 | + | |
79 | + <!-- Valid attributes for fields: | |
80 | + name: mandatory - the name for the field | |
81 | + type: mandatory - the name of a field type from the | |
82 | + <types> fieldType section | |
83 | + indexed: true if this field should be indexed (searchable or sortable) | |
84 | + stored: true if this field should be retrievable | |
85 | + multiValued: true if this field may contain multiple values per document | |
86 | + omitNorms: (expert) set to true to omit the norms associated with | |
87 | + this field (this disables length normalization and index-time | |
88 | + boosting for the field, and saves some memory). Only full-text | |
89 | + fields or fields that need an index-time boost need norms. | |
90 | + Norms are omitted for primitive (non-analyzed) types by default. | |
91 | + termVectors: [false] set to true to store the term vector for a | |
92 | + given field. | |
93 | + When using MoreLikeThis, fields used for similarity should be | |
94 | + stored for best performance. | |
95 | + termPositions: Store position information with the term vector. | |
96 | + This will increase storage costs. | |
97 | + termOffsets: Store offset information with the term vector. This | |
98 | + will increase storage costs. | |
99 | + required: The field is required. It will throw an error if the | |
100 | + value does not exist | |
101 | + default: a value that should be used if no value is specified | |
102 | + when adding a document. | |
103 | + --> | |
104 | + | |
105 | + <!-- field names should consist of alphanumeric or underscore characters only and | |
106 | + not start with a digit. This is not currently strictly enforced, | |
107 | + but other field names will not have first class support from all components | |
108 | + and back compatibility is not guaranteed. Names with both leading and | |
109 | + trailing underscores (e.g. _version_) are reserved. | |
110 | + --> | |
111 | + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> | |
112 | + | |
113 | + <!-- mairon | |
114 | + <field name="documento" type="string" indexed="true" stored="true" /> | |
115 | + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> | |
116 | + <field name="name" type="text_general" indexed="true" stored="true"/> | |
117 | + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> | |
118 | + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> | |
119 | + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
120 | + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> | |
121 | + | |
122 | + <field name="weight" type="float" indexed="true" stored="true"/> | |
123 | + <field name="price" type="float" indexed="true" stored="true"/> | |
124 | + <field name="popularity" type="int" indexed="true" stored="true" /> | |
125 | + <field name="inStock" type="boolean" indexed="true" stored="true" /> | |
126 | + | |
127 | + <field name="store" type="location" indexed="true" stored="true"/> | |
128 | + --> | |
129 | + | |
130 | + <!-- Common metadata fields, named specifically to match up with | |
131 | + SolrCell metadata when parsing rich documents such as Word, PDF. | |
132 | + Some fields are multiValued only because Tika currently may return | |
133 | + multiple values for them. Some metadata is parsed from the documents, | |
134 | + but there are some which come from the client context: | |
135 | + "content_type": From the HTTP headers of incoming stream | |
136 | + "resourcename": From SolrCell request param resource.name | |
137 | + --> | |
138 | + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
139 | + <field name="subject" type="text_general" indexed="true" stored="true"/> | |
140 | + <field name="description" type="text_general" indexed="true" stored="true"/> | |
141 | + <field name="comments" type="text_general" indexed="true" stored="true"/> | |
142 | + <field name="author" type="text_general" indexed="true" stored="true"/> | |
143 | + <field name="keywords" type="text_general" indexed="true" stored="true"/> | |
144 | + <field name="category" type="text_general" indexed="true" stored="true"/> | |
145 | + <field name="resourcename" type="text_general" indexed="true" stored="true"/> | |
146 | + <field name="url" type="text_general" indexed="true" stored="true"/> | |
147 | + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> | |
148 | + <field name="last_modified" type="date" indexed="true" stored="true"/> | |
149 | + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> | |
150 | + | |
151 | + <!-- Main body of document extracted by SolrCell. | |
152 | + NOTE: This field is not indexed by default, since it is also copied to "text" | |
153 | + using copyField below. This is to save space. Use this field for returning and | |
154 | + highlighting document content. Use the "text" field to search the content. --> | |
155 | + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> | |
156 | + | |
157 | + | |
158 | + <!-- catchall field, containing all other searchable text fields (implemented | |
159 | + via copyField further on in this schema) --> | |
160 | + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> | |
161 | + | |
162 | + <!-- catchall text field that indexes tokens both normally and in reverse for efficient | |
163 | + leading wildcard queries. --> | |
164 | + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> | |
165 | + | |
166 | + <!-- non-tokenized version of manufacturer to make it easier to sort or group | |
167 | + results by manufacturer. copied from "manu" via copyField --> | |
168 | + <field name="manu_exact" type="string" indexed="true" stored="false"/> | |
169 | + | |
170 | + <field name="payloads" type="payloads" indexed="true" stored="true"/> | |
171 | + | |
172 | + <field name="_version_" type="long" indexed="true" stored="true"/> | |
173 | + | |
174 | + <!-- Uncommenting the following will create a "timestamp" field using | |
175 | + a default value of "NOW" to indicate when each document was indexed. | |
176 | + --> | |
177 | + <!-- | |
178 | + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/> | |
179 | + --> | |
180 | + | |
181 | + <!-- Dynamic field definitions allow using convention over configuration | |
182 | + for fields via the specification of patterns to match field names. | |
183 | + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) | |
184 | + RESTRICTION: the glob-like pattern in the name attribute must have | |
185 | + a "*" only at the start or the end. --> | |
186 | + | |
187 | + <dynamicField name="*_i" type="int" indexed="true" stored="true"/> | |
188 | + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/> | |
189 | + <dynamicField name="*_s" type="string" indexed="true" stored="true" /> | |
190 | + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/> | |
191 | + <dynamicField name="*_l" type="long" indexed="true" stored="true"/> | |
192 | + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/> | |
193 | + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |
194 | + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
195 | + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/> | |
196 | + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |
197 | + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/> | |
198 | + <dynamicField name="*_f" type="float" indexed="true" stored="true"/> | |
199 | + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/> | |
200 | + <dynamicField name="*_d" type="double" indexed="true" stored="true"/> | |
201 | + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/> | |
202 | + | |
203 | + <!-- Type used to index the lat and lon components for the "location" FieldType --> | |
204 | + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" /> | |
205 | + | |
206 | + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> | |
207 | + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/> | |
208 | + <dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |
209 | + | |
210 | + <!-- some trie-coded dynamic fields for faster range queries --> | |
211 | + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> | |
212 | + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> | |
213 | + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> | |
214 | + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> | |
215 | + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/> | |
216 | + | |
217 | + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> | |
218 | + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |
219 | + | |
220 | + <dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |
221 | + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
222 | + | |
223 | + <dynamicField name="random_*" type="random" /> | |
224 | + | |
225 | + <!-- uncomment the following to ignore any fields that don't already match an existing | |
226 | + field name or dynamic field, rather than reporting them as an error. | |
227 | + alternately, change the type="ignored" to some other type e.g. "text" if you want | |
228 | + unknown fields indexed and/or stored by default --> | |
229 | + <!--dynamicField name="*" type="ignored" multiValued="true" /--> | |
230 | + | |
231 | + </fields> | |
232 | + | |
233 | + | |
234 | + <!-- Field to use to determine and enforce document uniqueness. | |
235 | + Unless this field is marked with required="false", it will be a required field | |
236 | + --> | |
237 | + <uniqueKey>id</uniqueKey> | |
238 | + | |
239 | + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when | |
240 | + parsing a query string that isn't explicit about the field. Machine (non-user) | |
241 | + generated queries are best made explicit, or they can use the "df" request parameter | |
242 | + which takes precedence over this. | |
243 | + Note: Un-commenting defaultSearchField will be insufficient if your request handler | |
244 | + in solrconfig.xml defines "df", which takes precedence. That would need to be removed. | |
245 | + <defaultSearchField>text</defaultSearchField> --> | |
246 | + | |
247 | + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers | |
248 | + when parsing a query string to determine if a clause of the query should be marked as | |
249 | + required or optional, assuming the clause isn't already marked by some operator. | |
250 | + The default is OR, which is generally assumed so it is not a good idea to change it | |
251 | + globally here. The "q.op" request parameter takes precedence over this. | |
252 | + <solrQueryParser defaultOperator="OR"/> --> | |
253 | + | |
254 | + <!-- copyField commands copy one field to another at the time a document | |
255 | + is added to the index. It's used either to index the same field differently, | |
256 | + or to add multiple fields to the same field for easier/faster searching. --> | |
257 | + | |
258 | + <!-- mairon | |
259 | + <copyField source="cat" dest="text"/> | |
260 | + <copyField source="name" dest="text"/> | |
261 | + <copyField source="manu" dest="text"/> | |
262 | + <copyField source="features" dest="text"/> | |
263 | + <copyField source="includes" dest="text"/> | |
264 | + <copyField source="manu" dest="manu_exact"/> | |
265 | + <copyField source="price" dest="price_c"/> | |
266 | + --> | |
267 | + | |
268 | + <!-- Text fields from SolrCell to search by default in our catch-all field --> | |
269 | + <copyField source="title" dest="text"/> | |
270 | + <copyField source="author" dest="text"/> | |
271 | + <copyField source="description" dest="text"/> | |
272 | + <copyField source="keywords" dest="text"/> | |
273 | + <copyField source="content" dest="text"/> | |
274 | + <copyField source="content_type" dest="text"/> | |
275 | + <copyField source="resourcename" dest="text"/> | |
276 | + <copyField source="url" dest="text"/> | |
277 | + | |
278 | + <!-- Create a string version of author for faceting --> | |
279 | + <copyField source="author" dest="author_s"/> | |
280 | + | |
281 | + <!-- Above, multiple source fields are copied to the [text] field. | |
282 | + Another way to map multiple source fields to the same | |
283 | + destination field is to use the dynamic field syntax. | |
284 | + copyField also supports a maxChars to copy setting. --> | |
285 | + | |
286 | + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> --> | |
287 | + | |
288 | + <!-- copy name to alphaNameSort, a field designed for sorting by name --> | |
289 | + <!-- <copyField source="name" dest="alphaNameSort"/> --> | |
290 | + | |
291 | + <types> | |
292 | + <!-- field type definitions. The "name" attribute is | |
293 | + just a label to be used by field definitions. The "class" | |
294 | + attribute and any other attributes determine the real | |
295 | + behavior of the fieldType. | |
296 | + Class names starting with "solr" refer to java classes in a | |
297 | + standard package such as org.apache.solr.analysis | |
298 | + --> | |
299 | + | |
300 | + <fieldType name="date" class="solr.DateField" omitNorms="true"/> | |
301 | + | |
302 | + <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> | |
303 | + <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> | |
304 | + | |
305 | + <!-- boolean type: "true" or "false" --> | |
306 | + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |
307 | + | |
308 | + <!-- The sortMissingLast and sortMissingFirst attributes are optional and are | |
309 | + currently supported on types that are sorted internally as strings | |
310 | + and on numeric types. | |
311 | + This includes "string","boolean", and, as of 3.5 (and 4.x), | |
312 | + int, float, long, date, double, including the "Trie" variants. | |
313 | + - If sortMissingLast="true", then a sort on this field will cause documents | |
314 | + without the field to come after documents with the field, | |
315 | + regardless of the requested sort order (asc or desc). | |
316 | + - If sortMissingFirst="true", then a sort on this field will cause documents | |
317 | + without the field to come before documents with the field, | |
318 | + regardless of the requested sort order. | |
319 | + - If sortMissingLast="false" and sortMissingFirst="false" (the default), | |
320 | + then default lucene sorting will be used which places docs without the | |
321 | + field first in an ascending sort and last in a descending sort. | |
322 | + --> | |
323 | + | |
324 | + <!-- | |
325 | + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types. | |
326 | + --> | |
327 | + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> | |
328 | + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/> | |
329 | + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/> | |
330 | + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/> | |
331 | + | |
332 | + <!-- | |
333 | + Numeric field types that index each value at various levels of precision | |
334 | + to accelerate range queries when the number of values between the range | |
335 | + endpoints is large. See the javadoc for NumericRangeQuery for internal | |
336 | + implementation details. | |
337 | + | |
338 | + Smaller precisionStep values (specified in bits) will lead to more tokens | |
339 | + indexed per value, slightly larger index size, and faster range queries. | |
340 | + A precisionStep of 0 disables indexing at different precision levels. | |
341 | + --> | |
342 | + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/> | |
343 | + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/> | |
344 | + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/> | |
345 | + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/> | |
346 | + | |
347 | + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and | |
348 | + is a more restricted form of the canonical representation of dateTime | |
349 | + http://www.w3.org/TR/xmlschema-2/#dateTime | |
350 | + The trailing "Z" designates UTC time and is mandatory. | |
351 | + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z | |
352 | + All other components are mandatory. | |
353 | + | |
354 | + Expressions can also be used to denote calculations that should be | |
355 | + performed relative to "NOW" to determine the value, ie... | |
356 | + | |
357 | + NOW/HOUR | |
358 | + ... Round to the start of the current hour | |
359 | + NOW-1DAY | |
360 | + ... Exactly 1 day prior to now | |
361 | + NOW/DAY+6MONTHS+3DAYS | |
362 | + ... 6 months and 3 days in the future from the start of | |
363 | + the current day | |
364 | + | |
365 | + Consult the DateField javadocs for more information. | |
366 | + | |
367 | + Note: For faster range queries, consider the tdate type | |
368 | + --> | |
369 | +<!-- <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> --> | |
370 | + | |
371 | + <!-- A Trie based date field for faster date range queries and date faceting. --> | |
372 | + <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/> | |
373 | + | |
374 | + | |
375 | + <!-- Binary data type. The data should be sent/retrieved as Base64-encoded Strings --> | |
376 | + <fieldtype name="binary" class="solr.BinaryField"/> | |
377 | + | |
378 | + <!-- | |
379 | + Note: | |
380 | + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions). | |
381 | + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last | |
382 | + | |
383 | + Plain numeric field types that store and index the text | |
384 | + value verbatim (and hence don't correctly support range queries, since the | |
385 | + lexicographic ordering isn't equal to the numeric ordering) | |
386 | + --> | |
387 | + <fieldType name="pint" class="solr.IntField"/> | |
388 | + <fieldType name="plong" class="solr.LongField"/> | |
389 | + <fieldType name="pfloat" class="solr.FloatField"/> | |
390 | + <fieldType name="pdouble" class="solr.DoubleField"/> | |
391 | + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/> | |
392 | + | |
393 | + <!-- The "RandomSortField" is not used to store or search any | |
394 | + data. You can declare fields of this type in your schema | |
395 | + to generate pseudo-random orderings of your docs for sorting | |
396 | + or function purposes. The ordering is generated based on the field | |
397 | + name and the version of the index. As long as the index version | |
398 | + remains unchanged, and the same field name is reused, | |
399 | + the ordering of the docs will be consistent. | |
400 | + If you want different pseudo-random orderings of documents, | |
401 | + for the same version of the index, use a dynamicField and | |
402 | + change the field name in the request. | |
403 | + --> | |
404 | + <fieldType name="random" class="solr.RandomSortField" indexed="true" /> | |
405 | + | |
406 | + <!-- solr.TextField allows the specification of custom text analyzers | |
407 | + specified as a tokenizer and a list of token filters. Different | |
408 | + analyzers may be specified for indexing and querying. | |
409 | + | |
410 | + The optional positionIncrementGap puts space between multiple fields of | |
411 | + this type on the same document, with the purpose of preventing false phrase | |
412 | + matching across fields. | |
413 | + | |
414 | + For more info on customizing your analyzer chain, please see | |
415 | + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters | |
416 | + --> | |
417 | + | |
418 | + <!-- One can also specify an existing Analyzer class that has a | |
419 | + default constructor via the class attribute on the analyzer element. | |
420 | + Example: | |
421 | + <fieldType name="text_greek" class="solr.TextField"> | |
422 | + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> | |
423 | + </fieldType> | |
424 | + --> | |
425 | + | |
426 | + <!-- A text field that only splits on whitespace for exact matching of words --> | |
427 | + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |
428 | + <analyzer> | |
429 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
430 | + </analyzer> | |
431 | + </fieldType> | |
432 | + | |
433 | + <!-- A general text field: it tokenizes with StandardTokenizer, | |
434 | + removes stop words using the case-insensitive list | |
435 | + "lang/stopwords_pt.txt", and down cases. At index time only, it | |
436 | + also applies ASCIIFoldingFilter; the synonym filters are commented | |
437 | + out. NOTE(review): folding is index-only - confirm that is intended. --> | |
438 | + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> | |
439 | + <analyzer type="index"> | |
440 | + | |
441 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
442 | + | |
443 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
444 | + <!-- in this example, we will only use synonyms at query time | |
445 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
446 | + --> | |
447 | + <filter class="solr.LowerCaseFilterFactory"/> | |
448 | + | |
449 | + <!-- mairon --> | |
450 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
451 | + <!-- mairon --> | |
452 | + | |
453 | + | |
454 | + </analyzer> | |
455 | + | |
456 | + <analyzer type="query"> | |
457 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
458 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
459 | + <filter class="solr.LowerCaseFilterFactory"/> | |
460 | + | |
461 | + <!-- mairon --> | |
462 | + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> --> | |
463 | + <!-- <filter class="solr.ASCIIFoldingFilterFactory"/> --> | |
464 | + <!-- <filter class="solr.BrazilianStemFilterFactory"/> --> | |
465 | + <!-- mairon --> | |
466 | + | |
467 | + </analyzer> | |
468 | + | |
469 | + </fieldType> | |
470 | + | |
471 | + <!-- A text field with defaults appropriate for English: it | |
472 | + tokenizes with StandardTokenizer, removes English stop words | |
473 | + (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and | |
474 | + finally applies Porter's stemming. The query time analyzer | |
475 | + also applies synonyms from synonyms.txt. --> | |
476 | + <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |
477 | + <analyzer type="index"> | |
478 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
479 | + <!-- in this example, we will only use synonyms at query time | |
480 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
481 | + --> | |
482 | + <!-- Case insensitive stop word removal. | |
483 | + add enablePositionIncrements=true in both the index and query | |
484 | + analyzers to leave a 'gap' for more accurate phrase queries. | |
485 | + --> | |
486 | + <filter class="solr.StopFilterFactory" | |
487 | + ignoreCase="true" | |
488 | + words="lang/stopwords_en.txt" | |
489 | + enablePositionIncrements="true" | |
490 | + /> | |
491 | + <filter class="solr.LowerCaseFilterFactory"/> | |
492 | + <filter class="solr.EnglishPossessiveFilterFactory"/> | |
493 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
494 | + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | |
495 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
496 | + --> | |
497 | + <filter class="solr.PorterStemFilterFactory"/> | |
498 | + </analyzer> | |
499 | + <analyzer type="query"> | |
500 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
501 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
502 | + <filter class="solr.StopFilterFactory" | |
503 | + ignoreCase="true" | |
504 | + words="lang/stopwords_en.txt" | |
505 | + enablePositionIncrements="true" | |
506 | + /> | |
507 | + <filter class="solr.LowerCaseFilterFactory"/> | |
508 | + <filter class="solr.EnglishPossessiveFilterFactory"/> | |
509 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
510 | + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | |
511 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
512 | + --> | |
513 | + <filter class="solr.PorterStemFilterFactory"/> | |
514 | + </analyzer> | |
515 | + </fieldType> | |
516 | + | |
517 | + <!-- A text field with defaults appropriate for English, plus | |
518 | + aggressive word-splitting and autophrase features enabled. | |
519 | + This field is just like text_en, except it adds | |
520 | + WordDelimiterFilter to enable splitting and matching of | |
521 | + words on case-change, alpha numeric boundaries, and | |
522 | + non-alphanumeric chars. This means certain compound word | |
523 | + cases will work, for example query "wi fi" will match | |
524 | + document "WiFi" or "wi-fi". | |
525 | + --> | |
526 | + <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
527 | + <analyzer type="index"> | |
528 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
529 | + <!-- in this example, we will only use synonyms at query time | |
530 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
531 | + --> | |
532 | + <!-- Case insensitive stop word removal. | |
533 | + add enablePositionIncrements=true in both the index and query | |
534 | + analyzers to leave a 'gap' for more accurate phrase queries. | |
535 | + --> | |
536 | + <filter class="solr.StopFilterFactory" | |
537 | + ignoreCase="true" | |
538 | + words="lang/stopwords_en.txt" | |
539 | + enablePositionIncrements="true" | |
540 | + /> | |
541 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> | |
542 | + <filter class="solr.LowerCaseFilterFactory"/> | |
543 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
544 | + <filter class="solr.PorterStemFilterFactory"/> | |
545 | + | |
546 | + | |
547 | + </analyzer> | |
548 | + <analyzer type="query"> | |
549 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
550 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
551 | + <filter class="solr.StopFilterFactory" | |
552 | + ignoreCase="true" | |
553 | + words="lang/stopwords_en.txt" | |
554 | + enablePositionIncrements="true" | |
555 | + /> | |
556 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> | |
557 | + <filter class="solr.LowerCaseFilterFactory"/> | |
558 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
559 | + <filter class="solr.PorterStemFilterFactory"/> | |
560 | + </analyzer> | |
561 | + </fieldType> | |
562 | + | |
563 | + <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names, | |
564 | + but may be good for SKUs. Can insert dashes in the wrong place and still match. --> | |
565 | + <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
566 | + <analyzer> | |
567 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
568 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | |
569 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> | |
570 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> | |
571 | + <filter class="solr.LowerCaseFilterFactory"/> | |
572 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
573 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
574 | + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes | |
575 | + possible with WordDelimiterFilter in conjunction with stemming. --> | |
576 | + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
577 | + </analyzer> | |
578 | + </fieldType> | |
579 | + | |
580 | + <!-- A variant of text_general (it uses "stopwords.txt", query-time synonyms and no ASCII folding) that also | |
581 | + indexes each token with its characters reversed, to enable more efficient leading wildcard queries. --> | |
582 | + <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |
583 | + <analyzer type="index"> | |
584 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
585 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
586 | + <filter class="solr.LowerCaseFilterFactory"/> | |
587 | + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" | |
588 | + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> | |
589 | + </analyzer> | |
590 | + <analyzer type="query"> | |
591 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
592 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
593 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
594 | + <filter class="solr.LowerCaseFilterFactory"/> | |
595 | + </analyzer> | |
596 | + </fieldType> | |
597 | + | |
598 | + <!-- charFilter + WhitespaceTokenizer --> | |
599 | + <!-- | |
600 | + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > | |
601 | + <analyzer> | |
602 | + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> | |
603 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
604 | + </analyzer> | |
605 | + </fieldType> | |
606 | + --> | |
607 | + | |
608 | + <!-- This is an example of using the KeywordTokenizer along | |
609 | + with various TokenFilterFactories to produce a sortable field | |
610 | + that does not include some properties of the source text | |
611 | + --> | |
612 | + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> | |
613 | + <analyzer> | |
614 | + <!-- KeywordTokenizer does no actual tokenizing, so the entire | |
615 | + input string is preserved as a single token | |
616 | + --> | |
617 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
618 | + <!-- The LowerCase TokenFilter does what you expect, which can be | |
619 | + useful when you want your sorting to be case insensitive | |
620 | + --> | |
621 | + <filter class="solr.LowerCaseFilterFactory" /> | |
622 | + <!-- The TrimFilter removes any leading or trailing whitespace --> | |
623 | + <filter class="solr.TrimFilterFactory" /> | |
624 | + <!-- The PatternReplaceFilter gives you the flexibility to use | |
625 | + Java Regular expression to replace any sequence of characters | |
626 | + matching a pattern with an arbitrary replacement string, | |
627 | + which may include back references to portions of the original | |
628 | + string matched by the pattern. | |
629 | + | |
630 | + See the Java Regular Expression documentation for more | |
631 | + information on pattern and replacement string syntax. | |
632 | + | |
633 | + http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html | |
634 | + --> | |
635 | + <filter class="solr.PatternReplaceFilterFactory" | |
636 | + pattern="([^a-z])" replacement="" replace="all" | |
637 | + /> | |
638 | + </analyzer> | |
639 | + </fieldType> | |
640 | + | |
641 | + <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" > | |
642 | + <analyzer> | |
643 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
644 | + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |
645 | + </analyzer> | |
646 | + </fieldtype> | |
647 | + | |
648 | + <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" > | |
649 | + <analyzer> | |
650 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
651 | + <!-- | |
652 | + The DelimitedPayloadTokenFilter can put payloads on tokens... for example, | |
653 | + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f | |
654 | + Attributes of the DelimitedPayloadTokenFilterFactory : | |
655 | + "delimiter" - a one character delimiter. Default is | (pipe) | |
656 | + "encoder" - how to encode the following value into a payload | |
657 | + float -> org.apache.lucene.analysis.payloads.FloatEncoder, | |
658 | + integer -> o.a.l.a.p.IntegerEncoder | |
659 | + identity -> o.a.l.a.p.IdentityEncoder | |
660 | + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. | |
661 | + --> | |
662 | + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> | |
663 | + </analyzer> | |
664 | + </fieldtype> | |
665 | + | |
666 | + <!-- lowercases the entire field value, keeping it as a single token. --> | |
667 | + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |
668 | + <analyzer> | |
669 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
670 | + <filter class="solr.LowerCaseFilterFactory" /> | |
671 | + </analyzer> | |
672 | + </fieldType> | |
673 | + | |
674 | + <!-- | |
675 | + Example of using PathHierarchyTokenizerFactory at index time, so | |
676 | + queries for paths match documents at that path, or in descendent paths | |
677 | + --> | |
678 | + <fieldType name="descendent_path" class="solr.TextField"> | |
679 | + <analyzer type="index"> | |
680 | + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | |
681 | + </analyzer> | |
682 | + <analyzer type="query"> | |
683 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
684 | + </analyzer> | |
685 | + </fieldType> | |
686 | + <!-- | |
687 | + Example of using PathHierarchyTokenizerFactory at query time, so | |
688 | + queries for paths match documents at that path, or in ancestor paths | |
689 | + --> | |
690 | + <fieldType name="ancestor_path" class="solr.TextField"> | |
691 | + <analyzer type="index"> | |
692 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
693 | + </analyzer> | |
694 | + <analyzer type="query"> | |
695 | + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | |
696 | + </analyzer> | |
697 | + </fieldType> | |
698 | + | |
699 | + <!-- since fields of this type are by default not stored or indexed, | |
700 | + any data added to them will be ignored outright. --> | |
701 | + <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> | |
702 | + | |
703 | + <!-- This point type indexes the coordinates as separate fields (subFields) | |
704 | + If subFieldType is defined, it references a type, and a dynamic field | |
705 | + definition is created matching *___<typename>. Alternately, if | |
706 | + subFieldSuffix is defined, that is used to create the subFields. | |
707 | + Example: if subFieldType="double", then the coordinates would be | |
708 | + indexed in fields myloc_0___double,myloc_1___double. | |
709 | + Example: if subFieldSuffix="_d" then the coordinates would be indexed | |
710 | + in fields myloc_0_d,myloc_1_d | |
711 | + The subFields are an implementation detail of the fieldType, and end | |
712 | + users normally should not need to know about them. | |
713 | + --> | |
714 | + <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> | |
715 | + | |
716 | + <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. --> | |
717 | + <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |
718 | + | |
719 | + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes. | |
720 | + For more information about this and other Spatial fields new to Solr 4, see: | |
721 | + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 | |
722 | + --> | |
723 | + <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" | |
724 | + geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" /> | |
725 | + | |
726 | + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType | |
727 | + Parameters: | |
728 | + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD" | |
729 | + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount | |
730 | + providerClass: Lets you plug in other exchange provider backend: | |
731 | + solr.FileExchangeRateProvider is the default and takes one parameter: | |
732 | + currencyConfig: name of an xml file holding exchange rates | |
733 | + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org: | |
734 | + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web) | |
735 | + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60) | |
736 | + --> | |
737 | + <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" /> | |
738 | + | |
739 | + | |
740 | + | |
741 | + <!-- some examples for different languages (generally ordered by ISO code) --> | |
742 | + | |
743 | + <!-- Arabic --> | |
744 | + <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |
745 | + <analyzer> | |
746 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
747 | + <!-- for any non-arabic --> | |
748 | + <filter class="solr.LowerCaseFilterFactory"/> | |
749 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/> | |
750 | + <!-- normalizes ﻯ to ﻱ, etc --> | |
751 | + <filter class="solr.ArabicNormalizationFilterFactory"/> | |
752 | + <filter class="solr.ArabicStemFilterFactory"/> | |
753 | + </analyzer> | |
754 | + </fieldType> | |
755 | + | |
756 | + <!-- Bulgarian --> | |
757 | + <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |
758 | + <analyzer> | |
759 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
760 | + <filter class="solr.LowerCaseFilterFactory"/> | |
761 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/> | |
762 | + <filter class="solr.BulgarianStemFilterFactory"/> | |
763 | + </analyzer> | |
764 | + </fieldType> | |
765 | + | |
766 | + <!-- Catalan --> | |
767 | + <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |
768 | + <analyzer> | |
769 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
770 | + <!-- removes l', etc --> | |
771 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> | |
772 | + <filter class="solr.LowerCaseFilterFactory"/> | |
773 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/> | |
774 | + <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |
775 | + </analyzer> | |
776 | + </fieldType> | |
777 | + | |
778 | + <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> | |
779 | + <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |
780 | + <analyzer> | |
781 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
782 | + <!-- normalize width before bigram, as e.g. half-width dakuten combine --> | |
783 | + <filter class="solr.CJKWidthFilterFactory"/> | |
784 | + <!-- for any non-CJK --> | |
785 | + <filter class="solr.LowerCaseFilterFactory"/> | |
786 | + <filter class="solr.CJKBigramFilterFactory"/> | |
787 | + </analyzer> | |
788 | + </fieldType> | |
789 | + | |
790 | + <!-- Czech --> | |
791 | + <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |
792 | + <analyzer> | |
793 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
794 | + <filter class="solr.LowerCaseFilterFactory"/> | |
795 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/> | |
796 | + <filter class="solr.CzechStemFilterFactory"/> | |
797 | + </analyzer> | |
798 | + </fieldType> | |
799 | + | |
800 | + <!-- Danish --> | |
801 | + <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |
802 | + <analyzer> | |
803 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
804 | + <filter class="solr.LowerCaseFilterFactory"/> | |
805 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/> | |
806 | + <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |
807 | + </analyzer> | |
808 | + </fieldType> | |
809 | + | |
810 | + <!-- German --> | |
811 | + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |
812 | + <analyzer> | |
813 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
814 | + <filter class="solr.LowerCaseFilterFactory"/> | |
815 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/> | |
816 | + <filter class="solr.GermanNormalizationFilterFactory"/> | |
817 | + <filter class="solr.GermanLightStemFilterFactory"/> | |
818 | + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> | |
819 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> | |
820 | + </analyzer> | |
821 | + </fieldType> | |
822 | + | |
823 | + <!-- Greek --> | |
824 | + <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |
825 | + <analyzer> | |
826 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
827 | + <!-- greek specific lowercase for sigma --> | |
828 | + <filter class="solr.GreekLowerCaseFilterFactory"/> | |
829 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/> | |
830 | + <filter class="solr.GreekStemFilterFactory"/> | |
831 | + </analyzer> | |
832 | + </fieldType> | |
833 | + | |
834 | + <!-- Spanish --> | |
835 | + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |
836 | + <analyzer> | |
837 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
838 | + <filter class="solr.LowerCaseFilterFactory"/> | |
839 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/> | |
840 | + <filter class="solr.SpanishLightStemFilterFactory"/> | |
841 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> | |
842 | + </analyzer> | |
843 | + </fieldType> | |
844 | + | |
845 | + <!-- Basque --> | |
846 | + <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |
847 | + <analyzer> | |
848 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
849 | + <filter class="solr.LowerCaseFilterFactory"/> | |
850 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/> | |
851 | + <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |
852 | + </analyzer> | |
853 | + </fieldType> | |
854 | + | |
855 | + <!-- Persian --> | |
856 | + <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |
857 | + <analyzer> | |
858 | + <!-- for ZWNJ --> | |
859 | + <charFilter class="solr.PersianCharFilterFactory"/> | |
860 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
861 | + <filter class="solr.LowerCaseFilterFactory"/> | |
862 | + <filter class="solr.ArabicNormalizationFilterFactory"/> | |
863 | + <filter class="solr.PersianNormalizationFilterFactory"/> | |
864 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/> | |
865 | + </analyzer> | |
866 | + </fieldType> | |
867 | + | |
868 | + <!-- Finnish --> | |
869 | + <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |
870 | + <analyzer> | |
871 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
872 | + <filter class="solr.LowerCaseFilterFactory"/> | |
873 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/> | |
874 | + <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |
875 | + <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> | |
876 | + </analyzer> | |
877 | + </fieldType> | |
878 | + | |
879 | + <!-- French --> | |
880 | + <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |
881 | + <analyzer> | |
882 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
883 | + <!-- removes l', etc --> | |
884 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> | |
885 | + <filter class="solr.LowerCaseFilterFactory"/> | |
886 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/> | |
887 | + <filter class="solr.FrenchLightStemFilterFactory"/> | |
888 | + <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> | |
889 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> | |
890 | + </analyzer> | |
891 | + </fieldType> | |
892 | + | |
893 | + <!-- Irish --> | |
894 | + <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |
895 | + <analyzer> | |
896 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
897 | + <!-- removes d', etc --> | |
898 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> | |
899 | + <!-- removes n-, etc. position increments is intentionally false! --> | |
900 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/> | |
901 | + <filter class="solr.IrishLowerCaseFilterFactory"/> | |
902 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/> | |
903 | + <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |
904 | + </analyzer> | |
905 | + </fieldType> | |
906 | + | |
907 | + <!-- Galician --> | |
908 | + <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |
909 | + <analyzer> | |
910 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
911 | + <filter class="solr.LowerCaseFilterFactory"/> | |
912 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/> | |
913 | + <filter class="solr.GalicianStemFilterFactory"/> | |
914 | + <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> | |
915 | + </analyzer> | |
916 | + </fieldType> | |
917 | + | |
918 | + <!-- Hindi --> | |
919 | + <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |
920 | + <analyzer> | |
921 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
922 | + <filter class="solr.LowerCaseFilterFactory"/> | |
923 | + <!-- normalizes unicode representation --> | |
924 | + <filter class="solr.IndicNormalizationFilterFactory"/> | |
925 | + <!-- normalizes variation in spelling --> | |
926 | + <filter class="solr.HindiNormalizationFilterFactory"/> | |
927 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/> | |
928 | + <filter class="solr.HindiStemFilterFactory"/> | |
929 | + </analyzer> | |
930 | + </fieldType> | |
931 | + | |
932 | + <!-- Hungarian --> | |
933 | + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |
934 | + <analyzer> | |
935 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
936 | + <filter class="solr.LowerCaseFilterFactory"/> | |
937 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/> | |
938 | + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |
939 | + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> | |
940 | + </analyzer> | |
941 | + </fieldType> | |
942 | + | |
943 | + <!-- Armenian --> | |
944 | + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |
945 | + <analyzer> | |
946 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
947 | + <filter class="solr.LowerCaseFilterFactory"/> | |
948 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/> | |
949 | + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |
950 | + </analyzer> | |
951 | + </fieldType> | |
952 | + | |
953 | + <!-- Indonesian --> | |
954 | + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |
955 | + <analyzer> | |
956 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
957 | + <filter class="solr.LowerCaseFilterFactory"/> | |
958 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/> | |
959 | + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> | |
960 | + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |
961 | + </analyzer> | |
962 | + </fieldType> | |
963 | + | |
964 | + <!-- Italian --> | |
965 | + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |
966 | + <analyzer> | |
967 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
968 | + <!-- removes l', etc --> | |
969 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> | |
970 | + <filter class="solr.LowerCaseFilterFactory"/> | |
971 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/> | |
972 | + <filter class="solr.ItalianLightStemFilterFactory"/> | |
973 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> | |
974 | + </analyzer> | |
975 | + </fieldType> | |
976 | + | |
977 | + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming) | |
978 | + | |
979 | + NOTE: If you want to optimize search for precision, use default operator AND in your query | |
980 | + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use | |
981 | + OR if you would like to optimize for recall (default). | |
982 | + --> | |
983 | + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> | |
984 | + <analyzer> | |
985 | + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) | |
986 | + | |
987 | + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic | |
988 | + is used to segment compounds into their parts and the compound itself is kept as a synonym. | |
989 | + | |
990 | + Valid values for attribute mode are: | |
991 | + normal: regular segmentation | |
992 | + search: segmentation useful for search with synonyms compounds (default) | |
993 | + extended: same as search mode, but also splits unknown words into unigrams (experimental) | |
994 | + | |
995 | + For some applications it might be good to use search mode for indexing and normal mode for | |
996 | + queries to reduce recall and prevent parts of compounds from being matched and highlighted. | |
997 | + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. | |
998 | + | |
999 | + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical | |
1000 | + model with your own entries for segmentation, part-of-speech tags and readings without a need | |
1001 | + to specify weights. Notice that user dictionaries have not been subject to extensive testing. | |
1002 | + | |
1003 | + User dictionary attributes are: | |
1004 | + userDictionary: user dictionary filename | |
1005 | + userDictionaryEncoding: user dictionary encoding (default is UTF-8) | |
1006 | + | |
1007 | + See lang/userdict_ja.txt for a sample user dictionary file. | |
1008 | + | |
1009 | + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. | |
1010 | + | |
1011 | + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. | |
1012 | + --> | |
1013 | + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |
1014 | + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> | |
1015 | + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> | |
1016 | + <filter class="solr.JapaneseBaseFormFilterFactory"/> | |
1017 | + <!-- Removes tokens with certain part-of-speech tags --> | |
1018 | + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/> | |
1019 | + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> | |
1020 | + <filter class="solr.CJKWidthFilterFactory"/> | |
1021 | + <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> | |
1022 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" /> | |
1023 | + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> | |
1024 | + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |
1025 | + <!-- Lower-cases romaji characters --> | |
1026 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1027 | + </analyzer> | |
1028 | + </fieldType> | |
1029 | + | |
1030 | + <!-- Latvian --> | |
1031 | + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |
1032 | + <analyzer> | |
1033 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1034 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1035 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/> | |
1036 | + <filter class="solr.LatvianStemFilterFactory"/> | |
1037 | + </analyzer> | |
1038 | + </fieldType> | |
1039 | + | |
1040 | + <!-- Dutch --> | |
1041 | + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |
1042 | + <analyzer> | |
1043 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1044 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1045 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/> | |
1046 | + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |
1047 | + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |
1048 | + </analyzer> | |
1049 | + </fieldType> | |
1050 | + | |
1051 | + <!-- Norwegian --> | |
1052 | + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |
1053 | + <analyzer> | |
1054 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1055 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1056 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/> | |
1057 | + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |
1058 | + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> | |
1059 | + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> | |
1060 | + </analyzer> | |
1061 | + </fieldType> | |
1062 | + | |
1063 | + <!-- Portuguese --> | |
1064 | + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |
1065 | + <analyzer> | |
1066 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1067 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1068 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/> | |
1069 | + | |
1070 | + <!-- mairon (presumably the author's marker): start of local customization; the stock light stemmer below is disabled in favour of the two filters that follow --> | |
1071 | + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> --> | |
1072 | + <filter class="solr.BrazilianStemFilterFactory"/> | |
1073 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
1074 | + <!-- mairon: end of local customization; note that stemming is applied before ASCII folding --> | |
1075 | + | |
1076 | + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> | |
1077 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> | |
1078 | + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> | |
1079 | + </analyzer> | |
1080 | + </fieldType> | |
1081 | + | |
1082 | + <!-- Romanian --> | |
1083 | + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |
1084 | + <analyzer> | |
1085 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1086 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1087 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/> | |
1088 | + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |
1089 | + </analyzer> | |
1090 | + </fieldType> | |
1091 | + | |
1092 | + <!-- Russian --> | |
1093 | + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |
1094 | + <analyzer> | |
1095 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1096 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1097 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/> | |
1098 | + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |
1099 | + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> | |
1100 | + </analyzer> | |
1101 | + </fieldType> | |
1102 | + | |
1103 | + <!-- Swedish --> | |
1104 | + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |
1105 | + <analyzer> | |
1106 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1107 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1108 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/> | |
1109 | + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |
1110 | + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> | |
1111 | + </analyzer> | |
1112 | + </fieldType> | |
1113 | + | |
1114 | + <!-- Thai --> | |
1115 | + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |
1116 | + <analyzer> | |
1117 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1118 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1119 | + <filter class="solr.ThaiWordFilterFactory"/> | |
1120 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/> | |
1121 | + </analyzer> | |
1122 | + </fieldType> | |
1123 | + | |
1124 | + <!-- Turkish --> | |
1125 | + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |
1126 | + <analyzer> | |
1127 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1128 | + <filter class="solr.TurkishLowerCaseFilterFactory"/> | |
1129 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/> | |
1130 | + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |
1131 | + </analyzer> | |
1132 | + </fieldType> | |
1133 | + | |
1134 | + </types> | |
1135 | + | |
1136 | + <!-- Similarity is the scoring routine for each document vs. a query. | |
1137 | + A custom Similarity or SimilarityFactory may be specified here, but | |
1138 | + the default is fine for most applications. | |
1139 | + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity | |
1140 | + --> | |
1141 | + <!-- | |
1142 | + <similarity class="com.example.solr.CustomSimilarityFactory"> | |
1143 | + <str name="paramkey">param value</str> | |
1144 | + </similarity> | |
1145 | + --> | |
1146 | + | |
1147 | +</schema> | |
0 | 1148 | \ No newline at end of file | ... | ... |
1 | +++ a/index/sei-protocolos-config.xml | |
... | ... | @@ -0,0 +1,1773 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + For more details about configurations options that may appear in | |
21 | + this file, see http://wiki.apache.org/solr/SolrConfigXml. | |
22 | +--> | |
23 | +<config> | |
24 | + <!-- In all configuration below, a prefix of "solr." for class names | |
25 | + is an alias that causes solr to search appropriate packages, | |
26 | + including org.apache.solr.(search|update|request|core|analysis) | |
27 | + | |
28 | + You may also specify a fully qualified Java classname if you | |
29 | + have your own custom plugins. | |
30 | + --> | |
31 | + | |
32 | + <!-- Controls what version of Lucene various components of Solr | |
33 | + adhere to. Generally, you want to use the latest version to | |
34 | + get all bug fixes and improvements. It is highly recommended | |
35 | + that you fully re-index after changing this setting as it can | |
36 | + affect both how text is indexed and queried. | |
37 | + NOTE(review): pinned to LUCENE_40 although the image installs Solr 4.9.0; confirm that the schema's analyzers still load before raising it. --> | |
38 | + <luceneMatchVersion>LUCENE_40</luceneMatchVersion> | |
39 | + | |
40 | + <!-- <lib/> directives can be used to instruct Solr to load any Jars | |
41 | + identified and use them to resolve any "plugins" specified in | |
42 | + your solrconfig.xml or schema.xml (ie: Analyzers, Request | |
43 | + Handlers, etc...). | |
44 | + | |
45 | + All directories and paths are resolved relative to the | |
46 | + instanceDir. | |
47 | + | |
48 | + Please note that <lib/> directives are processed in the order | |
49 | + that they appear in your solrconfig.xml file, and are "stacked" | |
50 | + on top of each other when building a ClassLoader - so if you have | |
51 | + plugin jars with dependencies on other jars, the "lower level" | |
52 | + dependency jars should be loaded first. | |
53 | + | |
54 | + If a "./lib" directory exists in your instanceDir, all files | |
55 | + found in it are included as if you had used the following | |
56 | + syntax... | |
57 | + | |
58 | +--> | |
59 | + <lib dir="./lib" /> | |
60 | + | |
61 | + | |
62 | + <!-- A 'dir' option by itself adds any files found in the directory | |
63 | + to the classpath, this is useful for including all jars in a | |
64 | + directory. | |
65 | + | |
66 | + When a 'regex' is specified in addition to a 'dir', only the | |
67 | + files in that directory which completely match the regex | |
68 | + (anchored on both ends) will be included. | |
69 | + | |
70 | + The examples below can be used to load some solr-contribs along | |
71 | + with their external dependencies. The optional "apache-" prefix matches both the pre-4.1 and the current (solr-*) jar names. | |
72 | + --> | |
73 | + <lib dir="./contrib/extraction/lib" regex=".*\.jar" /> | |
74 | + <lib dir="./dist/" regex="(apache-)?solr-cell-\d.*\.jar" /> | |
75 | + | |
76 | + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" /> | |
77 | + <lib dir="./dist/" regex="(apache-)?solr-clustering-\d.*\.jar" /> | |
78 | + | |
79 | + <lib dir="./contrib/langid/lib/" regex=".*\.jar" /> | |
80 | + <lib dir="./dist/" regex="(apache-)?solr-langid-\d.*\.jar" /> | |
81 | + | |
82 | + <lib dir="./contrib/velocity/lib" regex=".*\.jar" /> | |
83 | + <lib dir="./dist/" regex="(apache-)?solr-velocity-\d.*\.jar" /> | |
84 | + | |
85 | + <!-- If a 'dir' option (with or without a regex) is used and nothing | |
86 | + is found that matches, it will be ignored | |
87 | + --> | |
88 | + <lib dir="/total/crap/dir/ignored" /> | |
89 | + | |
90 | + <!-- an exact 'path' can be used instead of a 'dir' to specify a | |
91 | + specific jar file. This will cause a serious error to be logged | |
92 | + if it can't be loaded. | |
93 | + --> | |
94 | + <!-- | |
95 | + <lib path="../a-jar-that-does-not-exist.jar" /> | |
96 | + --> | |
97 | + | |
98 | + <!-- Data Directory | |
99 | + | |
100 | + Used to specify an alternate directory to hold all index data | |
101 | + other than the default ./data under the Solr home. If | |
102 | + replication is in use, this should match the replication | |
103 | + configuration. | |
104 | + --> | |
105 | + <dataDir>${solr.data.dir:}</dataDir> | |
106 | + | |
107 | + | |
108 | + <!-- The DirectoryFactory to use for indexes. | |
109 | + | |
110 | + solr.StandardDirectoryFactory is filesystem | |
111 | + based and tries to pick the best implementation for the current | |
112 | + JVM and platform. solr.NRTCachingDirectoryFactory, the default, | |
113 | + wraps solr.StandardDirectoryFactory and caches small files in memory | |
114 | + for better NRT performance. | |
115 | + | |
116 | + One can force a particular implementation via solr.MMapDirectoryFactory, | |
117 | + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. | |
118 | + | |
119 | + solr.RAMDirectoryFactory is memory based, not | |
120 | + persistent, and doesn't work with replication. | |
121 | + --> | |
122 | + <directoryFactory name="DirectoryFactory" | |
123 | + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> | |
124 | + | |
125 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
126 | + Index Config - These settings control low-level behavior of indexing | |
127 | + Most example settings here show the default value, but are commented | |
128 | + out, to more easily see where customizations have been made. | |
129 | + | |
130 | + Note: This replaces <indexDefaults> and <mainIndex> from older versions | |
131 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
132 | + <indexConfig> | |
133 | + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a | |
134 | + LimitTokenCountFilterFactory in your fieldType definition. E.g. | |
135 | + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> | |
136 | + --> | |
137 | + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> | |
138 | + <!-- <writeLockTimeout>1000</writeLockTimeout> --> | |
139 | + | |
140 | + <!-- Expert: Enabling compound file will use fewer files for the index, | |
141 | + and thus fewer file descriptors, at the expense of decreased performance. | |
142 | + Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> | |
143 | + <!-- <useCompoundFile>false</useCompoundFile> --> | |
144 | + | |
145 | + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene | |
146 | + indexing for buffering added documents and deletions before they are | |
147 | + flushed to the Directory. | |
148 | + maxBufferedDocs sets a limit on the number of documents buffered | |
149 | + before flushing. | |
150 | + If both ramBufferSizeMB and maxBufferedDocs are set, then | |
151 | + Lucene will flush based on whichever limit is hit first. --> | |
152 | + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> --> | |
153 | + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> | |
154 | + | |
155 | + <!-- Expert: Merge Policy | |
156 | + The Merge Policy in Lucene controls how merging of segments is done. | |
157 | + The default since Solr/Lucene 3.3 is TieredMergePolicy. | |
158 | + The default from Lucene 2.3 until then was LogByteSizeMergePolicy; | |
159 | + even older versions of Lucene used LogDocMergePolicy. | |
160 | + --> | |
161 | + <!-- | |
162 | + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"> | |
163 | + <int name="maxMergeAtOnce">10</int> | |
164 | + <int name="segmentsPerTier">10</int> | |
165 | + </mergePolicy> | |
166 | + --> | |
167 | + | |
168 | + <!-- Merge Factor | |
169 | + The merge factor controls how many segments will get merged at a time. | |
170 | + For TieredMergePolicy, mergeFactor is a convenience parameter which | |
171 | + will set both MaxMergeAtOnce and SegmentsPerTier at once. | |
172 | + For LogByteSizeMergePolicy, mergeFactor decides how many new segments | |
173 | + will be allowed before they are merged into one. | |
174 | + Default is 10 for both merge policies. | |
175 | + --> | |
176 | + <!-- | |
177 | + <mergeFactor>10</mergeFactor> | |
178 | + --> | |
179 | + | |
180 | + <!-- Expert: Merge Scheduler | |
181 | + The Merge Scheduler in Lucene controls how merges are | |
182 | + performed. The ConcurrentMergeScheduler (Lucene 2.3 default) | |
183 | + can perform merges in the background using separate threads. | |
184 | + The SerialMergeScheduler (Lucene 2.2 default) does not. | |
185 | + --> | |
186 | + <!-- | |
187 | + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> | |
188 | + --> | |
189 | + | |
190 | + <!-- LockFactory | |
191 | + | |
192 | + This option specifies which Lucene LockFactory implementation | |
193 | + to use. | |
194 | + | |
195 | + single = SingleInstanceLockFactory - suggested for a | |
196 | + read-only index or when there is no possibility of | |
197 | + another process trying to modify the index. | |
198 | + native = NativeFSLockFactory - uses OS native file locking. | |
199 | + Do not use when multiple solr webapps in the same | |
200 | + JVM are attempting to share a single index. | |
201 | + simple = SimpleFSLockFactory - uses a plain file for locking | |
202 | + | |
203 | + Defaults: 'native' is default for Solr 3.6 and later, otherwise | |
204 | + 'simple' is the default | |
205 | + | |
206 | + More details on the nuances of each LockFactory... | |
207 | + http://wiki.apache.org/lucene-java/AvailableLockFactories | |
208 | + --> | |
209 | + <!-- <lockType>native</lockType> --> | |
210 | + | |
211 | + <!-- Unlock On Startup | |
212 | + | |
213 | + If true, unlock any held write or commit locks on startup. | |
214 | + This defeats the locking mechanism that allows multiple | |
215 | + processes to safely access a lucene index, and should be used | |
216 | + with care. Default is "false". | |
217 | + | |
218 | + This is not needed if lock type is 'none' or 'single' | |
219 | + --> | |
220 | + <!-- | |
221 | + <unlockOnStartup>false</unlockOnStartup> | |
222 | + --> | |
223 | + | |
224 | + <!-- Expert: Controls how often Lucene loads terms into memory | |
225 | + Default is 128 and is likely good for most everyone. | |
226 | + --> | |
227 | + <!-- <termIndexInterval>128</termIndexInterval> --> | |
228 | + | |
229 | + <!-- If true, IndexReaders will be reopened (often more efficient) | |
230 | + instead of closed and then opened. Default: true | |
231 | + --> | |
232 | + <!-- | |
233 | + <reopenReaders>true</reopenReaders> | |
234 | + --> | |
235 | + | |
236 | + <!-- Commit Deletion Policy | |
237 | + | |
238 | + Custom deletion policies can be specified here. The class must | |
239 | + implement org.apache.lucene.index.IndexDeletionPolicy. | |
240 | + | |
241 | + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html | |
242 | + | |
243 | + The default Solr IndexDeletionPolicy implementation supports | |
244 | + deleting index commit points on number of commits, age of | |
245 | + commit point and optimized status. | |
246 | + | |
247 | + The latest commit point should always be preserved regardless | |
248 | + of the criteria. | |
249 | + --> | |
250 | + <!-- | |
251 | + <deletionPolicy class="solr.SolrDeletionPolicy"> | |
252 | + --> | |
253 | + <!-- The number of commit points to be kept --> | |
254 | + <!-- <str name="maxCommitsToKeep">1</str> --> | |
255 | + <!-- The number of optimized commit points to be kept --> | |
256 | + <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> | |
257 | + <!-- | |
258 | + Delete all commit points once they have reached the given age. | |
259 | + Supports DateMathParser syntax e.g. | |
260 | + --> | |
261 | + <!-- | |
262 | + <str name="maxCommitAge">30MINUTES</str> | |
263 | + <str name="maxCommitAge">1DAY</str> | |
264 | + --> | |
265 | + <!-- | |
266 | + </deletionPolicy> | |
267 | + --> | |
268 | + | |
269 | + <!-- Lucene Infostream | |
270 | + | |
271 | + To aid in advanced debugging, Lucene provides an "InfoStream" | |
272 | + of detailed information when indexing. | |
273 | + | |
274 | + Setting the value to true will instruct the underlying Lucene | |
275 | + IndexWriter to write its debugging info to the specified file. | |
276 | + --> | |
277 | + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> | |
278 | + </indexConfig> | |
279 | + | |
280 | + | |
281 | + <!-- JMX | |
282 | + | |
283 | + This example enables JMX if and only if an existing MBeanServer | |
284 | + is found, use this if you want to configure JMX through JVM | |
285 | + parameters. Remove this to disable exposing Solr configuration | |
286 | + and statistics to JMX. | |
287 | + | |
288 | + For more details see http://wiki.apache.org/solr/SolrJmx | |
289 | + --> | |
290 | + <jmx /> | |
291 | + <!-- If you want to connect to a particular server, specify the | |
292 | + agentId | |
293 | + --> | |
294 | + <!-- <jmx agentId="myAgent" /> --> | |
295 | + <!-- If you want to start a new MBeanServer, specify the serviceUrl --> | |
296 | + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> | |
297 | + --> | |
298 | + | |
299 | + <!-- The default high-performance update handler --> | |
300 | + <updateHandler class="solr.DirectUpdateHandler2"> | |
301 | + | |
302 | + <!-- AutoCommit | |
303 | + | |
304 | + Perform a hard commit automatically under certain conditions. | |
305 | + Instead of enabling autoCommit, consider using "commitWithin" | |
306 | + when adding documents. | |
307 | + | |
308 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
309 | + | |
310 | + maxDocs - Maximum number of documents to add since the last | |
311 | + commit before automatically triggering a new commit. | |
312 | + | |
313 | + maxTime - Maximum amount of time in ms that is allowed to pass | |
314 | + since a document was added before automatically | |
315 | + triggering a new commit. | |
316 | + openSearcher - if false, the commit causes recent index changes | |
317 | + to be flushed to stable storage, but does not cause a new | |
318 | + searcher to be opened to make those changes visible. | |
319 | + --> | |
320 | + <autoCommit> | |
321 | + <maxTime>300000</maxTime> | |
322 | + <openSearcher>false</openSearcher> | |
323 | + </autoCommit> | |
324 | + | |
325 | + <!-- autoSoftCommit is like autoCommit except it causes a | |
326 | + 'soft' commit which only ensures that changes are visible | |
327 | + but does not ensure that data is synced to disk. This is | |
328 | + faster and more near-realtime friendly than a hard commit. | |
329 | + --> | |
330 | + <!-- | |
331 | + <autoSoftCommit> | |
332 | + <maxTime>1000</maxTime> | |
333 | + </autoSoftCommit> | |
334 | + --> | |
335 | + | |
336 | + <!-- Update Related Event Listeners | |
337 | + | |
338 | + Various IndexWriter related events can trigger Listeners to | |
339 | + take actions. | |
340 | + | |
341 | + postCommit - fired after every commit or optimize command | |
342 | + postOptimize - fired after every optimize command | |
343 | + --> | |
344 | + <!-- The RunExecutableListener executes an external command from a | |
345 | + hook such as postCommit or postOptimize. | |
346 | + | |
347 | + exe - the name of the executable to run | |
348 | + dir - dir to use as the current working directory. (default=".") | |
349 | + wait - the calling thread waits until the executable returns. | |
350 | + (default="true") | |
351 | + args - the arguments to pass to the program. (default is none) | |
352 | + env - environment variables to set. (default is none) | |
353 | + --> | |
354 | + <!-- This example shows how RunExecutableListener could be used | |
355 | + with the script based replication... | |
356 | + http://wiki.apache.org/solr/CollectionDistribution | |
357 | + --> | |
358 | + <!-- | |
359 | + <listener event="postCommit" class="solr.RunExecutableListener"> | |
360 | + <str name="exe">solr/bin/snapshooter</str> | |
361 | + <str name="dir">.</str> | |
362 | + <bool name="wait">true</bool> | |
363 | + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> | |
364 | + <arr name="env"> <str>MYVAR=val1</str> </arr> | |
365 | + </listener> | |
366 | + --> | |
367 | + | |
368 | + <!-- Enables a transaction log, currently used for real-time get. | |
369 | + "dir" - the target directory for transaction logs, defaults to the | |
370 | + solr data directory. --> | |
371 | + <updateLog> | |
372 | + <str name="dir">${solr.data.dir:}</str> | |
373 | + </updateLog> | |
374 | + | |
375 | + | |
376 | + </updateHandler> | |
377 | + | |
378 | + <!-- IndexReaderFactory | |
379 | + | |
380 | + Use the following format to specify a custom IndexReaderFactory, | |
381 | + which allows for alternate IndexReader implementations. | |
382 | + | |
383 | + ** Experimental Feature ** | |
384 | + | |
385 | + Please note - Using a custom IndexReaderFactory may prevent | |
386 | + certain other features from working. The API to | |
387 | + IndexReaderFactory may change without warning or may even be | |
388 | + removed from future releases if the problems cannot be | |
389 | + resolved. | |
390 | + | |
391 | + | |
392 | + ** Features that may not work with custom IndexReaderFactory ** | |
393 | + | |
394 | + The ReplicationHandler assumes a disk-resident index. Using a | |
395 | + custom IndexReader implementation may cause incompatibility | |
396 | + with ReplicationHandler and may cause replication to not work | |
397 | + correctly. See SOLR-1366 for details. | |
398 | + | |
399 | + --> | |
400 | + <!-- | |
401 | + <indexReaderFactory name="IndexReaderFactory" class="package.class"> | |
402 | + <str name="someArg">Some Value</str> | |
403 | + </indexReaderFactory > | |
404 | + --> | |
405 | + <!-- By explicitly declaring the Factory, the termIndexDivisor can | |
406 | + be specified. | |
407 | + --> | |
408 | + <!-- | |
409 | + <indexReaderFactory name="IndexReaderFactory" | |
410 | + class="solr.StandardIndexReaderFactory"> | |
411 | + <int name="setTermIndexDivisor">12</int> | |
412 | + </indexReaderFactory > | |
413 | + --> | |
414 | + | |
415 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
416 | + Query section - these settings control query time things like caches | |
417 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
418 | + <query> | |
419 | + <!-- Max Boolean Clauses | |
420 | + | |
421 | + Maximum number of clauses in each BooleanQuery, an exception | |
422 | + is thrown if exceeded. | |
423 | + | |
424 | + ** WARNING ** | |
425 | + | |
426 | + This option actually modifies a global Lucene property that | |
427 | + will affect all SolrCores. If multiple solrconfig.xml files | |
428 | + disagree on this property, the value at any given moment will | |
429 | + be based on the last SolrCore to be initialized. | |
430 | + | |
431 | + --> | |
432 | + <maxBooleanClauses>1024</maxBooleanClauses> | |
433 | + | |
434 | + | |
435 | + <!-- Solr Internal Query Caches | |
436 | + | |
437 | + There are two implementations of cache available for Solr, | |
438 | + LRUCache, based on a synchronized LinkedHashMap, and | |
439 | + FastLRUCache, based on a ConcurrentHashMap. | |
440 | + | |
441 | + FastLRUCache has faster gets and slower puts in single | |
442 | + threaded operation and thus is generally faster than LRUCache | |
443 | + when the hit ratio of the cache is high (> 75%), and may be | |
444 | + faster under other scenarios on multi-cpu systems. | |
445 | + --> | |
446 | + | |
447 | + <!-- Filter Cache | |
448 | + | |
449 | + Cache used by SolrIndexSearcher for filters (DocSets), | |
450 | + unordered sets of *all* documents that match a query. When a | |
451 | + new searcher is opened, its caches may be prepopulated or | |
452 | + "autowarmed" using data from caches in the old searcher. | |
453 | + autowarmCount is the number of items to prepopulate. For | |
454 | + LRUCache, the autowarmed items will be the most recently | |
455 | + accessed items. | |
456 | + | |
457 | + Parameters: | |
458 | + class - the SolrCache implementation to use | |
459 | + (LRUCache or FastLRUCache) | |
460 | + size - the maximum number of entries in the cache | |
461 | + initialSize - the initial capacity (number of entries) of | |
462 | + the cache. (see java.util.HashMap) | |
463 | + autowarmCount - the number of entries to prepopulate from | |
464 | + an old cache. | |
465 | + --> | |
466 | + <filterCache class="solr.FastLRUCache" | |
467 | + size="512" | |
468 | + initialSize="512" | |
469 | + autowarmCount="0"/> | |
470 | + | |
471 | + <!-- Query Result Cache | |
472 | + | |
473 | + Caches results of searches - ordered lists of document ids | |
474 | + (DocList) based on a query, a sort, and the range of documents requested. | |
475 | + --> | |
476 | + <queryResultCache class="solr.LRUCache" | |
477 | + size="512" | |
478 | + initialSize="512" | |
479 | + autowarmCount="0"/> | |
480 | + | |
481 | + <!-- Document Cache | |
482 | + | |
483 | + Caches Lucene Document objects (the stored fields for each | |
484 | + document). Since Lucene internal document ids are transient, | |
485 | + this cache will not be autowarmed. | |
486 | + --> | |
487 | + <documentCache class="solr.LRUCache" | |
488 | + size="512" | |
489 | + initialSize="512" | |
490 | + autowarmCount="0"/> | |
491 | + | |
492 | + <!-- Field Value Cache | |
493 | + | |
494 | + Cache used to hold field values that are quickly accessible | |
495 | + by document id. The fieldValueCache is created by default | |
496 | + even if not configured here. | |
497 | + --> | |
498 | + <!-- | |
499 | + <fieldValueCache class="solr.FastLRUCache" | |
500 | + size="512" | |
501 | + autowarmCount="128" | |
502 | + showItems="32" /> | |
503 | + --> | |
504 | + | |
505 | + <!-- Custom Cache | |
506 | + | |
507 | + Example of a generic cache. These caches may be accessed by | |
508 | + name through SolrIndexSearcher.getCache(),cacheLookup(), and | |
509 | + cacheInsert(). The purpose is to enable easy caching of | |
510 | + user/application level data. The regenerator argument should | |
511 | + be specified as an implementation of solr.CacheRegenerator | |
512 | + if autowarming is desired. | |
513 | + --> | |
514 | + <!-- | |
515 | + <cache name="myUserCache" | |
516 | + class="solr.LRUCache" | |
517 | + size="4096" | |
518 | + initialSize="1024" | |
519 | + autowarmCount="1024" | |
520 | + regenerator="com.mycompany.MyRegenerator" | |
521 | + /> | |
522 | + --> | |
523 | + | |
524 | + | |
525 | + <!-- Lazy Field Loading | |
526 | + | |
527 | + If true, stored fields that are not requested will be loaded | |
528 | + lazily. This can result in a significant speed improvement | |
529 | + if the usual case is to not load all stored fields, | |
530 | + especially if the skipped fields are large compressed text | |
531 | + fields. | |
532 | + --> | |
533 | + <enableLazyFieldLoading>true</enableLazyFieldLoading> | |
534 | + | |
535 | + <!-- Use Filter For Sorted Query | |
536 | + | |
537 | + A possible optimization that attempts to use a filter to | |
538 | + satisfy a search. If the requested sort does not include | |
539 | + score, then the filterCache will be checked for a filter | |
540 | + matching the query. If found, the filter will be used as the | |
541 | + source of document ids, and then the sort will be applied to | |
542 | + that. | |
543 | + | |
544 | + For most situations, this will not be useful unless you | |
545 | + frequently get the same search repeatedly with different sort | |
546 | + options, and none of them ever use "score" | |
547 | + --> | |
548 | + <!-- | |
549 | + <useFilterForSortedQuery>true</useFilterForSortedQuery> | |
550 | + --> | |
551 | + | |
552 | + <!-- Result Window Size | |
553 | + | |
554 | + An optimization for use with the queryResultCache. When a search | |
555 | + is requested, a superset of the requested number of document ids | |
556 | + is collected. For example, if a search for a particular query | |
557 | + requests matching documents 10 through 19, and queryResultWindowSize is 50, | |
558 | + then documents 0 through 49 will be collected and cached. Any further | |
559 | + requests in that range can be satisfied via the cache. | |
560 | + --> | |
561 | + <queryResultWindowSize>20</queryResultWindowSize> | |
562 | + | |
563 | + <!-- Maximum number of documents to cache for any entry in the | |
564 | + queryResultCache. | |
565 | + --> | |
566 | + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> | |
567 | + | |
568 | + <!-- Query Related Event Listeners | |
569 | + | |
570 | + Various IndexSearcher related events can trigger Listeners to | |
571 | + take actions. | |
572 | + | |
573 | + newSearcher - fired whenever a new searcher is being prepared | |
574 | + and there is a current searcher handling requests (aka | |
575 | + registered). It can be used to prime certain caches to | |
576 | + prevent long request times for certain requests. | |
577 | + | |
578 | + firstSearcher - fired whenever a new searcher is being | |
579 | + prepared but there is no current registered searcher to handle | |
580 | + requests or to gain autowarming data from. | |
581 | + | |
582 | + | |
583 | + --> | |
584 | + <!-- QuerySenderListener takes an array of NamedList and executes a | |
585 | + local query request for each NamedList in sequence. | |
586 | + --> | |
587 | + <listener event="newSearcher" class="solr.QuerySenderListener"> | |
588 | + <arr name="queries"> | |
589 | + <!-- | |
590 | + <lst><str name="q">solr</str><str name="sort">price asc</str></lst> | |
591 | + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> | |
592 | + --> | |
593 | + </arr> | |
594 | + </listener> | |
595 | + <listener event="firstSearcher" class="solr.QuerySenderListener"> | |
596 | + <arr name="queries"> | |
597 | + <lst> | |
598 | + <str name="q">static firstSearcher warming in solrconfig.xml</str> | |
599 | + </lst> | |
600 | + </arr> | |
601 | + </listener> | |
602 | + | |
603 | + <!-- Use Cold Searcher | |
604 | + | |
605 | + If a search request comes in and there is no current | |
606 | + registered searcher, then immediately register the still | |
607 | + warming searcher and use it. If "false" then all requests | |
608 | + will block until the first searcher is done warming. | |
609 | + --> | |
610 | + <useColdSearcher>false</useColdSearcher> | |
611 | + | |
612 | + <!-- Max Warming Searchers | |
613 | + | |
614 | + Maximum number of searchers that may be warming in the | |
615 | + background concurrently. An error is returned if this limit | |
616 | + is exceeded. | |
617 | + | |
618 | + Recommend values of 1-2 for read-only slaves, higher for | |
619 | + masters w/o cache warming. | |
620 | + --> | |
621 | + <maxWarmingSearchers>2</maxWarmingSearchers> | |
622 | + | |
623 | + </query> | |
624 | + | |
625 | + | |
626 | + <!-- Request Dispatcher | |
627 | + | |
628 | + This section contains instructions for how the SolrDispatchFilter | |
629 | + should behave when processing requests for this SolrCore. | |
630 | + | |
631 | + handleSelect is a legacy option that affects the behavior of requests | |
632 | + such as /select?qt=XXX | |
633 | + | |
634 | + handleSelect="true" will cause the SolrDispatchFilter to process | |
635 | + the request and dispatch the query to a handler specified by the | |
636 | + "qt" param, assuming "/select" isn't already registered. | |
637 | + | |
638 | + handleSelect="false" will cause the SolrDispatchFilter to | |
639 | + ignore "/select" requests, resulting in a 404 unless a handler | |
640 | + is explicitly registered with the name "/select" | |
641 | + | |
642 | + handleSelect="true" is not recommended for new users, but is the default | |
643 | + for backwards compatibility | |
644 | + --> | |
645 | + <requestDispatcher handleSelect="false" > | |
646 | + <!-- Request Parsing | |
647 | + | |
648 | + These settings indicate how Solr Requests may be parsed, and | |
649 | + what restrictions may be placed on the ContentStreams from | |
650 | + those requests | |
651 | + | |
652 | + enableRemoteStreaming - enables use of the stream.file | |
653 | + and stream.url parameters for specifying remote streams. | |
654 | + | |
655 | + multipartUploadLimitInKB - specifies the max size of | |
656 | + Multipart File Uploads that Solr will allow in a Request. | |
657 | + | |
658 | + *** WARNING *** | |
659 | + The settings below authorize Solr to fetch remote files. You | |
660 | + should make sure your system has some authentication before | |
661 | + using enableRemoteStreaming="true" | |
662 | + | |
663 | + --> | |
664 | + <requestParsers enableRemoteStreaming="true" | |
665 | + multipartUploadLimitInKB="2048000" /> | |
666 | + | |
667 | + <!-- HTTP Caching | |
668 | + | |
669 | + Set HTTP caching related parameters (for proxy caches and clients). | |
670 | + | |
671 | + The options below instruct Solr not to output any HTTP Caching | |
672 | + related headers | |
673 | + --> | |
674 | + <httpCaching never304="true" /> | |
675 | + <!-- If you include a <cacheControl> directive, it will be used to | |
676 | + generate a Cache-Control header (as well as an Expires header | |
677 | + if the value contains "max-age=") | |
678 | + | |
679 | + By default, no Cache-Control header is generated. | |
680 | + | |
681 | + You can use the <cacheControl> option even if you have set | |
682 | + never304="true" | |
683 | + --> | |
684 | + <!-- | |
685 | + <httpCaching never304="true" > | |
686 | + <cacheControl>max-age=30, public</cacheControl> | |
687 | + </httpCaching> | |
688 | + --> | |
689 | + <!-- To enable Solr to respond with automatically generated HTTP | |
690 | + Caching headers, and to respond to Cache Validation requests | |
691 | + correctly, set the value of never304="false" | |
692 | + | |
693 | + This will cause Solr to generate Last-Modified and ETag | |
694 | + headers based on the properties of the Index. | |
695 | + | |
696 | + The following options can also be specified to affect the | |
697 | + values of these headers... | |
698 | + | |
699 | + lastModFrom - the default value is "openTime" which means the | |
700 | + Last-Modified value (and validation against If-Modified-Since | |
701 | + requests) will all be relative to when the current Searcher | |
702 | + was opened. You can change it to lastModFrom="dirLastMod" if | |
703 | + you want the value to exactly correspond to when the physical | |
704 | + index was last modified. | |
705 | + | |
706 | + etagSeed="..." is an option you can change to force the ETag | |
707 | + header (and validation against If-None-Match requests) to be | |
708 | + different even if the index has not changed (ie: when making | |
709 | + significant changes to your config file) | |
710 | + | |
711 | + (lastModFrom and etagSeed are both ignored if you use | |
712 | + the never304="true" option) | |
713 | + --> | |
714 | + <!-- | |
715 | + <httpCaching lastModFrom="openTime" | |
716 | + etagSeed="Solr"> | |
717 | + <cacheControl>max-age=30, public</cacheControl> | |
718 | + </httpCaching> | |
719 | + --> | |
720 | + </requestDispatcher> | |
721 | + | |
722 | + <!-- Request Handlers | |
723 | + | |
724 | + http://wiki.apache.org/solr/SolrRequestHandler | |
725 | + | |
726 | + Incoming queries will be dispatched to a specific handler by name | |
727 | + based on the path specified in the request. | |
728 | + | |
729 | + Legacy behavior: If the request path uses "/select" but no Request | |
730 | + Handler has that name, and if handleSelect="true" has been specified in | |
731 | + the requestDispatcher, then the Request Handler is dispatched based on | |
732 | + the qt parameter. Handlers without a leading '/' are accessed this way | |
733 | + like so: http://host/app/[core/]select?qt=name If no qt is | |
734 | + given, then the requestHandler that declares default="true" will be | |
735 | + used or the one named "standard". | |
736 | + | |
737 | + If a Request Handler is declared with startup="lazy", then it will | |
738 | + not be initialized until the first request that uses it. | |
739 | + | |
740 | + --> | |
741 | + <!-- SearchHandler | |
742 | + | |
743 | + http://wiki.apache.org/solr/SearchHandler | |
744 | + | |
745 | + For processing Search Queries, the primary Request Handler | |
746 | + provided with Solr is "SearchHandler". It delegates to a sequence | |
747 | + of SearchComponents (see below) and supports distributed | |
748 | + queries across multiple shards | |
749 | + --> | |
750 | + <requestHandler name="/select" class="solr.SearchHandler"> | |
751 | + <!-- default values for query parameters can be specified, these | |
752 | + will be overridden by parameters in the request | |
753 | + --> | |
754 | + <lst name="defaults"> | |
755 | + <str name="echoParams">explicit</str> | |
756 | + <int name="rows">10</int> | |
757 | + <str name="df">text</str> | |
758 | + </lst> | |
759 | + <!-- In addition to defaults, "appends" params can be specified | |
760 | + to identify values which should be appended to the list of | |
761 | + multi-val params from the query (or the existing "defaults"). | |
762 | + --> | |
763 | + <!-- In this example, the param "fq=instock:true" would be appended to | |
764 | + any query time fq params the user may specify, as a mechanism for | |
765 | + partitioning the index, independent of any user selected filtering | |
766 | + that may also be desired (perhaps as a result of faceted searching). | |
767 | + | |
768 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
769 | + "appends" values from being used, so don't use this mechanism | |
770 | + unless you are sure you always want it. | |
771 | + --> | |
772 | + <!-- | |
773 | + <lst name="appends"> | |
774 | + <str name="fq">inStock:true</str> | |
775 | + </lst> | |
776 | + --> | |
777 | + <!-- "invariants" are a way of letting the Solr maintainer lock down | |
778 | + the options available to Solr clients. Any params values | |
779 | + specified here are used regardless of what values may be specified | |
780 | + in either the query, the "defaults", or the "appends" params. | |
781 | + | |
782 | + In this example, the facet.field and facet.query params would | |
783 | + be fixed, limiting the facets clients can use. Faceting is | |
784 | + not turned on by default - but if the client does specify | |
785 | + facet=true in the request, these are the only facets they | |
786 | + will be able to see counts for; regardless of what other | |
787 | + facet.field or facet.query params they may specify. | |
788 | + | |
789 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
790 | + "invariants" values from being used, so don't use this mechanism | |
791 | + unless you are sure you always want it. | |
792 | + --> | |
793 | + <!-- | |
794 | + <lst name="invariants"> | |
795 | + <str name="facet.field">cat</str> | |
796 | + <str name="facet.field">manu_exact</str> | |
797 | + <str name="facet.query">price:[* TO 500]</str> | |
798 | + <str name="facet.query">price:[500 TO *]</str> | |
799 | + </lst> | |
800 | + --> | |
801 | + <!-- If the default list of SearchComponents is not desired, that | |
802 | + list can either be overridden completely, or components can be | |
803 | + prepended or appended to the default list. (see below) | |
804 | + --> | |
805 | + <!-- | |
806 | + <arr name="components"> | |
807 | + <str>nameOfCustomComponent1</str> | |
808 | + <str>nameOfCustomComponent2</str> | |
809 | + </arr> | |
810 | + --> | |
811 | + | |
812 | + </requestHandler> | |
813 | + | |
814 | + <!-- A request handler that returns indented JSON by default --> | |
815 | + <requestHandler name="/query" class="solr.SearchHandler"> | |
816 | + <lst name="defaults"> | |
817 | + <str name="echoParams">explicit</str> | |
818 | + <str name="wt">json</str> | |
819 | + <str name="indent">true</str> | |
820 | + <str name="df">text</str> | |
821 | + </lst> | |
822 | + </requestHandler> | |
823 | + | |
824 | + | |
825 | + <!-- realtime get handler, guaranteed to return the latest stored fields of | |
826 | + any document, without the need to commit or open a new searcher. The | |
827 | + current implementation relies on the updateLog feature being enabled. --> | |
828 | + <requestHandler name="/get" class="solr.RealTimeGetHandler"> | |
829 | + <lst name="defaults"> | |
830 | + <str name="omitHeader">true</str> | |
831 | + <str name="wt">json</str> | |
832 | + <str name="indent">true</str> | |
833 | + </lst> | |
834 | + </requestHandler> | |
835 | + | |
836 | + | |
837 | + <!-- A Robust Example | |
838 | + | |
839 | + This example SearchHandler declaration shows off usage of the | |
840 | + SearchHandler with many defaults declared | |
841 | + | |
842 | + Note that the same Request Handler class | |
843 | + (SearchHandler) can be registered multiple times with different | |
844 | + names (and different init parameters) | |
845 | + --> | |
846 | + <requestHandler name="/browse" class="solr.SearchHandler"> | |
847 | + <lst name="defaults"> | |
848 | + <str name="echoParams">explicit</str> | |
849 | + | |
850 | + <!-- VelocityResponseWriter settings --> | |
851 | + <str name="wt">velocity</str> | |
852 | + <str name="v.template">browse</str> | |
853 | + <str name="v.layout">layout</str> | |
854 | + <str name="title">Solritas</str> | |
855 | + | |
856 | + <!-- Query settings --> | |
857 | + <str name="defType">edismax</str> | |
858 | + <str name="qf"> | |
859 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
860 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
861 | + </str> | |
862 | + <str name="df">text</str> | |
863 | + <str name="mm">100%</str> | |
864 | + <str name="q.alt">*:*</str> | |
865 | + <str name="rows">10</str> | |
866 | + <str name="fl">*,score</str> | |
867 | + | |
868 | + <str name="mlt.qf"> | |
869 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
870 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
871 | + </str> | |
872 | + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str> | |
873 | + <int name="mlt.count">3</int> | |
874 | + | |
875 | + <!-- Faceting defaults --> | |
876 | + <str name="facet">on</str> | |
877 | + <str name="facet.field">cat</str> | |
878 | + <str name="facet.field">manu_exact</str> | |
879 | + <str name="facet.field">content_type</str> | |
880 | + <str name="facet.field">author_s</str> | |
881 | + <str name="facet.query">ipod</str> | |
882 | + <str name="facet.query">GB</str> | |
883 | + <str name="facet.mincount">1</str> | |
884 | + <str name="facet.pivot">cat,inStock</str> | |
885 | + <str name="facet.range.other">after</str> | |
886 | + <str name="facet.range">price</str> | |
887 | + <int name="f.price.facet.range.start">0</int> | |
888 | + <int name="f.price.facet.range.end">600</int> | |
889 | + <int name="f.price.facet.range.gap">50</int> | |
890 | + <str name="facet.range">popularity</str> | |
891 | + <int name="f.popularity.facet.range.start">0</int> | |
892 | + <int name="f.popularity.facet.range.end">10</int> | |
893 | + <int name="f.popularity.facet.range.gap">3</int> | |
894 | + <str name="facet.range">manufacturedate_dt</str> | |
895 | + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str> | |
896 | + <str name="f.manufacturedate_dt.facet.range.end">NOW</str> | |
897 | + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str> | |
898 | + <str name="f.manufacturedate_dt.facet.range.other">before</str> | |
899 | + <str name="f.manufacturedate_dt.facet.range.other">after</str> | |
900 | + | |
901 | + <!-- Highlighting defaults --> | |
902 | + <str name="hl">on</str> | |
903 | + <str name="hl.fl">content</str> | |
904 | + <str name="hl.encoder">html</str> | |
905 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
906 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
907 | + <str name="f.title.hl.fragsize">0</str> | |
908 | + <str name="f.title.hl.alternateField">title</str> | |
909 | + <str name="f.name.hl.fragsize">0</str> | |
910 | + <str name="f.name.hl.alternateField">name</str> | |
911 | + <str name="f.content.hl.snippets">3</str> | |
912 | + <str name="f.content.hl.fragsize">1000</str> | |
913 | + <str name="f.content.hl.alternateField">content</str> | |
914 | + <str name="f.content.hl.maxAlternateFieldLength">250</str> | |
915 | + | |
916 | + | |
917 | + <!-- Spell checking defaults --> | |
918 | + <str name="spellcheck">on</str> | |
919 | + <str name="spellcheck.extendedResults">false</str> | |
920 | + <str name="spellcheck.count">5</str> | |
921 | + <str name="spellcheck.alternativeTermCount">2</str> | |
922 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
923 | + <str name="spellcheck.collate">true</str> | |
924 | + <str name="spellcheck.collateExtendedResults">true</str> | |
925 | + <str name="spellcheck.maxCollationTries">5</str> | |
926 | + <str name="spellcheck.maxCollations">3</str> | |
927 | + </lst> | |
928 | + | |
929 | + <!-- append spellchecking to our list of components --> | |
930 | + <arr name="last-components"> | |
931 | + <str>spellcheck</str> | |
932 | + </arr> | |
933 | + </requestHandler> | |
934 | + | |
935 | + | |
936 | + <!-- Update Request Handler. | |
937 | + | |
938 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
939 | + | |
940 | + The canonical Request Handler for Modifying the Index through | |
941 | + commands specified using XML, JSON, CSV, or JAVABIN | |
942 | + | |
943 | + Note: Since Solr 1.1, requestHandlers require a valid content | |
944 | + type header if posted in the body. For example, curl now | |
945 | + requires: -H 'Content-type:text/xml; charset=utf-8' | |
946 | + | |
947 | + To override the request content type and force a specific | |
948 | + Content-type, use the request parameter: | |
949 | + ?update.contentType=text/csv | |
950 | + | |
951 | + This handler will pick a response format to match the input | |
952 | + if the 'wt' parameter is not explicit | |
953 | + --> | |
954 | + <requestHandler name="/update" class="solr.UpdateRequestHandler"> | |
955 | + <!-- See below for information on defining | |
956 | + updateRequestProcessorChains that can be used by name | |
957 | + on each Update Request | |
958 | + --> | |
959 | + <!-- | |
960 | + <lst name="defaults"> | |
961 | + <str name="update.chain">dedupe</str> | |
962 | + </lst> | |
963 | + --> | |
964 | + </requestHandler> | |
965 | + | |
966 | + <!-- for back compat with clients using /update/json and /update/csv --> | |
967 | + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler"> | |
968 | + <lst name="defaults"> | |
969 | + <str name="stream.contentType">application/json</str> | |
970 | + </lst> | |
971 | + </requestHandler> | |
972 | + <requestHandler name="/update/csv" class="solr.CSVRequestHandler"> | |
973 | + <lst name="defaults"> | |
974 | + <str name="stream.contentType">application/csv</str> | |
975 | + </lst> | |
976 | + </requestHandler> | |
977 | + | |
978 | + <!-- Solr Cell Update Request Handler | |
979 | + | |
980 | + http://wiki.apache.org/solr/ExtractingRequestHandler | |
981 | + | |
982 | + --> | |
983 | + <requestHandler name="/update/extract" | |
984 | + startup="lazy" | |
985 | + class="solr.extraction.ExtractingRequestHandler" > | |
986 | + <lst name="defaults"> | |
987 | + <str name="lowernames">true</str> | |
988 | + <str name="uprefix">ignored_</str> | |
989 | + | |
990 | + <!-- capture link hrefs but ignore div attributes --> | |
991 | + <str name="captureAttr">true</str> | |
992 | + <str name="fmap.a">links</str> | |
993 | + <str name="fmap.div">ignored_</str> | |
994 | + </lst> | |
995 | + </requestHandler> | |
996 | + | |
997 | + | |
998 | + <!-- Field Analysis Request Handler | |
999 | + | |
1000 | + RequestHandler that provides much the same functionality as | |
1001 | + analysis.jsp. Provides the ability to specify multiple field | |
1002 | + types and field names in the same request and outputs | |
1003 | + index-time and query-time analysis for each of them. | |
1004 | + | |
1005 | + Request parameters are: | |
1006 | + analysis.fieldname - field name whose analyzers are to be used | |
1007 | + | |
1008 | + analysis.fieldtype - field type whose analyzers are to be used | |
1009 | + analysis.fieldvalue - text for index-time analysis | |
1010 | + q (or analysis.q) - text for query time analysis | |
1011 | + analysis.showmatch (true|false) - When set to true and when | |
1012 | + query analysis is performed, the produced tokens of the | |
1013 | + field value analysis will be marked as "matched" for every | |
1014 | + token that is produced by the query analysis | |
1015 | + --> | |
1016 | + <requestHandler name="/analysis/field" | |
1017 | + startup="lazy" | |
1018 | + class="solr.FieldAnalysisRequestHandler" /> | |
1019 | + | |
1020 | + | |
1021 | + <!-- Document Analysis Handler | |
1022 | + | |
1023 | + http://wiki.apache.org/solr/AnalysisRequestHandler | |
1024 | + | |
1025 | + An analysis handler that provides a breakdown of the analysis | |
1026 | + process of provided documents. This handler expects a (single) | |
1027 | + content stream with the following format: | |
1028 | + | |
1029 | + <docs> | |
1030 | + <doc> | |
1031 | + <field name="id">1</field> | |
1032 | + <field name="name">The Name</field> | |
1033 | + <field name="text">The Text Value</field> | |
1034 | + </doc> | |
1035 | + <doc>...</doc> | |
1036 | + <doc>...</doc> | |
1037 | + ... | |
1038 | + </docs> | |
1039 | + | |
1040 | + Note: Each document must contain a field which serves as the | |
1041 | + unique key. This key is used in the returned response to associate | |
1042 | + an analysis breakdown to the analyzed document. | |
1043 | + | |
1044 | + Like the FieldAnalysisRequestHandler, this handler also supports | |
1045 | + query analysis by sending either an "analysis.query" or "q" | |
1046 | + request parameter that holds the query text to be analyzed. It | |
1047 | + also supports the "analysis.showmatch" parameter which, when set to | |
1048 | + true, causes all field tokens that match the query tokens to be marked | |
1049 | + as a "match". | |
1050 | + --> | |
1051 | + <requestHandler name="/analysis/document" | |
1052 | + class="solr.DocumentAnalysisRequestHandler" | |
1053 | + startup="lazy" /> | |
1054 | + | |
1055 | + <!-- Admin Handlers | |
1056 | + | |
1057 | + Admin Handlers - This will register all the standard admin | |
1058 | + RequestHandlers. | |
1059 | + --> | |
1060 | + <requestHandler name="/admin/" | |
1061 | + class="solr.admin.AdminHandlers" /> | |
1062 | + <!-- This single handler is equivalent to the following... --> | |
1063 | + <!-- | |
1064 | + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" /> | |
1065 | + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" /> | |
1066 | + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" /> | |
1067 | + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" /> | |
1068 | + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" /> | |
1069 | + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" /> | |
1070 | + --> | |
1071 | + <!-- If you wish to hide files under ${solr.home}/conf, explicitly | |
1072 | + register the ShowFileRequestHandler using: | |
1073 | + --> | |
1074 | + <!-- | |
1075 | + <requestHandler name="/admin/file" | |
1076 | + class="solr.admin.ShowFileRequestHandler" > | |
1077 | + <lst name="invariants"> | |
1078 | + <str name="hidden">synonyms.txt</str> | |
1079 | + <str name="hidden">anotherfile.txt</str> | |
1080 | + </lst> | |
1081 | + </requestHandler> | |
1082 | + --> | |
1083 | + | |
1084 | + <!-- ping/healthcheck --> | |
1085 | + <requestHandler name="/admin/ping" class="solr.PingRequestHandler"> | |
1086 | + <lst name="invariants"> | |
1087 | + <str name="q">solrpingquery</str> | |
1088 | + </lst> | |
1089 | + <lst name="defaults"> | |
1090 | + <str name="echoParams">all</str> | |
1091 | + </lst> | |
1092 | + <!-- An optional feature of the PingRequestHandler is to configure the | |
1093 | + handler with a "healthcheckFile" which can be used to enable/disable | |
1094 | + the PingRequestHandler. | |
1095 | + relative paths are resolved against the data dir | |
1096 | + --> | |
1097 | + <!-- <str name="healthcheckFile">server-enabled.txt</str> --> | |
1098 | + </requestHandler> | |
1099 | + | |
1100 | + <!-- Echo the request contents back to the client --> | |
1101 | + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > | |
1102 | + <lst name="defaults"> | |
1103 | + <str name="echoParams">explicit</str> | |
1104 | + <str name="echoHandler">true</str> | |
1105 | + </lst> | |
1106 | + </requestHandler> | |
1107 | + | |
1108 | + <!-- Solr Replication | |
1109 | + | |
1110 | + The SolrReplicationHandler supports replicating indexes from a | |
1111 | + "master" used for indexing and "slaves" used for queries. | |
1112 | + | |
1113 | + http://wiki.apache.org/solr/SolrReplication | |
1114 | + | |
1115 | + It is also necessary for SolrCloud to function (in Cloud mode, the | |
1116 | + replication handler is used to bulk transfer segments when nodes | |
1117 | + are added or need to recover). | |
1118 | + | |
1119 | + https://wiki.apache.org/solr/SolrCloud/ | |
1120 | + --> | |
1121 | + <requestHandler name="/replication" class="solr.ReplicationHandler" > | |
1122 | + <!-- | |
1123 | + To enable simple master/slave replication, uncomment one of the | |
1124 | + sections below, depending on whether this Solr instance should be | |
1125 | + the "master" or a "slave". If this instance is a "slave" you will | |
1126 | + also need to fill in the masterUrl to point to a real machine. | |
1127 | + --> | |
1128 | + <!-- | |
1129 | + <lst name="master"> | |
1130 | + <str name="replicateAfter">commit</str> | |
1131 | + <str name="replicateAfter">startup</str> | |
1132 | + <str name="confFiles">schema.xml,stopwords.txt</str> | |
1133 | + </lst> | |
1134 | + --> | |
1135 | + <!-- | |
1136 | + <lst name="slave"> | |
1137 | + <str name="masterUrl">http://your-master-hostname:8983/solr</str> | |
1138 | + <str name="pollInterval">00:00:60</str> | |
1139 | + </lst> | |
1140 | + --> | |
1141 | + </requestHandler> | |
1142 | + | |
1143 | + <!-- Search Components | |
1144 | + | |
1145 | + Search components are registered to SolrCore and used by | |
1146 | + instances of SearchHandler (which can access them by name) | |
1147 | + | |
1148 | + By default, the following components are available: | |
1149 | + | |
1150 | + <searchComponent name="query" class="solr.QueryComponent" /> | |
1151 | + <searchComponent name="facet" class="solr.FacetComponent" /> | |
1152 | + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> | |
1153 | + <searchComponent name="highlight" class="solr.HighlightComponent" /> | |
1154 | + <searchComponent name="stats" class="solr.StatsComponent" /> | |
1155 | + <searchComponent name="debug" class="solr.DebugComponent" /> | |
1156 | + | |
1157 | + Default configuration in a requestHandler would look like: | |
1158 | + | |
1159 | + <arr name="components"> | |
1160 | + <str>query</str> | |
1161 | + <str>facet</str> | |
1162 | + <str>mlt</str> | |
1163 | + <str>highlight</str> | |
1164 | + <str>stats</str> | |
1165 | + <str>debug</str> | |
1166 | + </arr> | |
1167 | + | |
1168 | + If you register a searchComponent to one of the standard names, | |
1169 | + that will be used instead of the default. | |
1170 | + | |
1171 | + To insert components before or after the 'standard' components, use: | |
1172 | + | |
1173 | + <arr name="first-components"> | |
1174 | + <str>myFirstComponentName</str> | |
1175 | + </arr> | |
1176 | + | |
1177 | + <arr name="last-components"> | |
1178 | + <str>myLastComponentName</str> | |
1179 | + </arr> | |
1180 | + | |
1181 | + NOTE: The component registered with the name "debug" will | |
1182 | + always be executed after the "last-components" | |
1183 | + | |
1184 | + --> | |
1185 | + | |
1186 | + <!-- Spell Check | |
1187 | + | |
1188 | + The spell check component can return a list of alternative spelling | |
1189 | + suggestions. | |
1190 | + | |
1191 | + http://wiki.apache.org/solr/SpellCheckComponent | |
1192 | + --> | |
1193 | + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> | |
1194 | + | |
1195 | + <str name="queryAnalyzerFieldType">textSpell</str> | |
1196 | + | |
1197 | + <!-- Multiple "Spell Checkers" can be declared and used by this | |
1198 | + component | |
1199 | + --> | |
1200 | + | |
1201 | + <!-- a spellchecker built from a field of the main index --> | |
1202 | + <lst name="spellchecker"> | |
1203 | + <str name="name">default</str> | |
1204 | + <str name="field">name</str> | |
1205 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1206 | + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> | |
1207 | + <str name="distanceMeasure">internal</str> | |
1208 | + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> | |
1209 | + <float name="accuracy">0.5</float> | |
1210 | + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> | |
1211 | + <int name="maxEdits">2</int> | |
1212 | + <!-- the minimum shared prefix when enumerating terms --> | |
1213 | + <int name="minPrefix">1</int> | |
1214 | + <!-- maximum number of inspections per result. --> | |
1215 | + <int name="maxInspections">5</int> | |
1216 | + <!-- minimum length of a query term to be considered for correction --> | |
1217 | + <int name="minQueryLength">4</int> | |
1218 | + <!-- maximum threshold of documents a query term can appear in to be considered for correction --> | |
1219 | + <float name="maxQueryFrequency">0.01</float> | |
1220 | + <!-- uncomment this to require suggestions to occur in 1% of the documents | |
1221 | + <float name="thresholdTokenFrequency">.01</float> | |
1222 | + --> | |
1223 | + </lst> | |
1224 | + | |
1225 | + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> | |
1226 | + <lst name="spellchecker"> | |
1227 | + <str name="name">wordbreak</str> | |
1228 | + <str name="classname">solr.WordBreakSolrSpellChecker</str> | |
1229 | + <str name="field">name</str> | |
1230 | + <str name="combineWords">true</str> | |
1231 | + <str name="breakWords">true</str> | |
1232 | + <int name="maxChanges">10</int> | |
1233 | + </lst> | |
1234 | + | |
1235 | + <!-- a spellchecker that uses a different distance measure --> | |
1236 | + <!-- | |
1237 | + <lst name="spellchecker"> | |
1238 | + <str name="name">jarowinkler</str> | |
1239 | + <str name="field">spell</str> | |
1240 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1241 | + <str name="distanceMeasure"> | |
1242 | + org.apache.lucene.search.spell.JaroWinklerDistance | |
1243 | + </str> | |
1244 | + </lst> | |
1245 | + --> | |
1246 | + | |
1247 | + <!-- a spellchecker that uses an alternate comparator | |
1248 | + | |
1249 | + comparatorClass can be one of: | |
1250 | + 1. score (default) | |
1251 | + 2. freq (Frequency first, then score) | |
1252 | + 3. A fully qualified class name | |
1253 | + --> | |
1254 | + <!-- | |
1255 | + <lst name="spellchecker"> | |
1256 | + <str name="name">freq</str> | |
1257 | + <str name="field">lowerfilt</str> | |
1258 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1259 | + <str name="comparatorClass">freq</str> | |
1260 | + </lst> --> | |
1261 | + | |
1262 | + <!-- A spellchecker that reads the list of words from a file --> | |
1263 | + <!-- | |
1264 | + <lst name="spellchecker"> | |
1265 | + <str name="classname">solr.FileBasedSpellChecker</str> | |
1266 | + <str name="name">file</str> | |
1267 | + <str name="sourceLocation">spellings.txt</str> | |
1268 | + <str name="characterEncoding">UTF-8</str> | |
1269 | + <str name="spellcheckIndexDir">spellcheckerFile</str> | |
1270 | + </lst> | |
1271 | + --> | |
1272 | + </searchComponent> | |
1273 | + | |
1274 | + <!-- A request handler for demonstrating the spellcheck component. | |
1275 | + | |
1276 | + NOTE: This is purely as an example. The whole purpose of the | |
1277 | + SpellCheckComponent is to hook it into the request handler that | |
1278 | + handles your normal user queries so that a separate request is | |
1279 | + not needed to get suggestions. | |
1280 | + | |
1281 | + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS | |
1282 | + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! | |
1283 | + | |
1284 | + See http://wiki.apache.org/solr/SpellCheckComponent for details | |
1285 | + on the request parameters. | |
1286 | + --> | |
1287 | + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> | |
1288 | + <lst name="defaults"> | |
1289 | + <str name="df">text</str> | |
1290 | + <!-- Solr will use suggestions from both the 'default' spellchecker | |
1291 | + and from the 'wordbreak' spellchecker and combine them. | |
1292 | + collations (re-written queries) can include a combination of | |
1293 | + corrections from both spellcheckers --> | |
1294 | + <str name="spellcheck.dictionary">default</str> | |
1295 | + <str name="spellcheck.dictionary">wordbreak</str> | |
1296 | + <str name="spellcheck">on</str> | |
1297 | + <str name="spellcheck.extendedResults">true</str> | |
1298 | + <str name="spellcheck.count">10</str> | |
1299 | + <str name="spellcheck.alternativeTermCount">5</str> | |
1300 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
1301 | + <str name="spellcheck.collate">true</str> | |
1302 | + <str name="spellcheck.collateExtendedResults">true</str> | |
1303 | + <str name="spellcheck.maxCollationTries">10</str> | |
1304 | + <str name="spellcheck.maxCollations">5</str> | |
1305 | + </lst> | |
1306 | + <arr name="last-components"> | |
1307 | + <str>spellcheck</str> | |
1308 | + </arr> | |
1309 | + </requestHandler> | |
1310 | + | |
1311 | + <!-- Term Vector Component | |
1312 | + | |
1313 | + http://wiki.apache.org/solr/TermVectorComponent | |
1314 | + --> | |
1315 | + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> | |
1316 | + | |
1317 | + <!-- A request handler for demonstrating the term vector component | |
1318 | + | |
1319 | + This is purely as an example. | |
1320 | + | |
1321 | + In reality you will likely want to add the component to your | |
1322 | + already specified request handlers. | |
1323 | + --> | |
1324 | + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> | |
1325 | + <lst name="defaults"> | |
1326 | + <str name="df">text</str> | |
1327 | + <bool name="tv">true</bool> | |
1328 | + </lst> | |
1329 | + <arr name="last-components"> | |
1330 | + <str>tvComponent</str> | |
1331 | + </arr> | |
1332 | + </requestHandler> | |
1333 | + | |
1334 | + <!-- Clustering Component | |
1335 | + | |
1336 | + http://wiki.apache.org/solr/ClusteringComponent | |
1337 | + | |
1338 | + You'll need to set the solr.clustering.enabled system property | |
1339 | + when running solr to run with clustering enabled: | |
1340 | + | |
1341 | + java -Dsolr.clustering.enabled=true -jar start.jar | |
1342 | + | |
1343 | + --> | |
1344 | + <searchComponent name="clustering" | |
1345 | + enable="${solr.clustering.enabled:false}" | |
1346 | + class="solr.clustering.ClusteringComponent" > | |
1347 | + <!-- Declare an engine --> | |
1348 | + <lst name="engine"> | |
1349 | + <!-- The name, only one can be named "default" --> | |
1350 | + <str name="name">default</str> | |
1351 | + | |
1352 | + <!-- Class name of Carrot2 clustering algorithm. | |
1353 | + | |
1354 | + Currently available algorithms are: | |
1355 | + | |
1356 | + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm | |
1357 | + * org.carrot2.clustering.stc.STCClusteringAlgorithm | |
1358 | + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm | |
1359 | + | |
1360 | + See http://project.carrot2.org/algorithms.html for the | |
1361 | + algorithm's characteristics. | |
1362 | + --> | |
1363 | + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str> | |
1364 | + | |
1365 | + <!-- Overriding values for Carrot2 default algorithm attributes. | |
1366 | + | |
1367 | + For a description of all available attributes, see: | |
1368 | + http://download.carrot2.org/stable/manual/#chapter.components. | |
1369 | + Use attribute key as name attribute of str elements | |
1370 | + below. These can be further overridden for individual | |
1371 | + requests by specifying attribute key as request parameter | |
1372 | + name and attribute value as parameter value. | |
1373 | + --> | |
1374 | + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str> | |
1375 | + | |
1376 | + <!-- Location of Carrot2 lexical resources. | |
1377 | + | |
1378 | + A directory from which to load Carrot2-specific stop words | |
1379 | + and stop labels. Absolute or relative to Solr config directory. | |
1380 | + If a specific resource (e.g. stopwords.en) is present in the | |
1381 | + specified dir, it will completely override the corresponding | |
1382 | + default one that ships with Carrot2. | |
1383 | + | |
1384 | + For an overview of Carrot2 lexical resources, see: | |
1385 | + http://download.carrot2.org/head/manual/#chapter.lexical-resources | |
1386 | + --> | |
1387 | + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str> | |
1388 | + | |
1389 | + <!-- The language to assume for the documents. | |
1390 | + | |
1391 | + For a list of allowed values, see: | |
1392 | + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage | |
1393 | + --> | |
1394 | + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str> | |
1395 | + </lst> | |
1396 | + <lst name="engine"> | |
1397 | + <str name="name">stc</str> | |
1398 | + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str> | |
1399 | + </lst> | |
1400 | + </searchComponent> | |
1401 | + | |
1402 | + <!-- A request handler for demonstrating the clustering component | |
1403 | + | |
1404 | + This is purely as an example. | |
1405 | + | |
1406 | + In reality you will likely want to add the component to your | |
1407 | + already specified request handlers. | |
1408 | + --> | |
1409 | + <requestHandler name="/clustering" | |
1410 | + startup="lazy" | |
1411 | + enable="${solr.clustering.enabled:false}" | |
1412 | + class="solr.SearchHandler"> | |
1413 | + <lst name="defaults"> | |
1414 | + <bool name="clustering">true</bool> | |
1415 | + <str name="clustering.engine">default</str> | |
1416 | + <bool name="clustering.results">true</bool> | |
1417 | + <!-- The title field --> | |
1418 | + <str name="carrot.title">name</str> | |
1419 | + <str name="carrot.url">id</str> | |
1420 | + <!-- The field to cluster on --> | |
1421 | + <str name="carrot.snippet">features</str> | |
1422 | + <!-- produce summaries --> | |
1423 | + <bool name="carrot.produceSummary">true</bool> | |
1424 | + <!-- the maximum number of labels per cluster --> | |
1425 | + <!--<int name="carrot.numDescriptions">5</int>--> | |
1426 | + <!-- produce sub clusters --> | |
1427 | + <bool name="carrot.outputSubClusters">false</bool> | |
1428 | + | |
1429 | + <str name="defType">edismax</str> | |
1430 | + <str name="qf"> | |
1431 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
1432 | + </str> | |
1433 | + <str name="q.alt">*:*</str> | |
1434 | + <str name="rows">10</str> | |
1435 | + <str name="fl">*,score</str> | |
1436 | + </lst> | |
1437 | + <arr name="last-components"> | |
1438 | + <str>clustering</str> | |
1439 | + </arr> | |
1440 | + </requestHandler> | |
1441 | + | |
1442 | + <!-- Terms Component | |
1443 | + | |
1444 | + http://wiki.apache.org/solr/TermsComponent | |
1445 | + | |
1446 | + A component to return terms and document frequency of those | |
1447 | + terms | |
1448 | + --> | |
1449 | + <searchComponent name="terms" class="solr.TermsComponent"/> | |
1450 | + | |
1451 | + <!-- A request handler for demonstrating the terms component --> | |
1452 | + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> | |
1453 | + <lst name="defaults"> | |
1454 | + <bool name="terms">true</bool> | |
1455 | + <bool name="distrib">false</bool> | |
1456 | + </lst> | |
1457 | + <arr name="components"> | |
1458 | + <str>terms</str> | |
1459 | + </arr> | |
1460 | + </requestHandler> | |
1461 | + | |
1462 | + | |
1463 | + <!-- Query Elevation Component | |
1464 | + | |
1465 | + http://wiki.apache.org/solr/QueryElevationComponent | |
1466 | + | |
1467 | + a search component that enables you to configure the top | |
1468 | + results for a given query regardless of the normal lucene | |
1469 | + scoring. | |
1470 | + --> | |
1471 | + <searchComponent name="elevator" class="solr.QueryElevationComponent" > | |
1472 | + <!-- pick a fieldType to analyze queries --> | |
1473 | + <str name="queryFieldType">string</str> | |
1474 | + <str name="config-file">elevate.xml</str> | |
1475 | + </searchComponent> | |
1476 | + | |
1477 | + <!-- A request handler for demonstrating the elevator component --> | |
1478 | + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> | |
1479 | + <lst name="defaults"> | |
1480 | + <str name="echoParams">explicit</str> | |
1481 | + <str name="df">text</str> | |
1482 | + </lst> | |
1483 | + <arr name="last-components"> | |
1484 | + <str>elevator</str> | |
1485 | + </arr> | |
1486 | + </requestHandler> | |
1487 | + | |
1488 | + <!-- Highlighting Component | |
1489 | + | |
1490 | + http://wiki.apache.org/solr/HighlightingParameters | |
1491 | + --> | |
1492 | + <searchComponent class="solr.HighlightComponent" name="highlight"> | |
1493 | + <highlighting> | |
1494 | + <!-- Configure the standard fragmenter --> | |
1495 | + <!-- This could most likely be commented out in the "default" case --> | |
1496 | + <fragmenter name="gap" | |
1497 | + default="true" | |
1498 | + class="solr.highlight.GapFragmenter"> | |
1499 | + <lst name="defaults"> | |
1500 | + <int name="hl.fragsize">250</int> | |
1501 | + </lst> | |
1502 | + </fragmenter> | |
1503 | + | |
1504 | + <!-- A regular-expression-based fragmenter | |
1505 | + (for sentence extraction) | |
1506 | + --> | |
1507 | + <fragmenter name="regex" | |
1508 | + class="solr.highlight.RegexFragmenter"> | |
1509 | + <lst name="defaults"> | |
1510 | + <!-- slightly smaller fragsizes work better because of slop --> | |
1511 | + <int name="hl.fragsize">250</int> | |
1512 | + <!-- allow 50% slop on fragment sizes --> | |
1513 | + <float name="hl.regex.slop">0.5</float> | |
1514 | + <!-- a basic sentence pattern --> | |
1515 | + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> | |
1516 | + </lst> | |
1517 | + </fragmenter> | |
1518 | + | |
1519 | + <!-- Configure the standard formatter --> | |
1520 | + <formatter name="html" | |
1521 | + default="true" | |
1522 | + class="solr.highlight.HtmlFormatter"> | |
1523 | + <lst name="defaults"> | |
1524 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
1525 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
1526 | + </lst> | |
1527 | + </formatter> | |
1528 | + | |
1529 | + <!-- Configure the standard encoder --> | |
1530 | + <encoder name="html" | |
1531 | + class="solr.highlight.HtmlEncoder" /> | |
1532 | + | |
1533 | + <!-- Configure the standard fragListBuilder --> | |
1534 | + <fragListBuilder name="simple" | |
1535 | + class="solr.highlight.SimpleFragListBuilder"/> | |
1536 | + | |
1537 | + <!-- Configure the single fragListBuilder --> | |
1538 | + <fragListBuilder name="single" | |
1539 | + class="solr.highlight.SingleFragListBuilder"/> | |
1540 | + | |
1541 | + <!-- Configure the weighted fragListBuilder --> | |
1542 | + <fragListBuilder name="weighted" | |
1543 | + default="true" | |
1544 | + class="solr.highlight.WeightedFragListBuilder"/> | |
1545 | + | |
1546 | + <!-- default tag FragmentsBuilder --> | |
1547 | + <fragmentsBuilder name="default" | |
1548 | + default="true" | |
1549 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1550 | + <!-- | |
1551 | + <lst name="defaults"> | |
1552 | + <str name="hl.multiValuedSeparatorChar">/</str> | |
1553 | + </lst> | |
1554 | + --> | |
1555 | + </fragmentsBuilder> | |
1556 | + | |
1557 | + <!-- multi-colored tag FragmentsBuilder: hl.tag.pre is a comma-separated list of opening tags, each with its own CSS background colour --> | |
1558 | + <fragmentsBuilder name="colored" | |
1559 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1560 | + <lst name="defaults"> | |
1561 | + <str name="hl.tag.pre"><![CDATA[ | |
1562 | + <b style="background:yellow">,<b style="background:lawngreen">, | |
1563 | + <b style="background:aquamarine">,<b style="background:magenta">, | |
1564 | + <b style="background:palegreen">,<b style="background:coral">, | |
1565 | + <b style="background:wheat">,<b style="background:khaki">, | |
1566 | + <b style="background:lime">,<b style="background:deepskyblue">]]></str> | |
1567 | + <str name="hl.tag.post"><![CDATA[</b>]]></str> | |
1568 | + </lst> | |
1569 | + </fragmentsBuilder> | |
1570 | + | |
1571 | + <boundaryScanner name="default" | |
1572 | + default="true" | |
1573 | + class="solr.highlight.SimpleBoundaryScanner"> | |
1574 | + <lst name="defaults"> | |
1575 | + <str name="hl.bs.maxScan">10</str> | |
1576 | + <str name="hl.bs.chars">.,!? | |
1577 | + | |
1578 | +</str> | |
1579 | + </lst> | |
1580 | + </boundaryScanner> | |
1581 | + | |
1582 | + <boundaryScanner name="breakIterator" | |
1583 | + class="solr.highlight.BreakIteratorBoundaryScanner"> | |
1584 | + <lst name="defaults"> | |
1585 | + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> | |
1586 | + <str name="hl.bs.type">WORD</str> | |
1587 | + <!-- language and country are used when constructing the Locale object. --> | |
1588 | + <!-- The Locale object is then used when getting the BreakIterator instance. --> | |
1589 | + <str name="hl.bs.language">pt</str> | |
1590 | + <str name="hl.bs.country">BR</str> | |
1591 | + </lst> | |
1592 | + </boundaryScanner> | |
1593 | + </highlighting> | |
1594 | + </searchComponent> | |
1595 | + | |
1596 | + <!-- Update Processors | |
1597 | + | |
1598 | + Chains of Update Processor Factories for dealing with Update | |
1599 | + Requests can be declared, and then used by name in Update | |
1600 | + Request Processors | |
1601 | + | |
1602 | + http://wiki.apache.org/solr/UpdateRequestProcessor | |
1603 | + | |
1604 | + --> | |
1605 | + <!-- Deduplication | |
1606 | + | |
1607 | + An example dedup update processor that creates the "id" field | |
1608 | + on the fly based on the hash code of some other fields. This | |
1609 | + example has overwriteDupes set to false since we are using the | |
1610 | + id field as the signatureField and Solr will maintain | |
1611 | + uniqueness based on that anyway. | |
1612 | + | |
1613 | + --> | |
1614 | + <!-- | |
1615 | + <updateRequestProcessorChain name="dedupe"> | |
1616 | + <processor class="solr.processor.SignatureUpdateProcessorFactory"> | |
1617 | + <bool name="enabled">true</bool> | |
1618 | + <str name="signatureField">id</str> | |
1619 | + <bool name="overwriteDupes">false</bool> | |
1620 | + <str name="fields">name,features,cat</str> | |
1621 | + <str name="signatureClass">solr.processor.Lookup3Signature</str> | |
1622 | + </processor> | |
1623 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1624 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1625 | + </updateRequestProcessorChain> | |
1626 | + --> | |
1627 | + | |
1628 | + <!-- Language identification | |
1629 | + | |
1630 | + This example update chain identifies the language of the incoming | |
1631 | + documents using the langid contrib. The detected language is | |
1632 | + written to field language_s. No field name mapping is done. | |
1633 | + The fields used for detection are text, title, subject and description, | |
1634 | + making this example suitable for detecting languages from full-text | |
1635 | + rich documents injected via ExtractingRequestHandler. | |
1636 | + See more about langId at http://wiki.apache.org/solr/LanguageDetection | |
1637 | + --> | |
1638 | + <!-- | |
1639 | + <updateRequestProcessorChain name="langid"> | |
1640 | + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> | |
1641 | + <str name="langid.fl">text,title,subject,description</str> | |
1642 | + <str name="langid.langField">language_s</str> | |
1643 | + <str name="langid.fallback">en</str> | |
1644 | + </processor> | |
1645 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1646 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1647 | + </updateRequestProcessorChain> | |
1648 | + --> | |
1649 | + | |
1650 | + <!-- Script update processor | |
1651 | + | |
1652 | + This example hooks in an update processor implemented using JavaScript. | |
1653 | + | |
1654 | + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor | |
1655 | + --> | |
1656 | + <!-- | |
1657 | + <updateRequestProcessorChain name="script"> | |
1658 | + <processor class="solr.StatelessScriptUpdateProcessorFactory"> | |
1659 | + <str name="script">update-script.js</str> | |
1660 | + <lst name="params"> | |
1661 | + <str name="config_param">example config parameter</str> | |
1662 | + </lst> | |
1663 | + </processor> | |
1664 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1665 | + </updateRequestProcessorChain> | |
1666 | + --> | |
1667 | + | |
1668 | + <!-- Response Writers | |
1669 | + | |
1670 | + http://wiki.apache.org/solr/QueryResponseWriter | |
1671 | + | |
1672 | + Request responses will be written using the writer specified by | |
1673 | + the 'wt' request parameter matching the name of a registered | |
1674 | + writer. | |
1675 | + | |
1676 | + The "default" writer is the default and will be used if 'wt' is | |
1677 | + not specified in the request. | |
1678 | + --> | |
1679 | + <!-- The following response writers are implicitly configured unless | |
1680 | + overridden... | |
1681 | + --> | |
1682 | + <!-- | |
1683 | + <queryResponseWriter name="xml" | |
1684 | + default="true" | |
1685 | + class="solr.XMLResponseWriter" /> | |
1686 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> | |
1687 | + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> | |
1688 | + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> | |
1689 | + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> | |
1690 | + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> | |
1691 | + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> | |
1692 | + --> | |
1693 | + | |
1694 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> | |
1695 | + <!-- For the purposes of the tutorial, JSON responses are written as | |
1696 | + plain text so that they are easy to read in *any* browser. | |
1697 | + If you expect a MIME type of "application/json" just remove this override. | |
1698 | + --> | |
1699 | + <str name="content-type">text/plain; charset=UTF-8</str> | |
1700 | + </queryResponseWriter> | |
1701 | + | |
1702 | + <!-- | |
1703 | + Custom response writers can be declared as needed... | |
1704 | + --> | |
1705 | + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/> | |
1706 | + | |
1707 | + | |
1708 | + <!-- XSLT response writer transforms the XML output by any xslt file found | |
1709 | + in Solr's conf/xslt directory. Changes to xslt files are checked for | |
1710 | + every xsltCacheLifetimeSeconds. | |
1711 | + --> | |
1712 | + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> | |
1713 | + <int name="xsltCacheLifetimeSeconds">5</int> | |
1714 | + </queryResponseWriter> | |
1715 | + | |
1716 | + <!-- Query Parsers | |
1717 | + | |
1718 | + http://wiki.apache.org/solr/SolrQuerySyntax | |
1719 | + | |
1720 | + Multiple QParserPlugins can be registered by name, and then | |
1721 | + used in either the "defType" param for the QueryComponent (used | |
1722 | + by SearchHandler) or in LocalParams | |
1723 | + --> | |
1724 | + <!-- example of registering a query parser --> | |
1725 | + <!-- | |
1726 | + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> | |
1727 | + --> | |
1728 | + | |
1729 | + <!-- Function Parsers | |
1730 | + | |
1731 | + http://wiki.apache.org/solr/FunctionQuery | |
1732 | + | |
1733 | + Multiple ValueSourceParsers can be registered by name, and then | |
1734 | + used as function names when using the "func" QParser. | |
1735 | + --> | |
1736 | + <!-- example of registering a custom function parser --> | |
1737 | + <!-- | |
1738 | + <valueSourceParser name="myfunc" | |
1739 | + class="com.mycompany.MyValueSourceParser" /> | |
1740 | + --> | |
1741 | + | |
1742 | + | |
1743 | + <!-- Document Transformers | |
1744 | + http://wiki.apache.org/solr/DocTransformers | |
1745 | + --> | |
1746 | + <!-- | |
1747 | + Could be something like: | |
1748 | + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > | |
1749 | + <int name="connection">jdbc://....</int> | |
1750 | + </transformer> | |
1751 | + | |
1752 | + To add a constant value to all docs, use: | |
1753 | + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1754 | + <int name="value">5</int> | |
1755 | + </transformer> | |
1756 | + | |
1757 | + If you want the user to still be able to change it with _value:something_ use this: | |
1758 | + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1759 | + <double name="defaultValue">5</double> | |
1760 | + </transformer> | |
1761 | + | |
1762 | + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The | |
1763 | + EditorialMarkerFactory will do exactly that: | |
1764 | + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> | |
1765 | + --> | |
1766 | + | |
1767 | + | |
1768 | + <!-- Legacy config for the admin interface --> | |
1769 | + <admin> | |
1770 | + <defaultQuery>*:*</defaultQuery> | |
1771 | + </admin> | |
1772 | + | |
1773 | +</config> | ... | ... |
1 | +++ a/index/sei-protocolos-schema.xml | |
... | ... | @@ -0,0 +1,1163 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + This is the Solr schema file. This file should be named "schema.xml" and | |
21 | + should be in the conf directory under the solr home | |
22 | + (i.e. ./solr/conf/schema.xml by default) | |
23 | + or located where the classloader for the Solr webapp can find it. | |
24 | + | |
25 | + This example schema is the recommended starting point for users. | |
26 | + It should be kept correct and concise, usable out-of-the-box. | |
27 | + | |
28 | + For more information, on how to customize this file, please see | |
29 | + http://wiki.apache.org/solr/SchemaXml | |
30 | + | |
31 | + PERFORMANCE NOTE: this schema includes many optional features and should not | |
32 | + be used for benchmarking. To improve performance one could | |
33 | + - set stored="false" for all fields possible (esp large fields) when you | |
34 | + only need to search on the field but don't need to return the original | |
35 | + value. | |
36 | + - set indexed="false" if you don't need to search on the field, but only | |
37 | + return the field as a result of searching on other indexed fields. | |
38 | + - remove all unneeded copyField statements | |
39 | + - for best index size and searching performance, set "indexed" to false | |
40 | + for all general text fields, use copyField to copy them to the | |
41 | + catchall "text" field, and use that for searching. | |
42 | + - For maximum indexing performance, use the StreamingUpdateSolrServer | |
43 | + java client. | |
44 | + - Remember to run the JVM in server mode, and use a higher logging level | |
45 | + that avoids logging every request | |
46 | +--> | |
47 | + | |
48 | +<schema name="sei-protocolos" version="1.5"> | |
49 | + <!-- attribute "name" is the name of this schema and is only used for display purposes. | |
50 | + version="x.y" is Solr's version number for the schema syntax and | |
51 | + semantics. It should not normally be changed by applications. | |
52 | + | |
53 | + 1.0: multiValued attribute did not exist, all fields are multiValued | |
54 | + by nature | |
55 | + 1.1: multiValued attribute introduced, false by default | |
56 | + 1.2: omitTermFreqAndPositions attribute introduced, true by default | |
57 | + except for text fields. | |
58 | + 1.3: removed optional field compress feature | |
59 | + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser | |
60 | + behavior when a single string produces multiple tokens. Defaults | |
61 | + to off for version >= 1.4 | |
62 | + 1.5: omitNorms defaults to true for primitive field types | |
63 | + (int, float, boolean, string...) | |
64 | + --> | |
65 | + | |
66 | + <fields> | |
67 | + | |
68 | + | |
69 | + <field name="idx_descricao" type="text_general" indexed="true" stored="false" /> | |
70 | + <field name="numero" type="string" indexed="true" stored="true" /> | |
71 | + <field name="id_assinante" type="string" indexed="true" stored="false" /> | |
72 | + <field name="id_protocolo" type="string" indexed="false" stored="true" /> | |
73 | + <field name="id_unidade_geradora" type="string" indexed="true" stored="false" /> | |
74 | + <field name="id_serie" type="string" indexed="true" stored="false" /> | |
75 | + <field name="dta_geracao" type="date" indexed="true" stored="true" /> | |
76 | + <field name="sigla_usuario_gerador" type="string" indexed="true" stored="true" /> | |
77 | + <field name="nome_usuario_gerador" type="string" indexed="false" stored="true" /> | |
78 | + <field name="sta_protocolo" type="string" indexed="true" stored="false" /> | |
79 | + <field name="id_assunto" type="string" indexed="true" stored="false" /> | |
80 | + <field name="id_unidade_aberto" type="string" indexed="true" stored="false" /> | |
81 | + <field name="id_unidade_acesso" type="string" indexed="true" stored="false" /> | |
82 | + <field name="id_tipo_processo" type="string" indexed="true" stored="false" /> | |
83 | + <field name="nome_tipo_processo" type="string" indexed="false" stored="true" /> | |
84 | + <field name="sigla_unidade_geradora" type="string" indexed="false" stored="true" /> | |
85 | + <field name="descricao_unidade_geradora" type="string" indexed="false" stored="true" /> | |
86 | + <field name="id_participante" type="string" indexed="true" stored="false" /> | |
87 | + <field name="tipo_acesso" type="string" indexed="true" stored="false" /> | |
88 | + <field name="identificacao_protocolo" type="string" indexed="false" stored="true" /> | |
89 | + <field name="protocolo_formatado_pesquisa" type="string" indexed="true" stored="false" /> | |
90 | + <field name="protocolo_processo_formatado" type="string" indexed="false" stored="true" /> | |
91 | + <field name="protocolo_documento_formatado" type="string" indexed="true" stored="true" /> | |
92 | + <field name="link_arvore" type="string" indexed="false" stored="true" /> | |
93 | + <dynamicField name="idx_observacao_*" type="text_general" indexed="true" stored="false" /> | |
94 | + | |
95 | + <!-- Valid attributes for fields: | |
96 | + name: mandatory - the name for the field | |
97 | + type: mandatory - the name of a field type from the | |
98 | + <types> fieldType section | |
99 | + indexed: true if this field should be indexed (searchable or sortable) | |
100 | + stored: true if this field should be retrievable | |
101 | + multiValued: true if this field may contain multiple values per document | |
102 | + omitNorms: (expert) set to true to omit the norms associated with | |
103 | + this field (this disables length normalization and index-time | |
104 | + boosting for the field, and saves some memory). Only full-text | |
105 | + fields or fields that need an index-time boost need norms. | |
106 | + Norms are omitted for primitive (non-analyzed) types by default. | |
107 | + termVectors: [false] set to true to store the term vector for a | |
108 | + given field. | |
109 | + When using MoreLikeThis, fields used for similarity should be | |
110 | + stored for best performance. | |
111 | + termPositions: Store position information with the term vector. | |
112 | + This will increase storage costs. | |
113 | + termOffsets: Store offset information with the term vector. This | |
114 | + will increase storage costs. | |
115 | + required: The field is required. It will throw an error if the | |
116 | + value does not exist | |
117 | + default: a value that should be used if no value is specified | |
118 | + when adding a document. | |
119 | + --> | |
120 | + | |
121 | + <!-- field names should consist of alphanumeric or underscore characters only and | |
122 | + not start with a digit. This is not currently strictly enforced, | |
123 | + but other field names will not have first class support from all components | |
124 | + and back compatibility is not guaranteed. Names with both leading and | |
125 | + trailing underscores (e.g. _version_) are reserved. | |
126 | + --> | |
127 | + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> | |
128 | + | |
129 | + <!-- mairon | |
130 | + <field name="documento" type="string" indexed="true" stored="true" /> | |
131 | + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> | |
132 | + <field name="name" type="text_general" indexed="true" stored="true"/> | |
133 | + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> | |
134 | + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> | |
135 | + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
136 | + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> | |
137 | + | |
138 | + <field name="weight" type="float" indexed="true" stored="true"/> | |
139 | + <field name="price" type="float" indexed="true" stored="true"/> | |
140 | + <field name="popularity" type="int" indexed="true" stored="true" /> | |
141 | + <field name="inStock" type="boolean" indexed="true" stored="true" /> | |
142 | + | |
143 | + <field name="store" type="location" indexed="true" stored="true"/> | |
144 | + --> | |
145 | + | |
146 | + <!-- Common metadata fields, named specifically to match up with | |
147 | + SolrCell metadata when parsing rich documents such as Word, PDF. | |
148 | + Some fields are multiValued only because Tika currently may return | |
149 | + multiple values for them. Some metadata is parsed from the documents, | |
150 | + but there are some which come from the client context: | |
151 | + "content_type": From the HTTP headers of incoming stream | |
152 | + "resourcename": From SolrCell request param resource.name | |
153 | + --> | |
154 | + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
155 | + <field name="subject" type="text_general" indexed="true" stored="true"/> | |
156 | + <field name="description" type="text_general" indexed="true" stored="true"/> | |
157 | + <field name="comments" type="text_general" indexed="true" stored="true"/> | |
158 | + <field name="author" type="text_general" indexed="true" stored="true"/> | |
159 | + <field name="keywords" type="text_general" indexed="true" stored="true"/> | |
160 | + <field name="category" type="text_general" indexed="true" stored="true"/> | |
161 | + <field name="resourcename" type="text_general" indexed="true" stored="true"/> | |
162 | + <field name="url" type="text_general" indexed="true" stored="true"/> | |
163 | + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> | |
164 | + <field name="last_modified" type="date" indexed="true" stored="true"/> | |
165 | + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> | |
166 | + | |
167 | + <!-- Main body of document extracted by SolrCell. | |
168 | + NOTE: This field is not indexed by default, since it is also copied to "text" | |
169 | + using copyField below. This is to save space. Use this field for returning and | |
170 | + highlighting document content. Use the "text" field to search the content. --> | |
171 | + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> | |
172 | + | |
173 | + | |
174 | + <!-- catchall field, containing all other searchable text fields (implemented | |
175 | + via copyField further on in this schema) --> | |
176 | + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> | |
177 | + | |
178 | + <!-- catchall text field that indexes tokens both normally and in reverse for efficient | |
179 | + leading wildcard queries. --> | |
180 | + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> | |
181 | + | |
182 | + <!-- non-tokenized version of manufacturer to make it easier to sort or group | |
183 | + results by manufacturer. copied from "manu" via copyField --> | |
184 | + <field name="manu_exact" type="string" indexed="true" stored="false"/> | |
185 | + | |
186 | + <field name="payloads" type="payloads" indexed="true" stored="true"/> | |
187 | + | |
188 | + <field name="_version_" type="long" indexed="true" stored="true"/> | |
189 | + | |
190 | + <!-- Uncommenting the following will create a "timestamp" field using | |
191 | + a default value of "NOW" to indicate when each document was indexed. | |
192 | + --> | |
193 | + <!-- | |
194 | + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/> | |
195 | + --> | |
196 | + | |
197 | + <!-- Dynamic field definitions allow using convention over configuration | |
198 | + for fields via the specification of patterns to match field names. | |
199 | + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) | |
200 | + RESTRICTION: the glob-like pattern in the name attribute must have | |
201 | + a "*" only at the start or the end. --> | |
202 | + | |
203 | + <dynamicField name="*_i" type="int" indexed="true" stored="true"/> | |
204 | + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/> | |
205 | + <dynamicField name="*_s" type="string" indexed="true" stored="true" /> | |
206 | + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/> | |
207 | + <dynamicField name="*_l" type="long" indexed="true" stored="true"/> | |
208 | + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/> | |
209 | + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |
210 | + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
211 | + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/> | |
212 | + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |
213 | + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/> | |
214 | + <dynamicField name="*_f" type="float" indexed="true" stored="true"/> | |
215 | + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/> | |
216 | + <dynamicField name="*_d" type="double" indexed="true" stored="true"/> | |
217 | + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/> | |
218 | + | |
219 | + <!-- Type used to index the lat and lon components for the "location" FieldType --> | |
220 | + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" /> | |
221 | + | |
222 | + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> | |
223 | + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/> | |
224 | + <dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |
225 | + | |
226 | + <!-- some trie-coded dynamic fields for faster range queries --> | |
227 | + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> | |
228 | + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> | |
229 | + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> | |
230 | + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> | |
231 | + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/> | |
232 | + | |
233 | + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> | |
234 | + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |
235 | + | |
236 | + <dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |
237 | + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
238 | + | |
239 | + <dynamicField name="random_*" type="random" /> | |
240 | + | |
241 | + <!-- uncomment the following to ignore any fields that don't already match an existing | |
242 | + field name or dynamic field, rather than reporting them as an error. | |
243 | + alternately, change the type="ignored" to some other type e.g. "text" if you want | |
244 | + unknown fields indexed and/or stored by default --> | |
245 | + <!--dynamicField name="*" type="ignored" multiValued="true" /--> | |
246 | + | |
247 | + </fields> | |
248 | + | |
249 | + | |
250 | + <!-- Field to use to determine and enforce document uniqueness. | |
251 | + Unless this field is marked with required="false", it will be a required field | |
252 | + --> | |
253 | + <uniqueKey>id</uniqueKey> | |
254 | + | |
255 | + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when | |
256 | + parsing a query string that isn't explicit about the field. Machine (non-user) | |
257 | + generated queries are best made explicit, or they can use the "df" request parameter | |
258 | + which takes precedence over this. | |
259 | + Note: Un-commenting defaultSearchField will be insufficient if your request handler | |
260 | + in solrconfig.xml defines "df", which takes precedence. That would need to be removed. | |
261 | + <defaultSearchField>text</defaultSearchField> --> | |
262 | + | |
263 | + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers | |
264 | + when parsing a query string to determine if a clause of the query should be marked as | |
265 | + required or optional, assuming the clause isn't already marked by some operator. | |
266 | + The default is OR, which is generally assumed so it is not a good idea to change it | |
267 | + globally here. The "q.op" request parameter takes precedence over this. | |
268 | + <solrQueryParser defaultOperator="OR"/> --> | |
269 | + | |
270 | + <!-- copyField commands copy one field to another at the time a document | |
271 | + is added to the index. It's used either to index the same field differently, | |
272 | + or to add multiple fields to the same field for easier/faster searching. --> | |
273 | + | |
274 | + <!-- mairon | |
275 | + <copyField source="cat" dest="text"/> | |
276 | + <copyField source="name" dest="text"/> | |
277 | + <copyField source="manu" dest="text"/> | |
278 | + <copyField source="features" dest="text"/> | |
279 | + <copyField source="includes" dest="text"/> | |
280 | + <copyField source="manu" dest="manu_exact"/> | |
281 | + <copyField source="price" dest="price_c"/> | |
282 | + --> | |
283 | + | |
284 | + <!-- Text fields from SolrCell to search by default in our catch-all field --> | |
285 | + <copyField source="title" dest="text"/> | |
286 | + <copyField source="author" dest="text"/> | |
287 | + <copyField source="description" dest="text"/> | |
288 | + <copyField source="keywords" dest="text"/> | |
289 | + <copyField source="content" dest="text"/> | |
290 | + <copyField source="content_type" dest="text"/> | |
291 | + <copyField source="resourcename" dest="text"/> | |
292 | + <copyField source="url" dest="text"/> | |
293 | + | |
294 | + <!-- Create a string version of author for faceting --> | |
295 | + <copyField source="author" dest="author_s"/> | |
296 | + | |
297 | + <!-- Above, multiple source fields are copied to the [text] field. | |
298 | + Another way to map multiple source fields to the same | |
299 | + destination field is to use the dynamic field syntax. | |
300 | + copyField also supports a maxChars to copy setting. --> | |
301 | + | |
302 | + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> --> | |
303 | + | |
304 | + <!-- copy name to alphaNameSort, a field designed for sorting by name --> | |
305 | + <!-- <copyField source="name" dest="alphaNameSort"/> --> | |
306 | + | |
307 | + <types> | |
308 | + <!-- field type definitions. The "name" attribute is | |
309 | + just a label to be used by field definitions. The "class" | |
310 | + attribute and any other attributes determine the real | |
311 | + behavior of the fieldType. | |
312 | + Class names starting with "solr" refer to java classes in a | |
313 | + standard package such as org.apache.solr.analysis | |
314 | + --> | |
315 | + | |
316 | + <fieldType name="date" class="solr.DateField" omitNorms="true"/> | |
317 | + | |
318 | + <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> | |
319 | + <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> | |
320 | + | |
321 | + <!-- boolean type: "true" or "false" --> | |
322 | + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |
323 | + | |
324 | + <!-- sortMissingLast and sortMissingFirst are optional attributes that are | |
325 | + currently supported on types that are sorted internally as strings | |
326 | + and on numeric types. | |
327 | + This includes "string","boolean", and, as of 3.5 (and 4.x), | |
328 | + int, float, long, date, double, including the "Trie" variants. | |
329 | + - If sortMissingLast="true", then a sort on this field will cause documents | |
330 | + without the field to come after documents with the field, | |
331 | + regardless of the requested sort order (asc or desc). | |
332 | + - If sortMissingFirst="true", then a sort on this field will cause documents | |
333 | + without the field to come before documents with the field, | |
334 | + regardless of the requested sort order. | |
335 | + - If sortMissingLast="false" and sortMissingFirst="false" (the default), | |
336 | + then default lucene sorting will be used which places docs without the | |
337 | + field first in an ascending sort and last in a descending sort. | |
338 | + --> | |
339 | + | |
340 | + <!-- | |
341 | + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types. | |
342 | + --> | |
343 | + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> | |
344 | + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/> | |
345 | + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/> | |
346 | + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/> | |
347 | + | |
348 | + <!-- | |
349 | + Numeric field types that index each value at various levels of precision | |
350 | + to accelerate range queries when the number of values between the range | |
351 | + endpoints is large. See the javadoc for NumericRangeQuery for internal | |
352 | + implementation details. | |
353 | + | |
354 | + Smaller precisionStep values (specified in bits) will lead to more tokens | |
355 | + indexed per value, slightly larger index size, and faster range queries. | |
356 | + A precisionStep of 0 disables indexing at different precision levels. | |
357 | + --> | |
358 | + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/> | |
359 | + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/> | |
360 | + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/> | |
361 | + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/> | |
362 | + | |
363 | + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and | |
364 | + is a more restricted form of the canonical representation of dateTime | |
365 | + http://www.w3.org/TR/xmlschema-2/#dateTime | |
366 | + The trailing "Z" designates UTC time and is mandatory. | |
367 | + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z | |
368 | + All other components are mandatory. | |
369 | + | |
370 | + Expressions can also be used to denote calculations that should be | |
371 | + performed relative to "NOW" to determine the value, ie... | |
372 | + | |
373 | + NOW/HOUR | |
374 | + ... Round to the start of the current hour | |
375 | + NOW-1DAY | |
376 | + ... Exactly 1 day prior to now | |
377 | + NOW/DAY+6MONTHS+3DAYS | |
378 | + ... 6 months and 3 days in the future from the start of | |
379 | + the current day | |
380 | + | |
381 | + Consult the DateField javadocs for more information. | |
382 | + | |
383 | + Note: For faster range queries, consider the tdate type | |
384 | + --> | |
385 | +<!-- Disabled: <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> --> | |
386 | + | |
387 | + <!-- A Trie based date field for faster date range queries and date faceting. --> | |
388 | + <fieldType class="solr.TrieDateField" name="tdate" positionIncrementGap="0" precisionStep="6" /> | |
389 | + | |
390 | + | |
391 | + <!-- Binary data type. Values are sent and retrieved as Base64-encoded strings. --> | |
392 | + <fieldType name="binary" class="solr.BinaryField"/> | |
393 | + | |
394 | + <!-- | |
395 | + Note: | |
396 | + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions). | |
397 | + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last | |
398 | + | |
399 | + Plain numeric field types that store and index the text | |
400 | + value verbatim (and hence don't correctly support range queries, since the | |
401 | + lexicographic ordering isn't equal to the numeric ordering) | |
402 | + --> | |
403 | + <fieldType class="solr.IntField" name="pint" /> | |
404 | + <fieldType class="solr.LongField" name="plong" /> | |
405 | + <fieldType class="solr.FloatField" name="pfloat" /> | |
406 | + <fieldType class="solr.DoubleField" name="pdouble" /> | |
407 | + <fieldType class="solr.DateField" name="pdate" sortMissingLast="true" /> | |
408 | + | |
409 | + <!-- "RandomSortField" neither stores nor searches any data. | |
410 | + Declare fields of this type in your schema to obtain | |
411 | + pseudo-random orderings of your docs for sorting or function | |
412 | + purposes. The ordering is derived from the field name and | |
413 | + the version of the index, so while the index version is | |
414 | + unchanged and the same field name is reused, the ordering | |
415 | + of the docs stays consistent. | |
416 | + To get a different pseudo-random ordering of documents | |
417 | + for the same version of the index, use a dynamicField and | |
418 | + change the field name in the request. | |
419 | + --> | |
420 | + <fieldType class="solr.RandomSortField" name="random" indexed="true"/> | |
421 | + | |
422 | + <!-- solr.TextField allows the specification of custom text analyzers | |
423 | + specified as a tokenizer and a list of token filters. Different | |
424 | + analyzers may be specified for indexing and querying. | |
425 | + | |
426 | + The optional positionIncrementGap puts space between multiple fields of | |
427 | + this type on the same document, with the purpose of preventing false phrase | |
428 | + matching across fields. | |
429 | + | |
430 | + For more info on customizing your analyzer chain, please see | |
431 | + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters | |
432 | + --> | |
433 | + | |
434 | + <!-- One can also specify an existing Analyzer class that has a | |
435 | + default constructor via the class attribute on the analyzer element. | |
436 | + Example: | |
437 | + <fieldType name="text_greek" class="solr.TextField"> | |
438 | + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> | |
439 | + </fieldType> | |
440 | + --> | |
441 | + | |
442 | + <!-- Whitespace-only tokenization, for exact matching of words --> | |
443 | + <fieldType class="solr.TextField" name="text_ws" positionIncrementGap="100"> | |
444 | + <analyzer> | |
445 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | |
446 | + </analyzer> | |
447 | + </fieldType> | |
448 | + | |
449 | + <!-- General-purpose text field tuned for Portuguese content: it | |
450 | + tokenizes with StandardTokenizer, removes the stop words listed in | |
451 | + "lang/stopwords_pt.txt" (case-insensitively), lower-cases, and folds | |
452 | + accented characters to ASCII. The index and query analyzers apply the | |
453 | + same filter chain; synonym expansion is disabled on both sides. --> | |
454 | + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> | |
455 | + <analyzer type="index"> | |
456 | + | |
457 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
458 | + | |
459 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
460 | + <!-- synonyms are not applied at index time; kept here for reference | |
461 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
462 | + --> | |
463 | + <filter class="solr.LowerCaseFilterFactory"/> | |
464 | + | |
465 | + <!-- mairon --> | |
466 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
467 | + <!-- mairon --> | |
468 | + | |
469 | + | |
470 | + </analyzer> | |
471 | + | |
472 | + <analyzer type="query"> | |
473 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
474 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
475 | + <filter class="solr.LowerCaseFilterFactory"/> | |
476 | + <!-- Fold accents exactly as the index analyzer does; otherwise an accented query term (e.g. "ação") can never match its folded indexed token ("acao"). --> | |
477 | + <!-- mairon --> | |
478 | + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> --> | |
479 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
480 | + <!-- <filter class="solr.BrazilianStemFilterFactory"/> --> | |
481 | + <!-- mairon --> | |
482 | + | |
483 | + </analyzer> | |
484 | + | |
485 | + </fieldType> | |
486 | + | |
487 | + <!-- English text field: StandardTokenizer, removal of English | |
488 | + stop words (lang/stopwords_en.txt), lowercasing, possessive | |
489 | + stripping, protection of the words in protwords.txt, and | |
490 | + finally Porter stemming. The query analyzer additionally | |
491 | + expands synonyms from synonyms.txt. --> | |
492 | + <fieldType class="solr.TextField" name="text_en" positionIncrementGap="100"> | |
493 | + <analyzer type="index"> | |
494 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
495 | + <!-- synonyms are only applied at query time in this configuration | |
496 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
497 | + --> | |
498 | + <!-- Stop word removal, ignoring case. | |
499 | + enablePositionIncrements=true is set in both the index and query | |
500 | + analyzers so a 'gap' is left, giving more accurate phrase queries. | |
501 | + --> | |
502 | + <filter class="solr.StopFilterFactory" | |
503 | + words="lang/stopwords_en.txt" | |
504 | + ignoreCase="true" | |
505 | + enablePositionIncrements="true" | |
506 | + /> | |
507 | + <filter class="solr.LowerCaseFilterFactory" /> | |
508 | + <filter class="solr.EnglishPossessiveFilterFactory" /> | |
509 | + <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory" /> | |
510 | + <!-- A less aggressive alternative to PorterStemFilterFactory: | |
511 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
512 | + --> | |
513 | + <filter class="solr.PorterStemFilterFactory" /> | |
514 | + </analyzer> | |
515 | + <analyzer type="query"> | |
516 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
517 | + <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" /> | |
518 | + <filter class="solr.StopFilterFactory" | |
519 | + words="lang/stopwords_en.txt" | |
520 | + ignoreCase="true" | |
521 | + enablePositionIncrements="true" | |
522 | + /> | |
523 | + <filter class="solr.LowerCaseFilterFactory" /> | |
524 | + <filter class="solr.EnglishPossessiveFilterFactory" /> | |
525 | + <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory" /> | |
526 | + <!-- A less aggressive alternative to PorterStemFilterFactory: | |
527 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
528 | + --> | |
529 | + <filter class="solr.PorterStemFilterFactory" /> | |
530 | + </analyzer> | |
531 | + </fieldType> | |
532 | + | |
533 | + <!-- English text field with aggressive word-splitting and | |
534 | + autophrase features enabled. | |
535 | + It mirrors text_en but tokenizes on whitespace and adds | |
536 | + WordDelimiterFilter, which splits and matches words on | |
537 | + case changes, alpha numeric boundaries, and | |
538 | + non-alphanumeric chars. Certain compound word cases | |
539 | + therefore work, for example query "wi fi" will match | |
540 | + document "WiFi" or "wi-fi". | |
541 | + --> | |
542 | + <fieldType class="solr.TextField" name="text_en_splitting" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |
543 | + <analyzer type="index"> | |
544 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | |
545 | + <!-- synonyms are only applied at query time in this configuration | |
546 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
547 | + --> | |
548 | + <!-- Stop word removal, ignoring case. | |
549 | + enablePositionIncrements=true is set in both the index and query | |
550 | + analyzers so a 'gap' is left, giving more accurate phrase queries. | |
551 | + --> | |
552 | + <filter class="solr.StopFilterFactory" | |
553 | + words="lang/stopwords_en.txt" | |
554 | + ignoreCase="true" | |
555 | + enablePositionIncrements="true" | |
556 | + /> | |
557 | + <filter class="solr.WordDelimiterFilterFactory" splitOnCaseChange="1" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" /> | |
558 | + <filter class="solr.LowerCaseFilterFactory" /> | |
559 | + <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory" /> | |
560 | + <filter class="solr.PorterStemFilterFactory" /> | |
561 | + | |
562 | + | |
563 | + </analyzer> | |
564 | + <analyzer type="query"> | |
565 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | |
566 | + <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" /> | |
567 | + <filter class="solr.StopFilterFactory" | |
568 | + words="lang/stopwords_en.txt" | |
569 | + ignoreCase="true" | |
570 | + enablePositionIncrements="true" | |
571 | + /> | |
572 | + <filter class="solr.WordDelimiterFilterFactory" splitOnCaseChange="1" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" /> | |
573 | + <filter class="solr.LowerCaseFilterFactory" /> | |
574 | + <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory" /> | |
575 | + <filter class="solr.PorterStemFilterFactory" /> | |
576 | + </analyzer> | |
577 | + </fieldType> | |
578 | + | |
579 | + <!-- Tighter matching with fewer false matches. Probably not ideal for product names, | |
580 | + but may suit SKUs: dashes can be inserted in the wrong place and the value still matches. --> | |
581 | + <fieldType class="solr.TextField" name="text_en_splitting_tight" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |
582 | + <analyzer> | |
583 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | |
584 | + <filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt" /> | |
585 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true" /> | |
586 | + <filter class="solr.WordDelimiterFilterFactory" catenateWords="1" catenateNumbers="1" catenateAll="0" generateWordParts="0" generateNumberParts="0" /> | |
587 | + <filter class="solr.LowerCaseFilterFactory" /> | |
588 | + <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory" /> | |
589 | + <filter class="solr.EnglishMinimalStemFilterFactory" /> | |
590 | + <!-- drops duplicate tokens that land on the same position, which can | |
591 | + happen when WordDelimiterFilter is used in conjunction with stemming. --> | |
592 | + <filter class="solr.RemoveDuplicatesTokenFilterFactory" /> | |
593 | + </analyzer> | |
594 | + </fieldType> | |
595 | + | |
596 | + <!-- Variant of a general text field whose index analyzer also reverses the | |
597 | + characters of each token, enabling more efficient leading wildcard queries. --> | |
598 | + <fieldType class="solr.TextField" name="text_general_rev" positionIncrementGap="100"> | |
599 | + <analyzer type="index"> | |
600 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
601 | + <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> | |
602 | + <filter class="solr.LowerCaseFilterFactory" /> | |
603 | + <filter class="solr.ReversedWildcardFilterFactory" maxPosAsterisk="3" | |
604 | + maxPosQuestion="2" maxFractionAsterisk="0.33" withOriginal="true" /> | |
605 | + </analyzer> | |
606 | + <analyzer type="query"> | |
607 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
608 | + <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" /> | |
609 | + <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> | |
610 | + <filter class="solr.LowerCaseFilterFactory" /> | |
611 | + </analyzer> | |
612 | + </fieldType> | |
613 | + | |
614 | + <!-- charFilter + WhitespaceTokenizer --> | |
615 | + <!-- | |
616 | + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > | |
617 | + <analyzer> | |
618 | + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> | |
619 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
620 | + </analyzer> | |
621 | + </fieldType> | |
622 | + --> | |
623 | + | |
624 | + <!-- Example of combining the KeywordTokenizer with several | |
625 | + TokenFilterFactories to produce a sortable field that | |
626 | + leaves out some properties of the source text | |
627 | + --> | |
628 | + <fieldType class="solr.TextField" name="alphaOnlySort" omitNorms="true" sortMissingLast="true"> | |
629 | + <analyzer> | |
630 | + <!-- KeywordTokenizer performs no real tokenizing: the whole | |
631 | + input string is kept as a single token | |
632 | + --> | |
633 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
634 | + <!-- The LowerCase TokenFilter lowercases the token, which is | |
635 | + useful when sorting should be case insensitive | |
636 | + --> | |
637 | + <filter class="solr.LowerCaseFilterFactory"/> | |
638 | + <!-- TrimFilter strips leading and trailing whitespace --> | |
639 | + <filter class="solr.TrimFilterFactory"/> | |
640 | + <!-- PatternReplaceFilter uses a Java regular expression to | |
641 | + replace any sequence of characters matching a pattern | |
642 | + with an arbitrary replacement string, which may contain | |
643 | + back references to portions of the original string | |
644 | + matched by the pattern. | |
645 | + | |
646 | + Pattern and replacement string syntax is described in | |
647 | + the Java Regular Expression documentation: | |
648 | + | |
649 | + http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html | |
650 | + --> | |
651 | + <filter class="solr.PatternReplaceFilterFactory" | |
652 | + replace="all" replacement="" pattern="([^a-z])" | |
653 | + /> | |
654 | + </analyzer> | |
655 | + </fieldType> | |
656 | + | |
657 | + <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > | |
658 | + <analyzer> | |
659 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
660 | + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |
661 | + </analyzer> | |
662 | + </fieldType> | |
663 | + | |
664 | + <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > | |
665 | + <analyzer> | |
666 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
667 | + <!-- | |
668 | + The DelimitedPayloadTokenFilter can put payloads on tokens... for example, | |
669 | + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f | |
670 | + Attributes of the DelimitedPayloadTokenFilterFactory : | |
671 | + "delimiter" - a one character delimiter. Default is | (pipe) | |
672 | + "encoder" - how to encode the following value into a payload | |
673 | + float -> org.apache.lucene.analysis.payloads.FloatEncoder, | |
674 | + integer -> o.a.l.a.p.IntegerEncoder | |
675 | + identity -> o.a.l.a.p.IdentityEncoder | |
676 | + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. | |
677 | + --> | |
678 | + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> | |
679 | + </analyzer> | |
680 | + </fieldType> | |
681 | + | |
682 | + <!-- keeps the whole field value as one token and lowercases it. --> | |
683 | + <fieldType class="solr.TextField" name="lowercase" positionIncrementGap="100"> | |
684 | + <analyzer> | |
685 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
686 | + <filter class="solr.LowerCaseFilterFactory"/> | |
687 | + </analyzer> | |
688 | + </fieldType> | |
689 | + | |
690 | + <!-- | |
691 | + PathHierarchyTokenizerFactory applied at index time: a query for a | |
692 | + path matches documents at that path, or in descendent paths | |
693 | + --> | |
694 | + <fieldType class="solr.TextField" name="descendent_path"> | |
695 | + <analyzer type="index"> | |
696 | + <tokenizer delimiter="/" class="solr.PathHierarchyTokenizerFactory"/> | |
697 | + </analyzer> | |
698 | + <analyzer type="query"> | |
699 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
700 | + </analyzer> | |
701 | + </fieldType> | |
702 | + <!-- | |
703 | + PathHierarchyTokenizerFactory applied at query time: a query for a | |
704 | + path matches documents at that path, or in ancestor paths | |
705 | + --> | |
706 | + <fieldType class="solr.TextField" name="ancestor_path"> | |
707 | + <analyzer type="index"> | |
708 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
709 | + </analyzer> | |
710 | + <analyzer type="query"> | |
711 | + <tokenizer delimiter="/" class="solr.PathHierarchyTokenizerFactory"/> | |
712 | + </analyzer> | |
713 | + </fieldType> | |
714 | + | |
715 | + <!-- fields of this type are neither stored nor indexed, so | |
716 | + any data added to them is ignored outright. --> | |
717 | + <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> | |
718 | + | |
719 | + <!-- This point type indexes the coordinates as separate fields (subFields) | |
720 | + If subFieldType is defined, it references a type, and a dynamic field | |
721 | + definition is created matching *___<typename>. Alternately, if | |
722 | + subFieldSuffix is defined, that is used to create the subFields. | |
723 | + Example: if subFieldType="double", then the coordinates would be | |
724 | + indexed in fields myloc_0___double,myloc_1___double. | |
725 | + Example: if subFieldSuffix="_d" then the coordinates would be indexed | |
726 | + in fields myloc_0_d,myloc_1_d | |
727 | + The subFields are an implementation detail of the fieldType, and end | |
728 | + users normally should not need to know about them. | |
729 | + --> | |
730 | + <fieldType class="solr.PointType" name="point" subFieldSuffix="_d" dimension="2" /> | |
731 | + | |
732 | + <!-- Specialized geospatial search field. When indexed, this fieldType must not be multivalued. --> | |
733 | + <fieldType class="solr.LatLonType" name="location" subFieldSuffix="_coordinate" /> | |
734 | + | |
735 | + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes. | |
736 | + For more information about this and other Spatial fields new to Solr 4, see: | |
737 | + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 | |
738 | + --> | |
739 | + <fieldType class="solr.SpatialRecursivePrefixTreeFieldType" name="location_rpt" | |
740 | + units="degrees" geo="true" distErrPct="0.025" maxDistErr="0.000009"/> | |
741 | + | |
742 | + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType | |
743 | + Parameters: | |
744 | + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD" | |
745 | + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount | |
746 | + providerClass: Lets you plug in other exchange provider backend: | |
747 | + solr.FileExchangeRateProvider is the default and takes one parameter: | |
748 | + currencyConfig: name of an xml file holding exchange rates | |
749 | + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org: | |
750 | + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web) | |
751 | + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60) | |
752 | + --> | |
753 | + <fieldType class="solr.CurrencyField" name="currency" currencyConfig="currency.xml" defaultCurrency="USD" precisionStep="8"/> | |
754 | + | |
755 | + | |
756 | + | |
757 | + <!-- some examples for different languages (generally ordered by ISO code) --> | |
758 | + | |
759 | + <!-- Arabic text --> | |
760 | + <fieldType class="solr.TextField" name="text_ar" positionIncrementGap="100"> | |
761 | + <analyzer> | |
762 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
763 | + <!-- lowercasing only affects non-arabic tokens --> | |
764 | + <filter class="solr.LowerCaseFilterFactory" /> | |
765 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
766 | + <!-- normalizes ﻯ to ﻱ, etc --> | |
767 | + <filter class="solr.ArabicNormalizationFilterFactory" /> | |
768 | + <filter class="solr.ArabicStemFilterFactory" /> | |
769 | + </analyzer> | |
770 | + </fieldType> | |
771 | + | |
772 | + <!-- Bulgarian text --> | |
773 | + <fieldType class="solr.TextField" name="text_bg" positionIncrementGap="100"> | |
774 | + <analyzer> | |
775 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
776 | + <filter class="solr.LowerCaseFilterFactory" /> | |
777 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
778 | + <filter class="solr.BulgarianStemFilterFactory" /> | |
779 | + </analyzer> | |
780 | + </fieldType> | |
781 | + | |
782 | + <!-- Catalan text --> | |
783 | + <fieldType class="solr.TextField" name="text_ca" positionIncrementGap="100"> | |
784 | + <analyzer> | |
785 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
786 | + <!-- strips elided articles such as l' --> | |
787 | + <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true" /> | |
788 | + <filter class="solr.LowerCaseFilterFactory" /> | |
789 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
790 | + <filter language="Catalan" class="solr.SnowballPorterFilterFactory" /> | |
791 | + </analyzer> | |
792 | + </fieldType> | |
793 | + | |
794 | + <!-- CJK bigrams (text_ja offers a Japanese configuration based on morphological analysis) --> | |
795 | + <fieldType class="solr.TextField" name="text_cjk" positionIncrementGap="100"> | |
796 | + <analyzer> | |
797 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
798 | + <!-- width is normalized before bigramming, as e.g. half-width dakuten combine --> | |
799 | + <filter class="solr.CJKWidthFilterFactory" /> | |
800 | + <!-- lowercasing only affects non-CJK tokens --> | |
801 | + <filter class="solr.LowerCaseFilterFactory" /> | |
802 | + <filter class="solr.CJKBigramFilterFactory" /> | |
803 | + </analyzer> | |
804 | + </fieldType> | |
805 | + | |
806 | + <!-- Czech text --> | |
807 | + <fieldType class="solr.TextField" name="text_cz" positionIncrementGap="100"> | |
808 | + <analyzer> | |
809 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
810 | + <filter class="solr.LowerCaseFilterFactory" /> | |
811 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
812 | + <filter class="solr.CzechStemFilterFactory" /> | |
813 | + </analyzer> | |
814 | + </fieldType> | |
815 | + | |
816 | + <!-- Danish text --> | |
817 | + <fieldType class="solr.TextField" name="text_da" positionIncrementGap="100"> | |
818 | + <analyzer> | |
819 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
820 | + <filter class="solr.LowerCaseFilterFactory" /> | |
821 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_da.txt" format="snowball" ignoreCase="true" enablePositionIncrements="true" /> | |
822 | + <filter language="Danish" class="solr.SnowballPorterFilterFactory" /> | |
823 | + </analyzer> | |
824 | + </fieldType> | |
825 | + | |
826 | + <!-- German text --> | |
827 | + <fieldType class="solr.TextField" name="text_de" positionIncrementGap="100"> | |
828 | + <analyzer> | |
829 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
830 | + <filter class="solr.LowerCaseFilterFactory" /> | |
831 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_de.txt" format="snowball" ignoreCase="true" enablePositionIncrements="true" /> | |
832 | + <filter class="solr.GermanNormalizationFilterFactory" /> | |
833 | + <filter class="solr.GermanLightStemFilterFactory" /> | |
834 | + <!-- less aggressive alternative: <filter class="solr.GermanMinimalStemFilterFactory"/> --> | |
835 | + <!-- more aggressive alternative: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> | |
836 | + </analyzer> | |
837 | + </fieldType> | |
838 | + | |
839 | + <!-- Greek text --> | |
840 | + <fieldType class="solr.TextField" name="text_el" positionIncrementGap="100"> | |
841 | + <analyzer> | |
842 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
843 | + <!-- Greek-specific lowercasing (handles sigma) --> | |
844 | + <filter class="solr.GreekLowerCaseFilterFactory" /> | |
845 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false" enablePositionIncrements="true" /> | |
846 | + <filter class="solr.GreekStemFilterFactory" /> | |
847 | + </analyzer> | |
848 | + </fieldType> | |
849 | + | |
850 | + <!-- Spanish text --> | |
851 | + <fieldType class="solr.TextField" name="text_es" positionIncrementGap="100"> | |
852 | + <analyzer> | |
853 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
854 | + <filter class="solr.LowerCaseFilterFactory" /> | |
855 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_es.txt" format="snowball" ignoreCase="true" enablePositionIncrements="true" /> | |
856 | + <filter class="solr.SpanishLightStemFilterFactory" /> | |
857 | + <!-- more aggressive alternative: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> | |
858 | + </analyzer> | |
859 | + </fieldType> | |
860 | + | |
861 | + <!-- Basque text --> | |
862 | + <fieldType class="solr.TextField" name="text_eu" positionIncrementGap="100"> | |
863 | + <analyzer> | |
864 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
865 | + <filter class="solr.LowerCaseFilterFactory" /> | |
866 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
867 | + <filter language="Basque" class="solr.SnowballPorterFilterFactory" /> | |
868 | + </analyzer> | |
869 | + </fieldType> | |
870 | + | |
871 | + <!-- Persian text --> | |
872 | + <fieldType class="solr.TextField" name="text_fa" positionIncrementGap="100"> | |
873 | + <analyzer> | |
874 | + <!-- char filter for ZWNJ --> | |
875 | + <charFilter class="solr.PersianCharFilterFactory" /> | |
876 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
877 | + <filter class="solr.LowerCaseFilterFactory" /> | |
878 | + <filter class="solr.ArabicNormalizationFilterFactory" /> | |
879 | + <filter class="solr.PersianNormalizationFilterFactory" /> | |
880 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
881 | + </analyzer> | |
882 | + </fieldType> | |
883 | + | |
884 | + <!-- Finnish text --> | |
885 | + <fieldType class="solr.TextField" name="text_fi" positionIncrementGap="100"> | |
886 | + <analyzer> | |
887 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
888 | + <filter class="solr.LowerCaseFilterFactory" /> | |
889 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_fi.txt" format="snowball" ignoreCase="true" enablePositionIncrements="true" /> | |
890 | + <filter language="Finnish" class="solr.SnowballPorterFilterFactory" /> | |
891 | + <!-- less aggressive alternative: <filter class="solr.FinnishLightStemFilterFactory"/> --> | |
892 | + </analyzer> | |
893 | + </fieldType> | |
894 | + | |
895 | + <!-- French text --> | |
896 | + <fieldType class="solr.TextField" name="text_fr" positionIncrementGap="100"> | |
897 | + <analyzer> | |
898 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
899 | + <!-- strips elided articles such as l' --> | |
900 | + <filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true" /> | |
901 | + <filter class="solr.LowerCaseFilterFactory" /> | |
902 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_fr.txt" format="snowball" ignoreCase="true" enablePositionIncrements="true" /> | |
903 | + <filter class="solr.FrenchLightStemFilterFactory" /> | |
904 | + <!-- less aggressive alternative: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> | |
905 | + <!-- more aggressive alternative: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> | |
906 | + </analyzer> | |
907 | + </fieldType> | |
908 | + | |
909 | + <!-- Irish text --> | |
910 | + <fieldType class="solr.TextField" name="text_ga" positionIncrementGap="100"> | |
911 | + <analyzer> | |
912 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
913 | + <!-- strips elided prefixes such as d' --> | |
914 | + <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true" /> | |
915 | + <!-- strips hyphenation prefixes (n-, etc.); position increments are intentionally disabled. NOTE(review): Lucene 4.4+ match versions reportedly reject enablePositionIncrements="false"; confirm luceneMatchVersion in solrconfig.xml --> | |
916 | + <filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true" enablePositionIncrements="false" /> | |
917 | + <filter class="solr.IrishLowerCaseFilterFactory" /> | |
918 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
919 | + <filter language="Irish" class="solr.SnowballPorterFilterFactory" /> | |
920 | + </analyzer> | |
921 | + </fieldType> | |
922 | + | |
923 | + <!-- Galician text --> | |
924 | + <fieldType class="solr.TextField" name="text_gl" positionIncrementGap="100"> | |
925 | + <analyzer> | |
926 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
927 | + <filter class="solr.LowerCaseFilterFactory" /> | |
928 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
929 | + <filter class="solr.GalicianStemFilterFactory" /> | |
930 | + <!-- less aggressive alternative: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> | |
931 | + </analyzer> | |
932 | + </fieldType> | |
933 | + | |
934 | + <!-- Hindi text --> | |
935 | + <fieldType class="solr.TextField" name="text_hi" positionIncrementGap="100"> | |
936 | + <analyzer> | |
937 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
938 | + <filter class="solr.LowerCaseFilterFactory" /> | |
939 | + <!-- unicode representation is normalized here --> | |
940 | + <filter class="solr.IndicNormalizationFilterFactory" /> | |
941 | + <!-- spelling variations are normalized here --> | |
942 | + <filter class="solr.HindiNormalizationFilterFactory" /> | |
943 | + <filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true" enablePositionIncrements="true" /> | |
944 | + <filter class="solr.HindiStemFilterFactory" /> | |
945 | + </analyzer> | |
946 | + </fieldType> | |
947 | + | |
948 | + <!-- Hungarian --> | |
949 | + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |
950 | + <analyzer> | |
951 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
952 | + <filter class="solr.LowerCaseFilterFactory"/> | |
953 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/> | |
954 | + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |
955 | + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> | |
956 | + </analyzer> | |
957 | + </fieldType> | |
958 | + | |
959 | + <!-- Armenian --> | |
960 | + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |
961 | + <analyzer> | |
962 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
963 | + <filter class="solr.LowerCaseFilterFactory"/> | |
964 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/> | |
965 | + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |
966 | + </analyzer> | |
967 | + </fieldType> | |
968 | + | |
969 | + <!-- Indonesian --> | |
970 | + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |
971 | + <analyzer> | |
972 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
973 | + <filter class="solr.LowerCaseFilterFactory"/> | |
974 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/> | |
975 | + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> | |
976 | + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |
977 | + </analyzer> | |
978 | + </fieldType> | |
979 | + | |
980 | + <!-- Italian --> | |
981 | + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |
982 | + <analyzer> | |
983 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
984 | + <!-- removes l', etc --> | |
985 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> | |
986 | + <filter class="solr.LowerCaseFilterFactory"/> | |
987 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/> | |
988 | + <filter class="solr.ItalianLightStemFilterFactory"/> | |
989 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> | |
990 | + </analyzer> | |
991 | + </fieldType> | |
992 | + | |
993 | + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming) | |
994 | + | |
995 | + NOTE: If you want to optimize search for precision, use default operator AND in your query | |
996 | + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use | |
997 | + OR if you would like to optimize for recall (default). | |
998 | + --> | |
999 | + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> | |
1000 | + <analyzer> | |
1001 | + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) | |
1002 | + | |
1003 | + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic | |
1004 | +             is used to segment compounds into their parts and the compound itself is kept as a synonym. | |
1005 | + | |
1006 | + Valid values for attribute mode are: | |
1007 | + normal: regular segmentation | |
1008 | + search: segmentation useful for search with synonyms compounds (default) | |
1009 | + extended: same as search mode, but unigrams unknown words (experimental) | |
1010 | + | |
1011 | + For some applications it might be good to use search mode for indexing and normal mode for | |
1012 | + queries to reduce recall and prevent parts of compounds from being matched and highlighted. | |
1013 | + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. | |
1014 | + | |
1015 | + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical | |
1016 | + model with your own entries for segmentation, part-of-speech tags and readings without a need | |
1017 | + to specify weights. Notice that user dictionaries have not been subject to extensive testing. | |
1018 | + | |
1019 | + User dictionary attributes are: | |
1020 | + userDictionary: user dictionary filename | |
1021 | + userDictionaryEncoding: user dictionary encoding (default is UTF-8) | |
1022 | + | |
1023 | + See lang/userdict_ja.txt for a sample user dictionary file. | |
1024 | + | |
1025 | + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. | |
1026 | + | |
1027 | + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. | |
1028 | + --> | |
1029 | + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |
1030 | + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> | |
1031 | + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> | |
1032 | + <filter class="solr.JapaneseBaseFormFilterFactory"/> | |
1033 | + <!-- Removes tokens with certain part-of-speech tags --> | |
1034 | + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/> | |
1035 | + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> | |
1036 | + <filter class="solr.CJKWidthFilterFactory"/> | |
1037 | + <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> | |
1038 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" /> | |
1039 | + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> | |
1040 | + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |
1041 | + <!-- Lower-cases romaji characters --> | |
1042 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1043 | + </analyzer> | |
1044 | + </fieldType> | |
1045 | + | |
1046 | + <!-- Latvian --> | |
1047 | + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |
1048 | + <analyzer> | |
1049 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1050 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1051 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/> | |
1052 | + <filter class="solr.LatvianStemFilterFactory"/> | |
1053 | + </analyzer> | |
1054 | + </fieldType> | |
1055 | + | |
1056 | + <!-- Dutch --> | |
1057 | + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |
1058 | + <analyzer> | |
1059 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1060 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1061 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/> | |
1062 | + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |
1063 | + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |
1064 | + </analyzer> | |
1065 | + </fieldType> | |
1066 | + | |
1067 | + <!-- Norwegian --> | |
1068 | + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |
1069 | + <analyzer> | |
1070 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1071 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1072 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/> | |
1073 | + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |
1074 | + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> | |
1075 | + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> | |
1076 | + </analyzer> | |
1077 | + </fieldType> | |
1078 | + | |
1079 | + <!-- Portuguese --> | |
1080 | + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |
1081 | + <analyzer> | |
1082 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1083 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1084 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/> | |
1085 | + | |
1086 | + <!-- mairon --> | |
1087 | + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> --> | |
1088 | + <filter class="solr.BrazilianStemFilterFactory"/> | |
1089 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
1090 | + <!-- mairon --> | |
1091 | + | |
1092 | + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> | |
1093 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> | |
1094 | + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> | |
1095 | + </analyzer> | |
1096 | + </fieldType> | |
1097 | + | |
1098 | + <!-- Romanian --> | |
1099 | + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |
1100 | + <analyzer> | |
1101 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1102 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1103 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/> | |
1104 | + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |
1105 | + </analyzer> | |
1106 | + </fieldType> | |
1107 | + | |
1108 | + <!-- Russian --> | |
1109 | + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |
1110 | + <analyzer> | |
1111 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1112 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1113 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/> | |
1114 | + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |
1115 | + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> | |
1116 | + </analyzer> | |
1117 | + </fieldType> | |
1118 | + | |
1119 | + <!-- Swedish --> | |
1120 | + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |
1121 | + <analyzer> | |
1122 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1123 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1124 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/> | |
1125 | + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |
1126 | + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> | |
1127 | + </analyzer> | |
1128 | + </fieldType> | |
1129 | + | |
1130 | + <!-- Thai --> | |
1131 | + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |
1132 | + <analyzer> | |
1133 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1134 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1135 | + <filter class="solr.ThaiWordFilterFactory"/> | |
1136 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/> | |
1137 | + </analyzer> | |
1138 | + </fieldType> | |
1139 | + | |
1140 | + <!-- Turkish --> | |
1141 | + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |
1142 | + <analyzer> | |
1143 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1144 | + <filter class="solr.TurkishLowerCaseFilterFactory"/> | |
1145 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/> | |
1146 | + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |
1147 | + </analyzer> | |
1148 | + </fieldType> | |
1149 | + | |
1150 | + </types> | |
1151 | + | |
1152 | + <!-- Similarity is the scoring routine for each document vs. a query. | |
1153 | + A custom Similarity or SimilarityFactory may be specified here, but | |
1154 | + the default is fine for most applications. | |
1155 | + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity | |
1156 | + --> | |
1157 | + <!-- | |
1158 | + <similarity class="com.example.solr.CustomSimilarityFactory"> | |
1159 | + <str name="paramkey">param value</str> | |
1160 | + </similarity> | |
1161 | + --> | |
1162 | + | |
1163 | +</schema> | |
0 | 1164 | \ No newline at end of file | ... | ... |
1 | +++ a/index/sei-publicacoes-config.xml | |
... | ... | @@ -0,0 +1,1773 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + For more details about configurations options that may appear in | |
21 | + this file, see http://wiki.apache.org/solr/SolrConfigXml. | |
22 | +--> | |
23 | +<config> | |
24 | + <!-- In all configuration below, a prefix of "solr." for class names | |
25 | + is an alias that causes solr to search appropriate packages, | |
26 | + including org.apache.solr.(search|update|request|core|analysis) | |
27 | + | |
28 | + You may also specify a fully qualified Java classname if you | |
29 | + have your own custom plugins. | |
30 | + --> | |
31 | + | |
32 | + <!-- Controls what version of Lucene various components of Solr | |
33 | + adhere to. Generally, you want to use the latest version to | |
34 | + get all bug fixes and improvements. It is highly recommended | |
35 | + that you fully re-index after changing this setting as it can | |
36 | + affect both how text is indexed and queried. | |
37 | + --> | |
38 | + <luceneMatchVersion>LUCENE_40</luceneMatchVersion> | |
39 | + | |
40 | +  <!-- <lib/> directives can be used to instruct Solr to load any Jars | |
41 | + identified and use them to resolve any "plugins" specified in | |
42 | + your solrconfig.xml or schema.xml (ie: Analyzers, Request | |
43 | + Handlers, etc...). | |
44 | + | |
45 | + All directories and paths are resolved relative to the | |
46 | + instanceDir. | |
47 | + | |
48 | + Please note that <lib/> directives are processed in the order | |
49 | + that they appear in your solrconfig.xml file, and are "stacked" | |
50 | + on top of each other when building a ClassLoader - so if you have | |
51 | + plugin jars with dependencies on other jars, the "lower level" | |
52 | + dependency jars should be loaded first. | |
53 | + | |
54 | + If a "./lib" directory exists in your instanceDir, all files | |
55 | + found in it are included as if you had used the following | |
56 | + syntax... | |
57 | + | |
58 | +--> | |
59 | + <lib dir="./lib" /> | |
60 | + | |
61 | + | |
62 | + <!-- A 'dir' option by itself adds any files found in the directory | |
63 | + to the classpath, this is useful for including all jars in a | |
64 | + directory. | |
65 | + | |
66 | + When a 'regex' is specified in addition to a 'dir', only the | |
67 | + files in that directory which completely match the regex | |
68 | + (anchored on both ends) will be included. | |
69 | + | |
70 | + The examples below can be used to load some solr-contribs along | |
71 | + with their external dependencies. | |
72 | + --> | |
73 | + <lib dir="./contrib/extraction/lib" regex=".*\.jar" /> | |
74 | + <lib dir="./dist/" regex="apache-solr-cell-\d.*\.jar" /> | |
75 | + | |
76 | + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" /> | |
77 | + <lib dir="./dist/" regex="apache-solr-clustering-\d.*\.jar" /> | |
78 | + | |
79 | + <lib dir="./contrib/langid/lib/" regex=".*\.jar" /> | |
80 | + <lib dir="./dist/" regex="apache-solr-langid-\d.*\.jar" /> | |
81 | + | |
82 | + <lib dir="./contrib/velocity/lib" regex=".*\.jar" /> | |
83 | + <lib dir="./dist/" regex="apache-solr-velocity-\d.*\.jar" /> | |
84 | + | |
85 | + <!-- If a 'dir' option (with or without a regex) is used and nothing | |
86 | + is found that matches, it will be ignored | |
87 | + --> | |
88 | + <lib dir="/total/crap/dir/ignored" /> | |
89 | + | |
90 | + <!-- an exact 'path' can be used instead of a 'dir' to specify a | |
91 | + specific jar file. This will cause a serious error to be logged | |
92 | + if it can't be loaded. | |
93 | + --> | |
94 | + <!-- | |
95 | + <lib path="../a-jar-that-does-not-exist.jar" /> | |
96 | + --> | |
97 | + | |
98 | + <!-- Data Directory | |
99 | + | |
100 | + Used to specify an alternate directory to hold all index data | |
101 | + other than the default ./data under the Solr home. If | |
102 | + replication is in use, this should match the replication | |
103 | + configuration. | |
104 | + --> | |
105 | + <dataDir>${solr.data.dir:}</dataDir> | |
106 | + | |
107 | + | |
108 | + <!-- The DirectoryFactory to use for indexes. | |
109 | + | |
110 | + solr.StandardDirectoryFactory is filesystem | |
111 | + based and tries to pick the best implementation for the current | |
112 | + JVM and platform. solr.NRTCachingDirectoryFactory, the default, | |
113 | + wraps solr.StandardDirectoryFactory and caches small files in memory | |
114 | + for better NRT performance. | |
115 | + | |
116 | + One can force a particular implementation via solr.MMapDirectoryFactory, | |
117 | + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. | |
118 | + | |
119 | + solr.RAMDirectoryFactory is memory based, not | |
120 | + persistent, and doesn't work with replication. | |
121 | + --> | |
122 | + <directoryFactory name="DirectoryFactory" | |
123 | + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> | |
124 | + | |
125 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
126 | + Index Config - These settings control low-level behavior of indexing | |
127 | + Most example settings here show the default value, but are commented | |
128 | + out, to more easily see where customizations have been made. | |
129 | + | |
130 | + Note: This replaces <indexDefaults> and <mainIndex> from older versions | |
131 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
132 | + <indexConfig> | |
133 | + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a | |
134 | + LimitTokenCountFilterFactory in your fieldType definition. E.g. | |
135 | + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> | |
136 | + --> | |
137 | + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> | |
138 | + <!-- <writeLockTimeout>1000</writeLockTimeout> --> | |
139 | + | |
140 | + <!-- Expert: Enabling compound file will use less files for the index, | |
141 | + using fewer file descriptors on the expense of performance decrease. | |
142 | + Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> | |
143 | + <!-- <useCompoundFile>false</useCompoundFile> --> | |
144 | + | |
145 | + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene | |
146 | + indexing for buffering added documents and deletions before they are | |
147 | + flushed to the Directory. | |
148 | + maxBufferedDocs sets a limit on the number of documents buffered | |
149 | + before flushing. | |
150 | + If both ramBufferSizeMB and maxBufferedDocs is set, then | |
151 | + Lucene will flush based on whichever limit is hit first. --> | |
152 | + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> --> | |
153 | + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> | |
154 | + | |
155 | + <!-- Expert: Merge Policy | |
156 | + The Merge Policy in Lucene controls how merging of segments is done. | |
157 | + The default since Solr/Lucene 3.3 is TieredMergePolicy. | |
158 | + The default since Lucene 2.3 was the LogByteSizeMergePolicy, | |
159 | + Even older versions of Lucene used LogDocMergePolicy. | |
160 | + --> | |
161 | + <!-- | |
162 | + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"> | |
163 | + <int name="maxMergeAtOnce">10</int> | |
164 | + <int name="segmentsPerTier">10</int> | |
165 | + </mergePolicy> | |
166 | + --> | |
167 | + | |
168 | + <!-- Merge Factor | |
169 | + The merge factor controls how many segments will get merged at a time. | |
170 | + For TieredMergePolicy, mergeFactor is a convenience parameter which | |
171 | + will set both MaxMergeAtOnce and SegmentsPerTier at once. | |
172 | + For LogByteSizeMergePolicy, mergeFactor decides how many new segments | |
173 | + will be allowed before they are merged into one. | |
174 | + Default is 10 for both merge policies. | |
175 | + --> | |
176 | + <!-- | |
177 | + <mergeFactor>10</mergeFactor> | |
178 | + --> | |
179 | + | |
180 | + <!-- Expert: Merge Scheduler | |
181 | + The Merge Scheduler in Lucene controls how merges are | |
182 | + performed. The ConcurrentMergeScheduler (Lucene 2.3 default) | |
183 | + can perform merges in the background using separate threads. | |
184 | + The SerialMergeScheduler (Lucene 2.2 default) does not. | |
185 | + --> | |
186 | + <!-- | |
187 | + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> | |
188 | + --> | |
189 | + | |
190 | + <!-- LockFactory | |
191 | + | |
192 | + This option specifies which Lucene LockFactory implementation | |
193 | + to use. | |
194 | + | |
195 | + single = SingleInstanceLockFactory - suggested for a | |
196 | + read-only index or when there is no possibility of | |
197 | + another process trying to modify the index. | |
198 | + native = NativeFSLockFactory - uses OS native file locking. | |
199 | + Do not use when multiple solr webapps in the same | |
200 | + JVM are attempting to share a single index. | |
201 | + simple = SimpleFSLockFactory - uses a plain file for locking | |
202 | + | |
203 | + Defaults: 'native' is default for Solr3.6 and later, otherwise | |
204 | + 'simple' is the default | |
205 | + | |
206 | + More details on the nuances of each LockFactory... | |
207 | + http://wiki.apache.org/lucene-java/AvailableLockFactories | |
208 | + --> | |
209 | + <!-- <lockType>native</lockType> --> | |
210 | + | |
211 | + <!-- Unlock On Startup | |
212 | + | |
213 | + If true, unlock any held write or commit locks on startup. | |
214 | + This defeats the locking mechanism that allows multiple | |
215 | + processes to safely access a lucene index, and should be used | |
216 | + with care. Default is "false". | |
217 | + | |
218 | + This is not needed if lock type is 'none' or 'single' | |
219 | + --> | |
220 | + <!-- | |
221 | + <unlockOnStartup>false</unlockOnStartup> | |
222 | + --> | |
223 | + | |
224 | + <!-- Expert: Controls how often Lucene loads terms into memory | |
225 | + Default is 128 and is likely good for most everyone. | |
226 | + --> | |
227 | + <!-- <termIndexInterval>128</termIndexInterval> --> | |
228 | + | |
229 | + <!-- If true, IndexReaders will be reopened (often more efficient) | |
230 | + instead of closed and then opened. Default: true | |
231 | + --> | |
232 | + <!-- | |
233 | + <reopenReaders>true</reopenReaders> | |
234 | + --> | |
235 | + | |
236 | + <!-- Commit Deletion Policy | |
237 | + | |
238 | + Custom deletion policies can be specified here. The class must | |
239 | + implement org.apache.lucene.index.IndexDeletionPolicy. | |
240 | + | |
241 | + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html | |
242 | + | |
243 | + The default Solr IndexDeletionPolicy implementation supports | |
244 | + deleting index commit points on number of commits, age of | |
245 | + commit point and optimized status. | |
246 | + | |
247 | + The latest commit point should always be preserved regardless | |
248 | + of the criteria. | |
249 | + --> | |
250 | + <!-- | |
251 | + <deletionPolicy class="solr.SolrDeletionPolicy"> | |
252 | + --> | |
253 | + <!-- The number of commit points to be kept --> | |
254 | + <!-- <str name="maxCommitsToKeep">1</str> --> | |
255 | + <!-- The number of optimized commit points to be kept --> | |
256 | + <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> | |
257 | + <!-- | |
258 | + Delete all commit points once they have reached the given age. | |
259 | + Supports DateMathParser syntax e.g. | |
260 | + --> | |
261 | + <!-- | |
262 | + <str name="maxCommitAge">30MINUTES</str> | |
263 | + <str name="maxCommitAge">1DAY</str> | |
264 | + --> | |
265 | + <!-- | |
266 | + </deletionPolicy> | |
267 | + --> | |
268 | + | |
269 | + <!-- Lucene Infostream | |
270 | + | |
271 | + To aid in advanced debugging, Lucene provides an "InfoStream" | |
272 | + of detailed information when indexing. | |
273 | + | |
274 | + Setting The value to true will instruct the underlying Lucene | |
275 | + IndexWriter to write its debugging info the specified file | |
276 | + --> | |
277 | + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> | |
278 | + </indexConfig> | |
279 | + | |
280 | + | |
281 | + <!-- JMX | |
282 | + | |
283 | + This example enables JMX if and only if an existing MBeanServer | |
284 | + is found, use this if you want to configure JMX through JVM | |
285 | + parameters. Remove this to disable exposing Solr configuration | |
286 | + and statistics to JMX. | |
287 | + | |
288 | + For more details see http://wiki.apache.org/solr/SolrJmx | |
289 | + --> | |
290 | + <jmx /> | |
291 | + <!-- If you want to connect to a particular server, specify the | |
292 | + agentId | |
293 | + --> | |
294 | + <!-- <jmx agentId="myAgent" /> --> | |
295 | + <!-- If you want to start a new MBeanServer, specify the serviceUrl --> | |
296 | + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> | |
297 | + --> | |
298 | + | |
299 | + <!-- The default high-performance update handler --> | |
300 | + <updateHandler class="solr.DirectUpdateHandler2"> | |
301 | + | |
302 | + <!-- AutoCommit | |
303 | + | |
304 | + Perform a hard commit automatically under certain conditions. | |
305 | + Instead of enabling autoCommit, consider using "commitWithin" | |
306 | + when adding documents. | |
307 | + | |
308 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
309 | + | |
310 | + maxDocs - Maximum number of documents to add since the last | |
311 | + commit before automatically triggering a new commit. | |
312 | + | |
313 | + maxTime - Maximum amount of time in ms that is allowed to pass | |
314 | +                   since a document was added before automatically | |
315 | + triggering a new commit. | |
316 | + openSearcher - if false, the commit causes recent index changes | |
317 | + to be flushed to stable storage, but does not cause a new | |
318 | + searcher to be opened to make those changes visible. | |
319 | + --> | |
320 | + <autoCommit> | |
321 | + <maxTime>300000</maxTime> | |
322 | + <openSearcher>false</openSearcher> | |
323 | + </autoCommit> | |
324 | + | |
325 | +    <!-- autoSoftCommit is like autoCommit except it causes a | |
326 | + 'soft' commit which only ensures that changes are visible | |
327 | + but does not ensure that data is synced to disk. This is | |
328 | + faster and more near-realtime friendly than a hard commit. | |
329 | + --> | |
330 | + <!-- | |
331 | + <autoSoftCommit> | |
332 | + <maxTime>1000</maxTime> | |
333 | + </autoSoftCommit> | |
334 | + --> | |
335 | + | |
336 | + <!-- Update Related Event Listeners | |
337 | + | |
338 | + Various IndexWriter related events can trigger Listeners to | |
339 | + take actions. | |
340 | + | |
341 | + postCommit - fired after every commit or optimize command | |
342 | + postOptimize - fired after every optimize command | |
343 | + --> | |
344 | + <!-- The RunExecutableListener executes an external command from a | |
345 | + hook such as postCommit or postOptimize. | |
346 | + | |
347 | + exe - the name of the executable to run | |
348 | + dir - dir to use as the current working directory. (default=".") | |
349 | + wait - the calling thread waits until the executable returns. | |
350 | + (default="true") | |
351 | + args - the arguments to pass to the program. (default is none) | |
352 | + env - environment variables to set. (default is none) | |
353 | + --> | |
354 | + <!-- This example shows how RunExecutableListener could be used | |
355 | + with the script based replication... | |
356 | + http://wiki.apache.org/solr/CollectionDistribution | |
357 | + --> | |
358 | + <!-- | |
359 | + <listener event="postCommit" class="solr.RunExecutableListener"> | |
360 | + <str name="exe">solr/bin/snapshooter</str> | |
361 | + <str name="dir">.</str> | |
362 | + <bool name="wait">true</bool> | |
363 | + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> | |
364 | + <arr name="env"> <str>MYVAR=val1</str> </arr> | |
365 | + </listener> | |
366 | + --> | |
367 | + | |
368 | + <!-- Enables a transaction log, currently used for real-time get. | |
369 | + "dir" - the target directory for transaction logs, defaults to the | |
370 | + solr data directory. --> | |
371 | + <updateLog> | |
372 | + <str name="dir">${solr.data.dir:}</str> | |
373 | + </updateLog> | |
374 | + | |
375 | + | |
376 | + </updateHandler> | |
377 | + | |
378 | + <!-- IndexReaderFactory | |
379 | + | |
380 | + Use the following format to specify a custom IndexReaderFactory, | |
381 | + which allows for alternate IndexReader implementations. | |
382 | + | |
383 | + ** Experimental Feature ** | |
384 | + | |
385 | + Please note - Using a custom IndexReaderFactory may prevent | |
386 | + certain other features from working. The API to | |
387 | + IndexReaderFactory may change without warning or may even be | |
388 | + removed from future releases if the problems cannot be | |
389 | + resolved. | |
390 | + | |
391 | + | |
392 | + ** Features that may not work with custom IndexReaderFactory ** | |
393 | + | |
394 | + The ReplicationHandler assumes a disk-resident index. Using a | |
395 | + custom IndexReader implementation may cause incompatibility | |
396 | + with ReplicationHandler and may cause replication to not work | |
397 | + correctly. See SOLR-1366 for details. | |
398 | + | |
399 | + --> | |
400 | + <!-- | |
401 | + <indexReaderFactory name="IndexReaderFactory" class="package.class"> | |
402 | + <str name="someArg">Some Value</str> | |
403 | + </indexReaderFactory > | |
404 | + --> | |
405 | + <!-- By explicitly declaring the Factory, the termIndexDivisor can | |
406 | + be specified. | |
407 | + --> | |
408 | + <!-- | |
409 | + <indexReaderFactory name="IndexReaderFactory" | |
410 | + class="solr.StandardIndexReaderFactory"> | |
411 | + <int name="setTermIndexDivisor">12</int> | |
412 | + </indexReaderFactory > | |
413 | + --> | |
414 | + | |
415 | + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
416 | + Query section - these settings control query time things like caches | |
417 | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> | |
418 | + <query> | |
419 | + <!-- Max Boolean Clauses | |
420 | + | |
421 | + Maximum number of clauses in each BooleanQuery, an exception | |
422 | + is thrown if exceeded. | |
423 | + | |
424 | + ** WARNING ** | |
425 | + | |
426 | + This option actually modifies a global Lucene property that | |
427 | + will affect all SolrCores. If multiple solrconfig.xml files | |
428 | + disagree on this property, the value at any given moment will | |
429 | + be based on the last SolrCore to be initialized. | |
430 | + | |
431 | + --> | |
432 | + <maxBooleanClauses>1024</maxBooleanClauses> | |
433 | + | |
434 | + | |
435 | + <!-- Solr Internal Query Caches | |
436 | + | |
437 | + There are two implementations of cache available for Solr, | |
438 | + LRUCache, based on a synchronized LinkedHashMap, and | |
439 | + FastLRUCache, based on a ConcurrentHashMap. | |
440 | + | |
441 | + FastLRUCache has faster gets and slower puts in single | |
442 | + threaded operation and thus is generally faster than LRUCache | |
443 | + when the hit ratio of the cache is high (> 75%), and may be | |
444 | + faster under other scenarios on multi-cpu systems. | |
445 | + --> | |
446 | + | |
447 | + <!-- Filter Cache | |
448 | + | |
449 | + Cache used by SolrIndexSearcher for filters (DocSets), | |
450 | + unordered sets of *all* documents that match a query. When a | |
451 | + new searcher is opened, its caches may be prepopulated or | |
452 | + "autowarmed" using data from caches in the old searcher. | |
453 | + autowarmCount is the number of items to prepopulate. For | |
454 | + LRUCache, the autowarmed items will be the most recently | |
455 | + accessed items. | |
456 | + | |
457 | + Parameters: | |
458 | + class - the SolrCache implementation to use | |
459 | + (LRUCache or FastLRUCache) | |
460 | + size - the maximum number of entries in the cache | |
461 | + initialSize - the initial capacity (number of entries) of | |
462 | + the cache. (see java.util.HashMap) | |
463 | + autowarmCount - the number of entries to prepopulate from | |
464 | + an old cache. | |
465 | + --> | |
466 | + <filterCache class="solr.FastLRUCache" | |
467 | + size="512" | |
468 | + initialSize="512" | |
469 | + autowarmCount="0"/> | |
470 | + | |
471 | + <!-- Query Result Cache | |
472 | + | |
473 | + Caches results of searches - ordered lists of document ids | |
474 | + (DocList) based on a query, a sort, and the range of documents requested. | |
475 | + --> | |
476 | + <queryResultCache class="solr.LRUCache" | |
477 | + size="512" | |
478 | + initialSize="512" | |
479 | + autowarmCount="0"/> | |
480 | + | |
481 | + <!-- Document Cache | |
482 | + | |
483 | + Caches Lucene Document objects (the stored fields for each | |
484 | + document). Since Lucene internal document ids are transient, | |
485 | + this cache will not be autowarmed. | |
486 | + --> | |
487 | + <documentCache class="solr.LRUCache" | |
488 | + size="512" | |
489 | + initialSize="512" | |
490 | + autowarmCount="0"/> | |
491 | + | |
492 | + <!-- Field Value Cache | |
493 | + | |
494 | + Cache used to hold field values that are quickly accessible | |
495 | + by document id. The fieldValueCache is created by default | |
496 | + even if not configured here. | |
497 | + --> | |
498 | + <!-- | |
499 | + <fieldValueCache class="solr.FastLRUCache" | |
500 | + size="512" | |
501 | + autowarmCount="128" | |
502 | + showItems="32" /> | |
503 | + --> | |
504 | + | |
505 | + <!-- Custom Cache | |
506 | + | |
507 | + Example of a generic cache. These caches may be accessed by | |
508 | + name through SolrIndexSearcher.getCache(),cacheLookup(), and | |
509 | + cacheInsert(). The purpose is to enable easy caching of | |
510 | + user/application level data. The regenerator argument should | |
511 | + be specified as an implementation of solr.CacheRegenerator | |
512 | + if autowarming is desired. | |
513 | + --> | |
514 | + <!-- | |
515 | + <cache name="myUserCache" | |
516 | + class="solr.LRUCache" | |
517 | + size="4096" | |
518 | + initialSize="1024" | |
519 | + autowarmCount="1024" | |
520 | + regenerator="com.mycompany.MyRegenerator" | |
521 | + /> | |
522 | + --> | |
523 | + | |
524 | + | |
525 | + <!-- Lazy Field Loading | |
526 | + | |
527 | + If true, stored fields that are not requested will be loaded | |
528 | + lazily. This can result in a significant speed improvement | |
529 | + if the usual case is to not load all stored fields, | |
530 | + especially if the skipped fields are large compressed text | |
531 | + fields. | |
532 | + --> | |
533 | + <enableLazyFieldLoading>true</enableLazyFieldLoading> | |
534 | + | |
535 | + <!-- Use Filter For Sorted Query | |
536 | + | |
537 | + A possible optimization that attempts to use a filter to | |
538 | + satisfy a search. If the requested sort does not include | |
539 | + score, then the filterCache will be checked for a filter | |
540 | + matching the query. If found, the filter will be used as the | |
541 | + source of document ids, and then the sort will be applied to | |
542 | + that. | |
543 | + | |
544 | + For most situations, this will not be useful unless you | |
545 | + frequently get the same search repeatedly with different sort | |
546 | + options, and none of them ever use "score" | |
547 | + --> | |
548 | + <!-- | |
549 | + <useFilterForSortedQuery>true</useFilterForSortedQuery> | |
550 | + --> | |
551 | + | |
552 | + <!-- Result Window Size | |
553 | + | |
554 | + An optimization for use with the queryResultCache. When a search | |
555 | + is requested, a superset of the requested number of document ids | |
556 | + are collected. For example, if a search for a particular query | |
557 | + requests matching documents 10 through 19, and queryResultWindowSize is 50, | |
558 | + then documents 0 through 49 will be collected and cached. Any further | |
559 | + requests in that range can be satisfied via the cache. | |
560 | + --> | |
561 | + <queryResultWindowSize>20</queryResultWindowSize> | |
562 | + | |
563 | + <!-- Maximum number of documents to cache for any entry in the | |
564 | + queryResultCache. | |
565 | + --> | |
566 | + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> | |
567 | + | |
568 | + <!-- Query Related Event Listeners | |
569 | + | |
570 | + Various IndexSearcher related events can trigger Listeners to | |
571 | + take actions. | |
572 | + | |
573 | + newSearcher - fired whenever a new searcher is being prepared | |
574 | + and there is a current searcher handling requests (aka | |
575 | + registered). It can be used to prime certain caches to | |
576 | + prevent long request times for certain requests. | |
577 | + | |
578 | + firstSearcher - fired whenever a new searcher is being | |
579 | + prepared but there is no current registered searcher to handle | |
580 | + requests or to gain autowarming data from. | |
581 | + | |
582 | + | |
583 | + --> | |
584 | + <!-- QuerySenderListener takes an array of NamedList and executes a | |
585 | + local query request for each NamedList in sequence. | |
586 | + --> | |
587 | + <listener event="newSearcher" class="solr.QuerySenderListener"> | |
588 | + <arr name="queries"> | |
589 | + <!-- | |
590 | + <lst><str name="q">solr</str><str name="sort">price asc</str></lst> | |
591 | + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> | |
592 | + --> | |
593 | + </arr> | |
594 | + </listener> | |
595 | + <listener event="firstSearcher" class="solr.QuerySenderListener"> | |
596 | + <arr name="queries"> | |
597 | + <lst> | |
598 | + <str name="q">static firstSearcher warming in solrconfig.xml</str> | |
599 | + </lst> | |
600 | + </arr> | |
601 | + </listener> | |
602 | + | |
603 | + <!-- Use Cold Searcher | |
604 | + | |
605 | + If a search request comes in and there is no current | |
606 | + registered searcher, then immediately register the still | |
607 | + warming searcher and use it. If "false" then all requests | |
608 | + will block until the first searcher is done warming. | |
609 | + --> | |
610 | + <useColdSearcher>false</useColdSearcher> | |
611 | + | |
612 | + <!-- Max Warming Searchers | |
613 | + | |
614 | + Maximum number of searchers that may be warming in the | |
615 | + background concurrently. An error is returned if this limit | |
616 | + is exceeded. | |
617 | + | |
618 | + Recommend values of 1-2 for read-only slaves, higher for | |
619 | + masters w/o cache warming. | |
620 | + --> | |
621 | + <maxWarmingSearchers>2</maxWarmingSearchers> | |
622 | + | |
623 | + </query> | |
624 | + | |
625 | + | |
626 | + <!-- Request Dispatcher | |
627 | + | |
628 | + This section contains instructions for how the SolrDispatchFilter | |
629 | + should behave when processing requests for this SolrCore. | |
630 | + | |
631 | + handleSelect is a legacy option that affects the behavior of requests | |
632 | + such as /select?qt=XXX | |
633 | + | |
634 | + handleSelect="true" will cause the SolrDispatchFilter to process | |
635 | + the request and dispatch the query to a handler specified by the | |
636 | + "qt" param, assuming "/select" isn't already registered. | |
637 | + | |
638 | + handleSelect="false" will cause the SolrDispatchFilter to | |
639 | + ignore "/select" requests, resulting in a 404 unless a handler | |
640 | + is explicitly registered with the name "/select" | |
641 | + | |
642 | + handleSelect="true" is not recommended for new users, but is the default | |
643 | + for backwards compatibility | |
644 | + --> | |
645 | + <requestDispatcher handleSelect="false" > | |
646 | + <!-- Request Parsing | |
647 | + | |
648 | + These settings indicate how Solr Requests may be parsed, and | |
649 | + what restrictions may be placed on the ContentStreams from | |
650 | + those requests | |
651 | + | |
652 | + enableRemoteStreaming - enables use of the stream.file | |
653 | + and stream.url parameters for specifying remote streams. | |
654 | + | |
655 | + multipartUploadLimitInKB - specifies the max size of | |
656 | + Multipart File Uploads that Solr will allow in a Request. | |
657 | + | |
658 | + *** WARNING *** | |
659 | + The settings below authorize Solr to fetch remote files. You | |
660 | + should make sure your system has some authentication before | |
661 | + using enableRemoteStreaming="true". | |
662 | + | |
663 | + --> | |
664 | + <requestParsers enableRemoteStreaming="true" | |
665 | + multipartUploadLimitInKB="2048000" /> | |
666 | + | |
667 | + <!-- HTTP Caching | |
668 | + | |
669 | + Set HTTP caching related parameters (for proxy caches and clients). | |
670 | + | |
671 | + The options below instruct Solr not to output any HTTP Caching | |
672 | + related headers | |
673 | + --> | |
674 | + <httpCaching never304="true" /> | |
675 | + <!-- If you include a <cacheControl> directive, it will be used to | |
676 | + generate a Cache-Control header (as well as an Expires header | |
677 | + if the value contains "max-age=") | |
678 | + | |
679 | + By default, no Cache-Control header is generated. | |
680 | + | |
681 | + You can use the <cacheControl> option even if you have set | |
682 | + never304="true" | |
683 | + --> | |
684 | + <!-- | |
685 | + <httpCaching never304="true" > | |
686 | + <cacheControl>max-age=30, public</cacheControl> | |
687 | + </httpCaching> | |
688 | + --> | |
689 | + <!-- To enable Solr to respond with automatically generated HTTP | |
690 | + Caching headers, and to respond to Cache Validation requests | |
691 | + correctly, set the value of never304="false" | |
692 | + | |
693 | + This will cause Solr to generate Last-Modified and ETag | |
694 | + headers based on the properties of the Index. | |
695 | + | |
696 | + The following options can also be specified to affect the | |
697 | + values of these headers... | |
698 | + | |
699 | + lastModFrom - the default value is "openTime" which means the | |
700 | + Last-Modified value (and validation against If-Modified-Since | |
701 | + requests) will all be relative to when the current Searcher | |
702 | + was opened. You can change it to lastModFrom="dirLastMod" if | |
703 | + you want the value to exactly correspond to when the physical | |
704 | + index was last modified. | |
705 | + | |
706 | + etagSeed="..." is an option you can change to force the ETag | |
707 | + header (and validation against If-None-Match requests) to be | |
708 | + different even if the index has not changed (ie: when making | |
709 | + significant changes to your config file) | |
710 | + | |
711 | + (lastModifiedFrom and etagSeed are both ignored if you use | |
712 | + the never304="true" option) | |
713 | + --> | |
714 | + <!-- | |
715 | + <httpCaching lastModifiedFrom="openTime" | |
716 | + etagSeed="Solr"> | |
717 | + <cacheControl>max-age=30, public</cacheControl> | |
718 | + </httpCaching> | |
719 | + --> | |
720 | + </requestDispatcher> | |
721 | + | |
722 | + <!-- Request Handlers | |
723 | + | |
724 | + http://wiki.apache.org/solr/SolrRequestHandler | |
725 | + | |
726 | + Incoming queries will be dispatched to a specific handler by name | |
727 | + based on the path specified in the request. | |
728 | + | |
729 | + Legacy behavior: If the request path uses "/select" but no Request | |
730 | + Handler has that name, and if handleSelect="true" has been specified in | |
731 | + the requestDispatcher, then the Request Handler is dispatched based on | |
732 | + the qt parameter. Handlers without a leading '/' are accessed this way | |
733 | + like so: http://host/app/[core/]select?qt=name If no qt is | |
734 | + given, then the requestHandler that declares default="true" will be | |
735 | + used or the one named "standard". | |
736 | + | |
737 | + If a Request Handler is declared with startup="lazy", then it will | |
738 | + not be initialized until the first request that uses it. | |
739 | + | |
740 | + --> | |
741 | + <!-- SearchHandler | |
742 | + | |
743 | + http://wiki.apache.org/solr/SearchHandler | |
744 | + | |
745 | + For processing Search Queries, the primary Request Handler | |
746 | + provided with Solr is "SearchHandler". It delegates to a sequence | |
747 | + of SearchComponents (see below) and supports distributed | |
748 | + queries across multiple shards | |
749 | + --> | |
750 | + <requestHandler name="/select" class="solr.SearchHandler"> | |
751 | + <!-- default values for query parameters can be specified, these | |
752 | + will be overridden by parameters in the request | |
753 | + --> | |
754 | + <lst name="defaults"> | |
755 | + <str name="echoParams">explicit</str> | |
756 | + <int name="rows">10</int> | |
757 | + <str name="df">text</str> | |
758 | + </lst> | |
759 | + <!-- In addition to defaults, "appends" params can be specified | |
760 | + to identify values which should be appended to the list of | |
761 | + multi-val params from the query (or the existing "defaults"). | |
762 | + --> | |
763 | + <!-- In this example, the param "fq=inStock:true" would be appended to | |
764 | + any query time fq params the user may specify, as a mechanism for | |
765 | + partitioning the index, independent of any user selected filtering | |
766 | + that may also be desired (perhaps as a result of faceted searching). | |
767 | + | |
768 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
769 | + "appends" values from being used, so don't use this mechanism | |
770 | + unless you are sure you always want it. | |
771 | + --> | |
772 | + <!-- | |
773 | + <lst name="appends"> | |
774 | + <str name="fq">inStock:true</str> | |
775 | + </lst> | |
776 | + --> | |
777 | + <!-- "invariants" are a way of letting the Solr maintainer lock down | |
778 | + the options available to Solr clients. Any params values | |
779 | + specified here are used regardless of what values may be specified | |
780 | + in either the query, the "defaults", or the "appends" params. | |
781 | + | |
782 | + In this example, the facet.field and facet.query params would | |
783 | + be fixed, limiting the facets clients can use. Faceting is | |
784 | + not turned on by default - but if the client does specify | |
785 | + facet=true in the request, these are the only facets they | |
786 | + will be able to see counts for; regardless of what other | |
787 | + facet.field or facet.query params they may specify. | |
788 | + | |
789 | + NOTE: there is *absolutely* nothing a client can do to prevent these | |
790 | + "invariants" values from being used, so don't use this mechanism | |
791 | + unless you are sure you always want it. | |
792 | + --> | |
793 | + <!-- | |
794 | + <lst name="invariants"> | |
795 | + <str name="facet.field">cat</str> | |
796 | + <str name="facet.field">manu_exact</str> | |
797 | + <str name="facet.query">price:[* TO 500]</str> | |
798 | + <str name="facet.query">price:[500 TO *]</str> | |
799 | + </lst> | |
800 | + --> | |
801 | + <!-- If the default list of SearchComponents is not desired, that | |
802 | + list can either be overridden completely, or components can be | |
803 | + prepended or appended to the default list. (see below) | |
804 | + --> | |
805 | + <!-- | |
806 | + <arr name="components"> | |
807 | + <str>nameOfCustomComponent1</str> | |
808 | + <str>nameOfCustomComponent2</str> | |
809 | + </arr> | |
810 | + --> | |
811 | + | |
812 | + </requestHandler> | |
813 | + | |
814 | + <!-- A request handler that returns indented JSON by default --> | |
815 | + <requestHandler name="/query" class="solr.SearchHandler"> | |
816 | + <lst name="defaults"> | |
817 | + <str name="echoParams">explicit</str> | |
818 | + <str name="wt">json</str> | |
819 | + <str name="indent">true</str> | |
820 | + <str name="df">text</str> | |
821 | + </lst> | |
822 | + </requestHandler> | |
823 | + | |
824 | + | |
825 | + <!-- realtime get handler, guaranteed to return the latest stored fields of | |
826 | + any document, without the need to commit or open a new searcher. The | |
827 | + current implementation relies on the updateLog feature being enabled. --> | |
828 | + <requestHandler name="/get" class="solr.RealTimeGetHandler"> | |
829 | + <lst name="defaults"> | |
830 | + <str name="omitHeader">true</str> | |
831 | + <str name="wt">json</str> | |
832 | + <str name="indent">true</str> | |
833 | + </lst> | |
834 | + </requestHandler> | |
835 | + | |
836 | + | |
837 | + <!-- A Robust Example | |
838 | + | |
839 | + This example SearchHandler declaration shows off usage of the | |
840 | + SearchHandler with many defaults declared | |
841 | + | |
842 | + Note that multiple instances of the same Request Handler | |
843 | + (SearchHandler) can be registered multiple times with different | |
844 | + names (and different init parameters) | |
845 | + --> | |
846 | + <requestHandler name="/browse" class="solr.SearchHandler"> | |
847 | + <lst name="defaults"> | |
848 | + <str name="echoParams">explicit</str> | |
849 | + | |
850 | + <!-- VelocityResponseWriter settings --> | |
851 | + <str name="wt">velocity</str> | |
852 | + <str name="v.template">browse</str> | |
853 | + <str name="v.layout">layout</str> | |
854 | + <str name="title">Solritas</str> | |
855 | + | |
856 | + <!-- Query settings --> | |
857 | + <str name="defType">edismax</str> | |
858 | + <str name="qf"> | |
859 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
860 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
861 | + </str> | |
862 | + <str name="df">text</str> | |
863 | + <str name="mm">100%</str> | |
864 | + <str name="q.alt">*:*</str> | |
865 | + <str name="rows">10</str> | |
866 | + <str name="fl">*,score</str> | |
867 | + | |
868 | + <str name="mlt.qf"> | |
869 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
870 | + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 | |
871 | + </str> | |
872 | + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str> | |
873 | + <int name="mlt.count">3</int> | |
874 | + | |
875 | + <!-- Faceting defaults --> | |
876 | + <str name="facet">on</str> | |
877 | + <str name="facet.field">cat</str> | |
878 | + <str name="facet.field">manu_exact</str> | |
879 | + <str name="facet.field">content_type</str> | |
880 | + <str name="facet.field">author_s</str> | |
881 | + <str name="facet.query">ipod</str> | |
882 | + <str name="facet.query">GB</str> | |
883 | + <str name="facet.mincount">1</str> | |
884 | + <str name="facet.pivot">cat,inStock</str> | |
885 | + <str name="facet.range.other">after</str> | |
886 | + <str name="facet.range">price</str> | |
887 | + <int name="f.price.facet.range.start">0</int> | |
888 | + <int name="f.price.facet.range.end">600</int> | |
889 | + <int name="f.price.facet.range.gap">50</int> | |
890 | + <str name="facet.range">popularity</str> | |
891 | + <int name="f.popularity.facet.range.start">0</int> | |
892 | + <int name="f.popularity.facet.range.end">10</int> | |
893 | + <int name="f.popularity.facet.range.gap">3</int> | |
894 | + <str name="facet.range">manufacturedate_dt</str> | |
895 | + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str> | |
896 | + <str name="f.manufacturedate_dt.facet.range.end">NOW</str> | |
897 | + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str> | |
898 | + <str name="f.manufacturedate_dt.facet.range.other">before</str> | |
899 | + <str name="f.manufacturedate_dt.facet.range.other">after</str> | |
900 | + | |
901 | + <!-- Highlighting defaults --> | |
902 | + <str name="hl">on</str> | |
903 | + <str name="hl.fl">content</str> | |
904 | + <str name="hl.encoder">html</str> | |
905 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
906 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
907 | + <str name="f.title.hl.fragsize">0</str> | |
908 | + <str name="f.title.hl.alternateField">title</str> | |
909 | + <str name="f.name.hl.fragsize">0</str> | |
910 | + <str name="f.name.hl.alternateField">name</str> | |
911 | + <str name="f.content.hl.snippets">3</str> | |
912 | + <str name="f.content.hl.fragsize">1000</str> | |
913 | + <str name="f.content.hl.alternateField">content</str> | |
914 | + <str name="f.content.hl.maxAlternateFieldLength">250</str> | |
915 | + | |
916 | + | |
917 | + <!-- Spell checking defaults --> | |
918 | + <str name="spellcheck">on</str> | |
919 | + <str name="spellcheck.extendedResults">false</str> | |
920 | + <str name="spellcheck.count">5</str> | |
921 | + <str name="spellcheck.alternativeTermCount">2</str> | |
922 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
923 | + <str name="spellcheck.collate">true</str> | |
924 | + <str name="spellcheck.collateExtendedResults">true</str> | |
925 | + <str name="spellcheck.maxCollationTries">5</str> | |
926 | + <str name="spellcheck.maxCollations">3</str> | |
927 | + </lst> | |
928 | + | |
929 | + <!-- append spellchecking to our list of components --> | |
930 | + <arr name="last-components"> | |
931 | + <str>spellcheck</str> | |
932 | + </arr> | |
933 | + </requestHandler> | |
934 | + | |
935 | + | |
936 | + <!-- Update Request Handler. | |
937 | + | |
938 | + http://wiki.apache.org/solr/UpdateXmlMessages | |
939 | + | |
940 | + The canonical Request Handler for Modifying the Index through | |
941 | + commands specified using XML, JSON, CSV, or JAVABIN | |
942 | + | |
943 | + Note: Since Solr 1.1, request handlers require a valid content | |
944 | + type header if posted in the body. For example, curl now | |
945 | + requires: -H 'Content-type:text/xml; charset=utf-8' | |
946 | + | |
947 | + To override the request content type and force a specific | |
948 | + Content-type, use the request parameter: | |
949 | + ?update.contentType=text/csv | |
950 | + | |
951 | + This handler will pick a response format to match the input | |
952 | + if the 'wt' parameter is not explicit | |
953 | + --> | |
954 | + <requestHandler name="/update" class="solr.UpdateRequestHandler"> | |
955 | + <!-- See below for information on defining | |
956 | + updateRequestProcessorChains that can be used by name | |
957 | + on each Update Request | |
958 | + --> | |
959 | + <!-- | |
960 | + <lst name="defaults"> | |
961 | + <str name="update.chain">dedupe</str> | |
962 | + </lst> | |
963 | + --> | |
964 | + </requestHandler> | |
965 | + | |
966 | + <!-- for back compat with clients using /update/json and /update/csv --> | |
967 | + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler"> | |
968 | + <lst name="defaults"> | |
969 | + <str name="stream.contentType">application/json</str> | |
970 | + </lst> | |
971 | + </requestHandler> | |
972 | + <requestHandler name="/update/csv" class="solr.CSVRequestHandler"> | |
973 | + <lst name="defaults"> | |
974 | + <str name="stream.contentType">application/csv</str> | |
975 | + </lst> | |
976 | + </requestHandler> | |
977 | + | |
978 | + <!-- Solr Cell Update Request Handler | |
979 | + | |
980 | + http://wiki.apache.org/solr/ExtractingRequestHandler | |
981 | + | |
982 | + --> | |
983 | + <requestHandler name="/update/extract" | |
984 | + startup="lazy" | |
985 | + class="solr.extraction.ExtractingRequestHandler" > | |
986 | + <lst name="defaults"> | |
987 | + <str name="lowernames">true</str> | |
988 | + <str name="uprefix">ignored_</str> | |
989 | + | |
990 | + <!-- capture link hrefs but ignore div attributes --> | |
991 | + <str name="captureAttr">true</str> | |
992 | + <str name="fmap.a">links</str> | |
993 | + <str name="fmap.div">ignored_</str> | |
994 | + </lst> | |
995 | + </requestHandler> | |
996 | + | |
997 | + | |
998 | + <!-- Field Analysis Request Handler | |
999 | + | |
1000 | + RequestHandler that provides much the same functionality as | |
1001 | + analysis.jsp. Provides the ability to specify multiple field | |
1002 | + types and field names in the same request and outputs | |
1003 | + index-time and query-time analysis for each of them. | |
1004 | + | |
1005 | + Request parameters are: | |
1006 | + analysis.fieldname - field name whose analyzers are to be used | |
1007 | + | |
1008 | + analysis.fieldtype - field type whose analyzers are to be used | |
1009 | + analysis.fieldvalue - text for index-time analysis | |
1010 | + q (or analysis.q) - text for query time analysis | |
1011 | + analysis.showmatch (true|false) - When set to true and when | |
1012 | + query analysis is performed, the produced tokens of the | |
1013 | + field value analysis will be marked as "matched" for every | |
1014 | + token that is produced by the query analysis | |
1015 | + --> | |
1016 | + <requestHandler name="/analysis/field" | |
1017 | + startup="lazy" | |
1018 | + class="solr.FieldAnalysisRequestHandler" /> | |
1019 | + | |
1020 | + | |
1021 | + <!-- Document Analysis Handler | |
1022 | + | |
1023 | + http://wiki.apache.org/solr/AnalysisRequestHandler | |
1024 | + | |
1025 | + An analysis handler that provides a breakdown of the analysis | |
1026 | + process of provided documents. This handler expects a (single) | |
1027 | + content stream with the following format: | |
1028 | + | |
1029 | + <docs> | |
1030 | + <doc> | |
1031 | + <field name="id">1</field> | |
1032 | + <field name="name">The Name</field> | |
1033 | + <field name="text">The Text Value</field> | |
1034 | + </doc> | |
1035 | + <doc>...</doc> | |
1036 | + <doc>...</doc> | |
1037 | + ... | |
1038 | + </docs> | |
1039 | + | |
1040 | + Note: Each document must contain a field which serves as the | |
1041 | + unique key. This key is used in the returned response to associate | |
1042 | + an analysis breakdown to the analyzed document. | |
1043 | + | |
1044 | + Like the FieldAnalysisRequestHandler, this handler also supports | |
1045 | + query analysis by sending either an "analysis.query" or "q" | |
1046 | + request parameter that holds the query text to be analyzed. It | |
1047 | + also supports the "analysis.showmatch" parameter which when set to | |
1048 | + true, all field tokens that match the query tokens will be marked | |
1049 | + as a "match". | |
1050 | + --> | |
1051 | + <requestHandler name="/analysis/document" | |
1052 | + class="solr.DocumentAnalysisRequestHandler" | |
1053 | + startup="lazy" /> | |
1054 | + | |
1055 | + <!-- Admin Handlers | |
1056 | + | |
1057 | + Admin Handlers - This will register all the standard admin | |
1058 | + RequestHandlers. | |
1059 | + --> | |
1060 | + <requestHandler name="/admin/" | |
1061 | + class="solr.admin.AdminHandlers" /> | |
1062 | + <!-- This single handler is equivalent to the following... --> | |
1063 | + <!-- | |
1064 | + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" /> | |
1065 | + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" /> | |
1066 | + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" /> | |
1067 | + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" /> | |
1068 | + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" /> | |
1069 | + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" /> | |
1070 | + --> | |
1071 | + <!-- If you wish to hide files under ${solr.home}/conf, explicitly | |
1072 | + register the ShowFileRequestHandler using: | |
1073 | + --> | |
1074 | + <!-- | |
1075 | + <requestHandler name="/admin/file" | |
1076 | + class="solr.admin.ShowFileRequestHandler" > | |
1077 | + <lst name="invariants"> | |
1078 | + <str name="hidden">synonyms.txt</str> | |
1079 | + <str name="hidden">anotherfile.txt</str> | |
1080 | + </lst> | |
1081 | + </requestHandler> | |
1082 | + --> | |
1083 | + | |
1084 | + <!-- ping/healthcheck --> | |
1085 | + <requestHandler name="/admin/ping" class="solr.PingRequestHandler"> | |
1086 | + <lst name="invariants"> | |
1087 | + <str name="q">solrpingquery</str> | |
1088 | + </lst> | |
1089 | + <lst name="defaults"> | |
1090 | + <str name="echoParams">all</str> | |
1091 | + </lst> | |
1092 | + <!-- An optional feature of the PingRequestHandler is to configure the | |
1093 | + handler with a "healthcheckFile" which can be used to enable/disable | |
1094 | + the PingRequestHandler. | |
1095 | + relative paths are resolved against the data dir | |
1096 | + --> | |
1097 | + <!-- <str name="healthcheckFile">server-enabled.txt</str> --> | |
1098 | + </requestHandler> | |
1099 | + | |
1100 | + <!-- Echo the request contents back to the client --> | |
1101 | + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > | |
1102 | + <lst name="defaults"> | |
1103 | + <str name="echoParams">explicit</str> | |
1104 | + <str name="echoHandler">true</str> | |
1105 | + </lst> | |
1106 | + </requestHandler> | |
1107 | + | |
1108 | + <!-- Solr Replication | |
1109 | + | |
1110 | + The SolrReplicationHandler supports replicating indexes from a | |
1111 | + "master" used for indexing and "slaves" used for queries. | |
1112 | + | |
1113 | + http://wiki.apache.org/solr/SolrReplication | |
1114 | + | |
1115 | + It is also necessary for SolrCloud to function (in Cloud mode, the | |
1116 | + replication handler is used to bulk transfer segments when nodes | |
1117 | + are added or need to recover). | |
1118 | + | |
1119 | + https://wiki.apache.org/solr/SolrCloud/ | |
1120 | + --> | |
1121 | + <requestHandler name="/replication" class="solr.ReplicationHandler" > | |
1122 | + <!-- | |
1123 | + To enable simple master/slave replication, uncomment one of the | |
1124 | + sections below, depending on whether this solr instance should be | |
1125 | + the "master" or a "slave". If this instance is a "slave" you will | |
1126 | + also need to fill in the masterUrl to point to a real machine. | |
1127 | + --> | |
1128 | + <!-- | |
1129 | + <lst name="master"> | |
1130 | + <str name="replicateAfter">commit</str> | |
1131 | + <str name="replicateAfter">startup</str> | |
1132 | + <str name="confFiles">schema.xml,stopwords.txt</str> | |
1133 | + </lst> | |
1134 | + --> | |
1135 | + <!-- | |
1136 | + <lst name="slave"> | |
1137 | + <str name="masterUrl">http://your-master-hostname:8983/solr</str> | |
1138 | + <str name="pollInterval">00:00:60</str> | |
1139 | + </lst> | |
1140 | + --> | |
1141 | + </requestHandler> | |
1142 | + | |
1143 | + <!-- Search Components | |
1144 | + | |
1145 | + Search components are registered to SolrCore and used by | |
1146 | + instances of SearchHandler (which can access them by name) | |
1147 | + | |
1148 | + By default, the following components are available: | |
1149 | + | |
1150 | + <searchComponent name="query" class="solr.QueryComponent" /> | |
1151 | + <searchComponent name="facet" class="solr.FacetComponent" /> | |
1152 | + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> | |
1153 | + <searchComponent name="highlight" class="solr.HighlightComponent" /> | |
1154 | + <searchComponent name="stats" class="solr.StatsComponent" /> | |
1155 | + <searchComponent name="debug" class="solr.DebugComponent" /> | |
1156 | + | |
1157 | + Default configuration in a requestHandler would look like: | |
1158 | + | |
1159 | + <arr name="components"> | |
1160 | + <str>query</str> | |
1161 | + <str>facet</str> | |
1162 | + <str>mlt</str> | |
1163 | + <str>highlight</str> | |
1164 | + <str>stats</str> | |
1165 | + <str>debug</str> | |
1166 | + </arr> | |
1167 | + | |
1168 | + If you register a searchComponent to one of the standard names, | |
1169 | + that will be used instead of the default. | |
1170 | + | |
1171 | + To insert components before or after the 'standard' components, use: | |
1172 | + | |
1173 | + <arr name="first-components"> | |
1174 | + <str>myFirstComponentName</str> | |
1175 | + </arr> | |
1176 | + | |
1177 | + <arr name="last-components"> | |
1178 | + <str>myLastComponentName</str> | |
1179 | + </arr> | |
1180 | + | |
1181 | + NOTE: The component registered with the name "debug" will | |
1182 | + always be executed after the "last-components" | |
1183 | + | |
1184 | + --> | |
1185 | + | |
1186 | + <!-- Spell Check | |
1187 | + | |
1188 | + The spell check component can return a list of alternative spelling | |
1189 | + suggestions. | |
1190 | + | |
1191 | + http://wiki.apache.org/solr/SpellCheckComponent | |
1192 | + --> | |
1193 | + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> | |
1194 | + | |
1195 | + <str name="queryAnalyzerFieldType">textSpell</str> | |
1196 | + | |
1197 | + <!-- Multiple "Spell Checkers" can be declared and used by this | |
1198 | + component | |
1199 | + --> | |
1200 | + | |
1201 | + <!-- a spellchecker built from a field of the main index --> | |
1202 | + <lst name="spellchecker"> | |
1203 | + <str name="name">default</str> | |
1204 | + <str name="field">name</str> | |
1205 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1206 | + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> | |
1207 | + <str name="distanceMeasure">internal</str> | |
1208 | + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> | |
1209 | + <float name="accuracy">0.5</float> | |
1210 | + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> | |
1211 | + <int name="maxEdits">2</int> | |
1212 | + <!-- the minimum shared prefix when enumerating terms --> | |
1213 | + <int name="minPrefix">1</int> | |
1214 | + <!-- maximum number of inspections per result. --> | |
1215 | + <int name="maxInspections">5</int> | |
1216 | + <!-- minimum length of a query term to be considered for correction --> | |
1217 | + <int name="minQueryLength">4</int> | |
1218 | +      <!-- maximum threshold of documents a query term can appear in to be considered for correction --> | |
1219 | + <float name="maxQueryFrequency">0.01</float> | |
1220 | + <!-- uncomment this to require suggestions to occur in 1% of the documents | |
1221 | + <float name="thresholdTokenFrequency">.01</float> | |
1222 | + --> | |
1223 | + </lst> | |
1224 | + | |
1225 | + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> | |
1226 | + <lst name="spellchecker"> | |
1227 | + <str name="name">wordbreak</str> | |
1228 | + <str name="classname">solr.WordBreakSolrSpellChecker</str> | |
1229 | + <str name="field">name</str> | |
1230 | + <str name="combineWords">true</str> | |
1231 | + <str name="breakWords">true</str> | |
1232 | + <int name="maxChanges">10</int> | |
1233 | + </lst> | |
1234 | + | |
1235 | + <!-- a spellchecker that uses a different distance measure --> | |
1236 | + <!-- | |
1237 | + <lst name="spellchecker"> | |
1238 | + <str name="name">jarowinkler</str> | |
1239 | + <str name="field">spell</str> | |
1240 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1241 | + <str name="distanceMeasure"> | |
1242 | + org.apache.lucene.search.spell.JaroWinklerDistance | |
1243 | + </str> | |
1244 | + </lst> | |
1245 | + --> | |
1246 | + | |
1247 | +    <!-- a spellchecker that uses an alternate comparator | |
1248 | + | |
1249 | +         comparatorClass can be one of: | |
1250 | + 1. score (default) | |
1251 | + 2. freq (Frequency first, then score) | |
1252 | + 3. A fully qualified class name | |
1253 | + --> | |
1254 | + <!-- | |
1255 | + <lst name="spellchecker"> | |
1256 | + <str name="name">freq</str> | |
1257 | + <str name="field">lowerfilt</str> | |
1258 | + <str name="classname">solr.DirectSolrSpellChecker</str> | |
1259 | + <str name="comparatorClass">freq</str> | |
1260 | + --> | |
1261 | + | |
1262 | + <!-- A spellchecker that reads the list of words from a file --> | |
1263 | + <!-- | |
1264 | + <lst name="spellchecker"> | |
1265 | + <str name="classname">solr.FileBasedSpellChecker</str> | |
1266 | + <str name="name">file</str> | |
1267 | + <str name="sourceLocation">spellings.txt</str> | |
1268 | + <str name="characterEncoding">UTF-8</str> | |
1269 | + <str name="spellcheckIndexDir">spellcheckerFile</str> | |
1270 | + </lst> | |
1271 | + --> | |
1272 | + </searchComponent> | |
1273 | + | |
1274 | + <!-- A request handler for demonstrating the spellcheck component. | |
1275 | + | |
1276 | + NOTE: This is purely as an example. The whole purpose of the | |
1277 | + SpellCheckComponent is to hook it into the request handler that | |
1278 | + handles your normal user queries so that a separate request is | |
1279 | + not needed to get suggestions. | |
1280 | + | |
1281 | +       IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS | |
1282 | + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! | |
1283 | + | |
1284 | + See http://wiki.apache.org/solr/SpellCheckComponent for details | |
1285 | + on the request parameters. | |
1286 | + --> | |
1287 | + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> | |
1288 | + <lst name="defaults"> | |
1289 | + <str name="df">text</str> | |
1290 | + <!-- Solr will use suggestions from both the 'default' spellchecker | |
1291 | + and from the 'wordbreak' spellchecker and combine them. | |
1292 | + collations (re-written queries) can include a combination of | |
1293 | + corrections from both spellcheckers --> | |
1294 | + <str name="spellcheck.dictionary">default</str> | |
1295 | + <str name="spellcheck.dictionary">wordbreak</str> | |
1296 | + <str name="spellcheck">on</str> | |
1297 | + <str name="spellcheck.extendedResults">true</str> | |
1298 | + <str name="spellcheck.count">10</str> | |
1299 | + <str name="spellcheck.alternativeTermCount">5</str> | |
1300 | + <str name="spellcheck.maxResultsForSuggest">5</str> | |
1301 | + <str name="spellcheck.collate">true</str> | |
1302 | + <str name="spellcheck.collateExtendedResults">true</str> | |
1303 | + <str name="spellcheck.maxCollationTries">10</str> | |
1304 | + <str name="spellcheck.maxCollations">5</str> | |
1305 | + </lst> | |
1306 | + <arr name="last-components"> | |
1307 | + <str>spellcheck</str> | |
1308 | + </arr> | |
1309 | + </requestHandler> | |
1310 | + | |
1311 | + <!-- Term Vector Component | |
1312 | + | |
1313 | + http://wiki.apache.org/solr/TermVectorComponent | |
1314 | + --> | |
1315 | + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> | |
1316 | + | |
1317 | + <!-- A request handler for demonstrating the term vector component | |
1318 | + | |
1319 | + This is purely as an example. | |
1320 | + | |
1321 | + In reality you will likely want to add the component to your | |
1322 | + already specified request handlers. | |
1323 | + --> | |
1324 | + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> | |
1325 | + <lst name="defaults"> | |
1326 | + <str name="df">text</str> | |
1327 | + <bool name="tv">true</bool> | |
1328 | + </lst> | |
1329 | + <arr name="last-components"> | |
1330 | + <str>tvComponent</str> | |
1331 | + </arr> | |
1332 | + </requestHandler> | |
1333 | + | |
1334 | + <!-- Clustering Component | |
1335 | + | |
1336 | + http://wiki.apache.org/solr/ClusteringComponent | |
1337 | + | |
1338 | +       You'll need to set the solr.clustering.enabled system property | |
1339 | + when running solr to run with clustering enabled: | |
1340 | + | |
1341 | + java -Dsolr.clustering.enabled=true -jar start.jar | |
1342 | + | |
1343 | + --> | |
1344 | + <searchComponent name="clustering" | |
1345 | + enable="${solr.clustering.enabled:false}" | |
1346 | + class="solr.clustering.ClusteringComponent" > | |
1347 | + <!-- Declare an engine --> | |
1348 | + <lst name="engine"> | |
1349 | + <!-- The name, only one can be named "default" --> | |
1350 | + <str name="name">default</str> | |
1351 | + | |
1352 | + <!-- Class name of Carrot2 clustering algorithm. | |
1353 | + | |
1354 | + Currently available algorithms are: | |
1355 | + | |
1356 | + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm | |
1357 | + * org.carrot2.clustering.stc.STCClusteringAlgorithm | |
1358 | + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm | |
1359 | + | |
1360 | + See http://project.carrot2.org/algorithms.html for the | |
1361 | + algorithm's characteristics. | |
1362 | + --> | |
1363 | + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str> | |
1364 | + | |
1365 | + <!-- Overriding values for Carrot2 default algorithm attributes. | |
1366 | + | |
1367 | + For a description of all available attributes, see: | |
1368 | + http://download.carrot2.org/stable/manual/#chapter.components. | |
1369 | + Use attribute key as name attribute of str elements | |
1370 | + below. These can be further overridden for individual | |
1371 | + requests by specifying attribute key as request parameter | |
1372 | + name and attribute value as parameter value. | |
1373 | + --> | |
1374 | + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str> | |
1375 | + | |
1376 | + <!-- Location of Carrot2 lexical resources. | |
1377 | + | |
1378 | + A directory from which to load Carrot2-specific stop words | |
1379 | + and stop labels. Absolute or relative to Solr config directory. | |
1380 | + If a specific resource (e.g. stopwords.en) is present in the | |
1381 | + specified dir, it will completely override the corresponding | |
1382 | + default one that ships with Carrot2. | |
1383 | + | |
1384 | + For an overview of Carrot2 lexical resources, see: | |
1385 | + http://download.carrot2.org/head/manual/#chapter.lexical-resources | |
1386 | + --> | |
1387 | + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str> | |
1388 | + | |
1389 | + <!-- The language to assume for the documents. | |
1390 | + | |
1391 | + For a list of allowed values, see: | |
1392 | + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage | |
1393 | + --> | |
1394 | + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str> | |
1395 | + </lst> | |
1396 | + <lst name="engine"> | |
1397 | + <str name="name">stc</str> | |
1398 | + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str> | |
1399 | + </lst> | |
1400 | + </searchComponent> | |
1401 | + | |
1402 | + <!-- A request handler for demonstrating the clustering component | |
1403 | + | |
1404 | + This is purely as an example. | |
1405 | + | |
1406 | + In reality you will likely want to add the component to your | |
1407 | + already specified request handlers. | |
1408 | + --> | |
1409 | + <requestHandler name="/clustering" | |
1410 | + startup="lazy" | |
1411 | + enable="${solr.clustering.enabled:false}" | |
1412 | + class="solr.SearchHandler"> | |
1413 | + <lst name="defaults"> | |
1414 | + <bool name="clustering">true</bool> | |
1415 | + <str name="clustering.engine">default</str> | |
1416 | + <bool name="clustering.results">true</bool> | |
1417 | + <!-- The title field --> | |
1418 | + <str name="carrot.title">name</str> | |
1419 | + <str name="carrot.url">id</str> | |
1420 | + <!-- The field to cluster on --> | |
1421 | + <str name="carrot.snippet">features</str> | |
1422 | + <!-- produce summaries --> | |
1423 | + <bool name="carrot.produceSummary">true</bool> | |
1424 | + <!-- the maximum number of labels per cluster --> | |
1425 | + <!--<int name="carrot.numDescriptions">5</int>--> | |
1426 | + <!-- produce sub clusters --> | |
1427 | + <bool name="carrot.outputSubClusters">false</bool> | |
1428 | + | |
1429 | + <str name="defType">edismax</str> | |
1430 | + <str name="qf"> | |
1431 | + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 | |
1432 | + </str> | |
1433 | + <str name="q.alt">*:*</str> | |
1434 | + <str name="rows">10</str> | |
1435 | + <str name="fl">*,score</str> | |
1436 | + </lst> | |
1437 | + <arr name="last-components"> | |
1438 | + <str>clustering</str> | |
1439 | + </arr> | |
1440 | + </requestHandler> | |
1441 | + | |
1442 | + <!-- Terms Component | |
1443 | + | |
1444 | + http://wiki.apache.org/solr/TermsComponent | |
1445 | + | |
1446 | + A component to return terms and document frequency of those | |
1447 | + terms | |
1448 | + --> | |
1449 | + <searchComponent name="terms" class="solr.TermsComponent"/> | |
1450 | + | |
1451 | + <!-- A request handler for demonstrating the terms component --> | |
1452 | + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> | |
1453 | + <lst name="defaults"> | |
1454 | + <bool name="terms">true</bool> | |
1455 | + <bool name="distrib">false</bool> | |
1456 | + </lst> | |
1457 | + <arr name="components"> | |
1458 | + <str>terms</str> | |
1459 | + </arr> | |
1460 | + </requestHandler> | |
1461 | + | |
1462 | + | |
1463 | + <!-- Query Elevation Component | |
1464 | + | |
1465 | + http://wiki.apache.org/solr/QueryElevationComponent | |
1466 | + | |
1467 | + a search component that enables you to configure the top | |
1468 | + results for a given query regardless of the normal lucene | |
1469 | + scoring. | |
1470 | + --> | |
1471 | + <searchComponent name="elevator" class="solr.QueryElevationComponent" > | |
1472 | + <!-- pick a fieldType to analyze queries --> | |
1473 | + <str name="queryFieldType">string</str> | |
1474 | + <str name="config-file">elevate.xml</str> | |
1475 | + </searchComponent> | |
1476 | + | |
1477 | + <!-- A request handler for demonstrating the elevator component --> | |
1478 | + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> | |
1479 | + <lst name="defaults"> | |
1480 | + <str name="echoParams">explicit</str> | |
1481 | + <str name="df">text</str> | |
1482 | + </lst> | |
1483 | + <arr name="last-components"> | |
1484 | + <str>elevator</str> | |
1485 | + </arr> | |
1486 | + </requestHandler> | |
1487 | + | |
1488 | + <!-- Highlighting Component | |
1489 | + | |
1490 | + http://wiki.apache.org/solr/HighlightingParameters | |
1491 | + --> | |
1492 | + <searchComponent class="solr.HighlightComponent" name="highlight"> | |
1493 | + <highlighting> | |
1494 | + <!-- Configure the standard fragmenter --> | |
1495 | + <!-- This could most likely be commented out in the "default" case --> | |
1496 | + <fragmenter name="gap" | |
1497 | + default="true" | |
1498 | + class="solr.highlight.GapFragmenter"> | |
1499 | + <lst name="defaults"> | |
1500 | + <int name="hl.fragsize">250</int> | |
1501 | + </lst> | |
1502 | + </fragmenter> | |
1503 | + | |
1504 | + <!-- A regular-expression-based fragmenter | |
1505 | + (for sentence extraction) | |
1506 | + --> | |
1507 | + <fragmenter name="regex" | |
1508 | + class="solr.highlight.RegexFragmenter"> | |
1509 | + <lst name="defaults"> | |
1510 | + <!-- slightly smaller fragsizes work better because of slop --> | |
1511 | + <int name="hl.fragsize">250</int> | |
1512 | + <!-- allow 50% slop on fragment sizes --> | |
1513 | + <float name="hl.regex.slop">0.5</float> | |
1514 | + <!-- a basic sentence pattern --> | |
1515 | + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> | |
1516 | + </lst> | |
1517 | + </fragmenter> | |
1518 | + | |
1519 | + <!-- Configure the standard formatter --> | |
1520 | + <formatter name="html" | |
1521 | + default="true" | |
1522 | + class="solr.highlight.HtmlFormatter"> | |
1523 | + <lst name="defaults"> | |
1524 | + <str name="hl.simple.pre"><![CDATA[<b>]]></str> | |
1525 | + <str name="hl.simple.post"><![CDATA[</b>]]></str> | |
1526 | + </lst> | |
1527 | + </formatter> | |
1528 | + | |
1529 | + <!-- Configure the standard encoder --> | |
1530 | + <encoder name="html" | |
1531 | + class="solr.highlight.HtmlEncoder" /> | |
1532 | + | |
1533 | + <!-- Configure the standard fragListBuilder --> | |
1534 | + <fragListBuilder name="simple" | |
1535 | + class="solr.highlight.SimpleFragListBuilder"/> | |
1536 | + | |
1537 | + <!-- Configure the single fragListBuilder --> | |
1538 | + <fragListBuilder name="single" | |
1539 | + class="solr.highlight.SingleFragListBuilder"/> | |
1540 | + | |
1541 | + <!-- Configure the weighted fragListBuilder --> | |
1542 | + <fragListBuilder name="weighted" | |
1543 | + default="true" | |
1544 | + class="solr.highlight.WeightedFragListBuilder"/> | |
1545 | + | |
1546 | + <!-- default tag FragmentsBuilder --> | |
1547 | + <fragmentsBuilder name="default" | |
1548 | + default="true" | |
1549 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1550 | + <!-- | |
1551 | + <lst name="defaults"> | |
1552 | + <str name="hl.multiValuedSeparatorChar">/</str> | |
1553 | + </lst> | |
1554 | + --> | |
1555 | + </fragmentsBuilder> | |
1556 | + | |
1557 | + <!-- multi-colored tag FragmentsBuilder --> | |
1558 | + <fragmentsBuilder name="colored" | |
1559 | + class="solr.highlight.ScoreOrderFragmentsBuilder"> | |
1560 | + <lst name="defaults"> | |
1561 | + <str name="hl.tag.pre"><![CDATA[ | |
1562 | + <b style="background:yellow">,<b style="background:lawgreen">, | |
1563 | + <b style="background:aquamarine">,<b style="background:magenta">, | |
1564 | + <b style="background:palegreen">,<b style="background:coral">, | |
1565 | + <b style="background:wheat">,<b style="background:khaki">, | |
1566 | + <b style="background:lime">,<b style="background:deepskyblue">]]></str> | |
1567 | + <str name="hl.tag.post"><![CDATA[</b>]]></str> | |
1568 | + </lst> | |
1569 | + </fragmentsBuilder> | |
1570 | + | |
1571 | + <boundaryScanner name="default" | |
1572 | + default="true" | |
1573 | + class="solr.highlight.SimpleBoundaryScanner"> | |
1574 | + <lst name="defaults"> | |
1575 | + <str name="hl.bs.maxScan">10</str> | |
1576 | + <str name="hl.bs.chars">.,!? | |
1577 | + | |
1578 | +</str> | |
1579 | + </lst> | |
1580 | + </boundaryScanner> | |
1581 | + | |
1582 | + <boundaryScanner name="breakIterator" | |
1583 | + class="solr.highlight.BreakIteratorBoundaryScanner"> | |
1584 | + <lst name="defaults"> | |
1585 | + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> | |
1586 | + <str name="hl.bs.type">WORD</str> | |
1587 | + <!-- language and country are used when constructing Locale object. --> | |
1588 | + <!-- And the Locale object will be used when getting instance of BreakIterator --> | |
1589 | + <str name="hl.bs.language">pt</str> | |
1590 | + <str name="hl.bs.country">BR</str> | |
1591 | + </lst> | |
1592 | + </boundaryScanner> | |
1593 | + </highlighting> | |
1594 | + </searchComponent> | |
1595 | + | |
1596 | + <!-- Update Processors | |
1597 | + | |
1598 | + Chains of Update Processor Factories for dealing with Update | |
1599 | + Requests can be declared, and then used by name in Update | |
1600 | + Request Processors | |
1601 | + | |
1602 | + http://wiki.apache.org/solr/UpdateRequestProcessor | |
1603 | + | |
1604 | + --> | |
1605 | + <!-- Deduplication | |
1606 | + | |
1607 | + An example dedup update processor that creates the "id" field | |
1608 | + on the fly based on the hash code of some other fields. This | |
1609 | + example has overwriteDupes set to false since we are using the | |
1610 | + id field as the signatureField and Solr will maintain | |
1611 | + uniqueness based on that anyway. | |
1612 | + | |
1613 | + --> | |
1614 | + <!-- | |
1615 | + <updateRequestProcessorChain name="dedupe"> | |
1616 | + <processor class="solr.processor.SignatureUpdateProcessorFactory"> | |
1617 | + <bool name="enabled">true</bool> | |
1618 | + <str name="signatureField">id</str> | |
1619 | + <bool name="overwriteDupes">false</bool> | |
1620 | + <str name="fields">name,features,cat</str> | |
1621 | + <str name="signatureClass">solr.processor.Lookup3Signature</str> | |
1622 | + </processor> | |
1623 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1624 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1625 | + </updateRequestProcessorChain> | |
1626 | + --> | |
1627 | + | |
1628 | + <!-- Language identification | |
1629 | + | |
1630 | + This example update chain identifies the language of the incoming | |
1631 | + documents using the langid contrib. The detected language is | |
1632 | + written to field language_s. No field name mapping is done. | |
1633 | + The fields used for detection are text, title, subject and description, | |
1634 | +     making this example suitable for detecting languages from full-text | |
1635 | + rich documents injected via ExtractingRequestHandler. | |
1636 | + See more about langId at http://wiki.apache.org/solr/LanguageDetection | |
1637 | + --> | |
1638 | + <!-- | |
1639 | + <updateRequestProcessorChain name="langid"> | |
1640 | + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> | |
1641 | + <str name="langid.fl">text,title,subject,description</str> | |
1642 | + <str name="langid.langField">language_s</str> | |
1643 | + <str name="langid.fallback">en</str> | |
1644 | + </processor> | |
1645 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1646 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1647 | + </updateRequestProcessorChain> | |
1648 | + --> | |
1649 | + | |
1650 | + <!-- Script update processor | |
1651 | + | |
1652 | + This example hooks in an update processor implemented using JavaScript. | |
1653 | + | |
1654 | + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor | |
1655 | + --> | |
1656 | + <!-- | |
1657 | + <updateRequestProcessorChain name="script"> | |
1658 | + <processor class="solr.StatelessScriptUpdateProcessorFactory"> | |
1659 | + <str name="script">update-script.js</str> | |
1660 | + <lst name="params"> | |
1661 | + <str name="config_param">example config parameter</str> | |
1662 | + </lst> | |
1663 | + </processor> | |
1664 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1665 | + </updateRequestProcessorChain> | |
1666 | + --> | |
1667 | + | |
1668 | + <!-- Response Writers | |
1669 | + | |
1670 | + http://wiki.apache.org/solr/QueryResponseWriter | |
1671 | + | |
1672 | + Request responses will be written using the writer specified by | |
1673 | + the 'wt' request parameter matching the name of a registered | |
1674 | + writer. | |
1675 | + | |
1676 | + The "default" writer is the default and will be used if 'wt' is | |
1677 | + not specified in the request. | |
1678 | + --> | |
1679 | + <!-- The following response writers are implicitly configured unless | |
1680 | + overridden... | |
1681 | + --> | |
1682 | + <!-- | |
1683 | + <queryResponseWriter name="xml" | |
1684 | + default="true" | |
1685 | + class="solr.XMLResponseWriter" /> | |
1686 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> | |
1687 | + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> | |
1688 | + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> | |
1689 | + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> | |
1690 | + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> | |
1691 | + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> | |
1692 | + --> | |
1693 | + | |
1694 | + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> | |
1695 | + <!-- For the purposes of the tutorial, JSON responses are written as | |
1696 | + plain text so that they are easy to read in *any* browser. | |
1697 | + If you expect a MIME type of "application/json" just remove this override. | |
1698 | + --> | |
1699 | + <str name="content-type">text/plain; charset=UTF-8</str> | |
1700 | + </queryResponseWriter> | |
1701 | + | |
1702 | + <!-- | |
1703 | + Custom response writers can be declared as needed... | |
1704 | + --> | |
1705 | + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/> | |
1706 | + | |
1707 | + | |
1708 | + <!-- XSLT response writer transforms the XML output by any xslt file found | |
1709 | + in Solr's conf/xslt directory. Changes to xslt files are checked for | |
1710 | + every xsltCacheLifetimeSeconds. | |
1711 | + --> | |
1712 | + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> | |
1713 | + <int name="xsltCacheLifetimeSeconds">5</int> | |
1714 | + </queryResponseWriter> | |
1715 | + | |
1716 | + <!-- Query Parsers | |
1717 | + | |
1718 | + http://wiki.apache.org/solr/SolrQuerySyntax | |
1719 | + | |
1720 | + Multiple QParserPlugins can be registered by name, and then | |
1721 | + used in either the "defType" param for the QueryComponent (used | |
1722 | + by SearchHandler) or in LocalParams | |
1723 | + --> | |
1724 | + <!-- example of registering a query parser --> | |
1725 | + <!-- | |
1726 | + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> | |
1727 | + --> | |
1728 | + | |
1729 | + <!-- Function Parsers | |
1730 | + | |
1731 | + http://wiki.apache.org/solr/FunctionQuery | |
1732 | + | |
1733 | + Multiple ValueSourceParsers can be registered by name, and then | |
1734 | + used as function names when using the "func" QParser. | |
1735 | + --> | |
1736 | + <!-- example of registering a custom function parser --> | |
1737 | + <!-- | |
1738 | + <valueSourceParser name="myfunc" | |
1739 | + class="com.mycompany.MyValueSourceParser" /> | |
1740 | + --> | |
1741 | + | |
1742 | + | |
1743 | + <!-- Document Transformers | |
1744 | + http://wiki.apache.org/solr/DocTransformers | |
1745 | + --> | |
1746 | + <!-- | |
1747 | + Could be something like: | |
1748 | + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > | |
1749 | + <int name="connection">jdbc://....</int> | |
1750 | + </transformer> | |
1751 | + | |
1752 | + To add a constant value to all docs, use: | |
1753 | + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1754 | + <int name="value">5</int> | |
1755 | + </transformer> | |
1756 | + | |
1757 | + If you want the user to still be able to change it with _value:something_ use this: | |
1758 | + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > | |
1759 | + <double name="defaultValue">5</double> | |
1760 | + </transformer> | |
1761 | + | |
1762 | + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The | |
1763 | + EditorialMarkerFactory will do exactly that: | |
1764 | + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> | |
1765 | + --> | |
1766 | + | |
1767 | + | |
1768 | + <!-- Legacy config for the admin interface --> | |
1769 | + <admin> | |
1770 | + <defaultQuery>*:*</defaultQuery> | |
1771 | + </admin> | |
1772 | + | |
1773 | +</config> | |
0 | 1774 | \ No newline at end of file | ... | ... |
1 | +++ a/index/sei-publicacoes-schema.xml | |
... | ... | @@ -0,0 +1,1177 @@ |
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<!-- | |
3 | + Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | + contributor license agreements. See the NOTICE file distributed with | |
5 | + this work for additional information regarding copyright ownership. | |
6 | + The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | + (the "License"); you may not use this file except in compliance with | |
8 | + the License. You may obtain a copy of the License at | |
9 | + | |
10 | + http://www.apache.org/licenses/LICENSE-2.0 | |
11 | + | |
12 | + Unless required by applicable law or agreed to in writing, software | |
13 | + distributed under the License is distributed on an "AS IS" BASIS, | |
14 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | + See the License for the specific language governing permissions and | |
16 | + limitations under the License. | |
17 | +--> | |
18 | + | |
19 | +<!-- | |
20 | + This is the Solr schema file. This file should be named "schema.xml" and | |
21 | + should be in the conf directory under the solr home | |
22 | + (i.e. ./solr/conf/schema.xml by default) | |
23 | + or located where the classloader for the Solr webapp can find it. | |
24 | + | |
25 | + This example schema is the recommended starting point for users. | |
26 | + It should be kept correct and concise, usable out-of-the-box. | |
27 | + | |
28 | + For more information, on how to customize this file, please see | |
29 | + http://wiki.apache.org/solr/SchemaXml | |
30 | + | |
31 | + PERFORMANCE NOTE: this schema includes many optional features and should not | |
32 | + be used for benchmarking. To improve performance one could | |
33 | + - set stored="false" for all fields possible (esp large fields) when you | |
34 | + only need to search on the field but don't need to return the original | |
35 | + value. | |
36 | + - set indexed="false" if you don't need to search on the field, but only | |
37 | + return the field as a result of searching on other indexed fields. | |
38 | + - remove all unneeded copyField statements | |
39 | +  - for best index size and searching performance, set "indexed" to false | |
40 | + for all general text fields, use copyField to copy them to the | |
41 | + catchall "text" field, and use that for searching. | |
42 | + - For maximum indexing performance, use the StreamingUpdateSolrServer | |
43 | + java client. | |
44 | + - Remember to run the JVM in server mode, and use a higher logging level | |
45 | + that avoids logging every request | |
46 | +--> | |
47 | + | |
48 | +<schema name="sei-publicacoes" version="1.5"> | |
49 | + <!-- attribute "name" is the name of this schema and is only used for display purposes. | |
50 | + version="x.y" is Solr's version number for the schema syntax and | |
51 | + semantics. It should not normally be changed by applications. | |
52 | + | |
53 | + 1.0: multiValued attribute did not exist, all fields are multiValued | |
54 | + by nature | |
55 | + 1.1: multiValued attribute introduced, false by default | |
56 | + 1.2: omitTermFreqAndPositions attribute introduced, true by default | |
57 | + except for text fields. | |
58 | + 1.3: removed optional field compress feature | |
59 | + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser | |
60 | + behavior when a single string produces multiple tokens. Defaults | |
61 | + to off for version >= 1.4 | |
62 | + 1.5: omitNorms defaults to true for primitive field types | |
63 | + (int, float, boolean, string...) | |
64 | + --> | |
65 | + | |
66 | + <fields> | |
67 | + <!-- Fields of the sei-publicacoes index. id_* fields are indexed and stored; sigla_*, descricao_* and nome_* fields are stored only (indexed="false"); "resumo" is the only analyzed (text_general) field. --> | |
68 | + <field name="id_publicacao" type="string" indexed="true" stored="true" /> | |
69 | + <field name="id_publicacao_legado" type="string" indexed="true" stored="true" /> | |
70 | + <field name="id_documento" type="string" indexed="true" stored="true" /> | |
71 | + <field name="id_protocolo_agrupador" type="string" indexed="true" stored="true" /> | |
72 | + <field name="id_orgao_responsavel" type="string" indexed="true" stored="true" /> | |
73 | + <field name="sigla_orgao_responsavel" type="string" indexed="false" stored="true" /> | |
74 | + <field name="descricao_orgao_responsavel" type="string" indexed="false" stored="true" /> | |
75 | + <field name="id_unidade_responsavel" type="string" indexed="true" stored="true" /> | |
76 | + <field name="sigla_unidade_responsavel" type="string" indexed="false" stored="true" /> | |
77 | + <field name="descricao_unidade_responsavel" type="string" indexed="false" stored="true" /> | |
78 | + <field name="id_serie" type="string" indexed="true" stored="true" /> | |
79 | + <field name="nome_serie" type="string" indexed="false" stored="true" /> | |
80 | + <field name="numero" type="string" indexed="true" stored="true" /> | |
81 | + <field name="protocolo_formatado_pesquisa" type="string" indexed="true" stored="false" /> | |
82 | + <field name="protocolo_documento_formatado" type="string" indexed="false" stored="true" /> | |
83 | + <field name="dta_documento" type="date" indexed="true" stored="true" /> | |
84 | + <field name="dta_publicacao" type="date" indexed="true" stored="true" /> | |
85 | + <field name="numero_publicacao" type="string" indexed="true" stored="true" /> | |
86 | + <field name="id_veiculo_publicacao" type="string" indexed="true" stored="true" /> | |
87 | + <field name="nome_veiculo_publicacao" type="string" indexed="false" stored="true" /> | |
88 | + <field name="resumo" type="text_general" indexed="true" stored="true" /> | |
89 | + <field name="id_veiculo_io" type="string" indexed="true" stored="true" /> | |
90 | + <field name="sigla_veiculo_io" type="string" indexed="false" stored="true" /> | |
91 | + <field name="descricao_veiculo_io" type="string" indexed="false" stored="true" /> | |
92 | + <field name="dta_publicacao_io" type="date" indexed="true" stored="true" /> | |
93 | + <field name="id_secao_io" type="string" indexed="true" stored="true" /> | |
94 | + <field name="nome_secao_io" type="string" indexed="false" stored="true" /> | |
95 | + <field name="pagina_io" type="string" indexed="false" stored="true" /> | |
96 | + | |
97 | + <!-- Valid attributes for fields: | |
98 | + name: mandatory - the name for the field | |
99 | + type: mandatory - the name of a field type from the | |
100 | + <types> fieldType section | |
101 | + indexed: true if this field should be indexed (searchable or sortable) | |
102 | + stored: true if this field should be retrievable | |
103 | + multiValued: true if this field may contain multiple values per document | |
104 | + omitNorms: (expert) set to true to omit the norms associated with | |
105 | + this field (this disables length normalization and index-time | |
106 | + boosting for the field, and saves some memory). Only full-text | |
107 | + fields or fields that need an index-time boost need norms. | |
108 | + Norms are omitted for primitive (non-analyzed) types by default. | |
109 | + termVectors: [false] set to true to store the term vector for a | |
110 | + given field. | |
111 | + When using MoreLikeThis, fields used for similarity should be | |
112 | + stored for best performance. | |
113 | + termPositions: Store position information with the term vector. | |
114 | + This will increase storage costs. | |
115 | + termOffsets: Store offset information with the term vector. This | |
116 | + will increase storage costs. | |
117 | + required: The field is required. It will throw an error if the | |
118 | + value does not exist | |
119 | + default: a value that should be used if no value is specified | |
120 | + when adding a document. | |
121 | + --> | |
122 | + | |
123 | + <!-- field names should consist of alphanumeric or underscore characters only and | |
124 | + not start with a digit. This is not currently strictly enforced, | |
125 | + but other field names will not have first class support from all components | |
126 | + and back compatibility is not guaranteed. Names with both leading and | |
127 | + trailing underscores (e.g. _version_) are reserved. | |
128 | + --> | |
129 | + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> | |
130 | + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> | |
131 | + <field name="name" type="text_general" indexed="true" stored="true"/> | |
132 | + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> | |
133 | + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> | |
134 | + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
135 | + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> | |
136 | + | |
137 | + <field name="weight" type="float" indexed="true" stored="true"/> | |
138 | + <field name="price" type="float" indexed="true" stored="true"/> | |
139 | + <field name="popularity" type="int" indexed="true" stored="true" /> | |
140 | + <field name="inStock" type="boolean" indexed="true" stored="true" /> | |
141 | + | |
142 | + <field name="store" type="location" indexed="true" stored="true"/> | |
143 | + <!-- mairon | |
144 | + <field name="documento" type="string" indexed="true" stored="true" /> | |
145 | + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> | |
146 | + <field name="name" type="text_general" indexed="true" stored="true"/> | |
147 | + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> | |
148 | + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> | |
149 | + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
150 | + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> | |
151 | + | |
152 | + <field name="weight" type="float" indexed="true" stored="true"/> | |
153 | + <field name="price" type="float" indexed="true" stored="true"/> | |
154 | + <field name="popularity" type="int" indexed="true" stored="true" /> | |
155 | + <field name="inStock" type="boolean" indexed="true" stored="true" /> | |
156 | + | |
157 | + <field name="store" type="location" indexed="true" stored="true"/> | |
158 | + --> | |
159 | + | |
160 | + <!-- Common metadata fields, named specifically to match up with | |
161 | + SolrCell metadata when parsing rich documents such as Word, PDF. | |
162 | + Some fields are multiValued only because Tika currently may return | |
163 | + multiple values for them. Some metadata is parsed from the documents, | |
164 | + but there are some which come from the client context: | |
165 | + "content_type": From the HTTP headers of incoming stream | |
166 | + "resourcename": From SolrCell request param resource.name | |
167 | + --> | |
168 | + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
169 | + <field name="subject" type="text_general" indexed="true" stored="true"/> | |
170 | + <field name="description" type="text_general" indexed="true" stored="true"/> | |
171 | + <field name="comments" type="text_general" indexed="true" stored="true"/> | |
172 | + <field name="author" type="text_general" indexed="true" stored="true"/> | |
173 | + <field name="keywords" type="text_general" indexed="true" stored="true"/> | |
174 | + <field name="category" type="text_general" indexed="true" stored="true"/> | |
175 | + <field name="resourcename" type="text_general" indexed="true" stored="true"/> | |
176 | + <field name="url" type="text_general" indexed="true" stored="true"/> | |
177 | + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> | |
178 | + <field name="last_modified" type="date" indexed="true" stored="true"/> | |
179 | + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> | |
180 | + | |
181 | + <!-- Main body of document extracted by SolrCell. | |
182 | + NOTE: This field is not indexed by default, since it is also copied to "text" | |
183 | + using copyField below. This is to save space. Use this field for returning and | |
184 | + highlighting document content. Use the "text" field to search the content. --> | |
185 | + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> | |
186 | + | |
187 | + | |
188 | + <!-- catchall field, containing all other searchable text fields (implemented | |
189 | + via copyField further on in this schema) --> | |
190 | + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> | |
191 | + | |
192 | + <!-- catchall text field that indexes tokens both normally and in reverse for efficient | |
193 | + leading wildcard queries. --> | |
194 | + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> | |
195 | + | |
196 | + <!-- non-tokenized version of manufacturer to make it easier to sort or group | |
197 | + results by manufacturer. copied from "manu" via copyField --> | |
198 | + <field name="manu_exact" type="string" indexed="true" stored="false"/> | |
199 | + | |
200 | + <field name="payloads" type="payloads" indexed="true" stored="true"/> | |
201 | + | |
202 | + <field name="_version_" type="long" indexed="true" stored="true"/> | |
203 | + | |
204 | + <!-- Uncommenting the following will create a "timestamp" field using | |
205 | + a default value of "NOW" to indicate when each document was indexed. | |
206 | + --> | |
207 | + <!-- | |
208 | + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/> | |
209 | + --> | |
210 | + | |
211 | + <!-- Dynamic field definitions allow using convention over configuration | |
212 | + for fields via the specification of patterns to match field names. | |
213 | + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) | |
214 | + RESTRICTION: the glob-like pattern in the name attribute must have | |
215 | + a "*" only at the start or the end. --> | |
216 | + | |
217 | + <dynamicField name="*_i" type="int" indexed="true" stored="true"/> | |
218 | + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/> | |
219 | + <dynamicField name="*_s" type="string" indexed="true" stored="true" /> | |
220 | + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/> | |
221 | + <dynamicField name="*_l" type="long" indexed="true" stored="true"/> | |
222 | + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/> | |
223 | + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |
224 | + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
225 | + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/> | |
226 | + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |
227 | + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/> | |
228 | + <dynamicField name="*_f" type="float" indexed="true" stored="true"/> | |
229 | + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/> | |
230 | + <dynamicField name="*_d" type="double" indexed="true" stored="true"/> | |
231 | + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/> | |
232 | + | |
233 | + <!-- Type used to index the lat and lon components for the "location" FieldType --> | |
234 | + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" /> | |
235 | + | |
236 | + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> | |
237 | + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/> | |
238 | + <dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |
239 | + | |
240 | + <!-- some trie-coded dynamic fields for faster range queries --> | |
241 | + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> | |
242 | + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> | |
243 | + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> | |
244 | + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> | |
245 | + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/> | |
246 | + | |
247 | + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> | |
248 | + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |
249 | + | |
250 | + <dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |
251 | + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> | |
252 | + | |
253 | + <dynamicField name="random_*" type="random" /> | |
254 | + | |
255 | + <!-- uncomment the following to ignore any fields that don't already match an existing | |
256 | + field name or dynamic field, rather than reporting them as an error. | |
257 | + alternately, change the type="ignored" to some other type e.g. "text" if you want | |
258 | + unknown fields indexed and/or stored by default --> | |
259 | + <!--dynamicField name="*" type="ignored" multiValued="true" /--> | |
260 | + | |
261 | + </fields> | |
262 | + | |
263 | + | |
264 | + <!-- Field to use to determine and enforce document uniqueness. | |
265 | + Unless this field is marked with required="false", it will be a required field | |
266 | + --> | |
267 | + <uniqueKey>id</uniqueKey> | |
268 | + | |
269 | + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when | |
270 | + parsing a query string that isn't explicit about the field. Machine (non-user) | |
271 | + generated queries are best made explicit, or they can use the "df" request parameter | |
272 | + which takes precedence over this. | |
273 | + Note: Un-commenting defaultSearchField will be insufficient if your request handler | |
274 | + in solrconfig.xml defines "df", which takes precedence. That would need to be removed. | |
275 | + <defaultSearchField>text</defaultSearchField> --> | |
276 | + | |
277 | + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers | |
278 | + when parsing a query string to determine if a clause of the query should be marked as | |
279 | + required or optional, assuming the clause isn't already marked by some operator. | |
280 | + The default is OR, which is generally assumed so it is not a good idea to change it | |
281 | + globally here. The "q.op" request parameter takes precedence over this. | |
282 | + <solrQueryParser defaultOperator="OR"/> --> | |
283 | + | |
284 | + <!-- copyField commands copy one field to another at the time a document | |
285 | + is added to the index. It's used either to index the same field differently, | |
286 | + or to add multiple fields to the same field for easier/faster searching. --> | |
287 | + | |
288 | + <!-- mairon | |
289 | + <copyField source="cat" dest="text"/> | |
290 | + <copyField source="name" dest="text"/> | |
291 | + <copyField source="manu" dest="text"/> | |
292 | + <copyField source="features" dest="text"/> | |
293 | + <copyField source="includes" dest="text"/> | |
294 | + <copyField source="manu" dest="manu_exact"/> | |
295 | + <copyField source="price" dest="price_c"/> | |
296 | + --> | |
297 | + | |
298 | + <!-- Text fields from SolrCell to search by default in our catch-all field --> | |
299 | + <copyField source="title" dest="text"/> | |
300 | + <copyField source="author" dest="text"/> | |
301 | + <copyField source="description" dest="text"/> | |
302 | + <copyField source="keywords" dest="text"/> | |
303 | + <copyField source="content" dest="text"/> | |
304 | + <copyField source="content_type" dest="text"/> | |
305 | + <copyField source="resourcename" dest="text"/> | |
306 | + <copyField source="url" dest="text"/> | |
307 | + | |
308 | + <!-- Create a string version of author for faceting --> | |
309 | + <copyField source="author" dest="author_s"/> | |
310 | + | |
311 | + <!-- Above, multiple source fields are copied to the [text] field. | |
312 | + Another way to map multiple source fields to the same | |
313 | + destination field is to use the dynamic field syntax. | |
314 | + copyField also supports a maxChars to copy setting. --> | |
315 | + | |
316 | + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> --> | |
317 | + | |
318 | + <!-- copy name to alphaNameSort, a field designed for sorting by name --> | |
319 | + <!-- <copyField source="name" dest="alphaNameSort"/> --> | |
320 | + | |
321 | + <types> | |
322 | + <!-- field type definitions. The "name" attribute is | |
323 | + just a label to be used by field definitions. The "class" | |
324 | + attribute and any other attributes determine the real | |
325 | + behavior of the fieldType. | |
326 | + Class names starting with "solr" refer to java classes in a | |
327 | + standard package such as org.apache.solr.analysis | |
328 | + --> | |
329 | + <!-- NOTE(review): "date" uses solr.DateField, the same class as the "pdate" type that this file reserves for compatibility with existing indexes; confirm this is intentional. --> | |
330 | + <fieldType name="date" class="solr.DateField" omitNorms="true"/> | |
331 | + | |
332 | + <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> | |
333 | + <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> | |
334 | + | |
335 | + <!-- boolean type: "true" or "false" --> | |
336 | + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |
337 | + | |
338 | + <!-- The sortMissingLast and sortMissingFirst attributes are optional and are | |
339 | + currently supported on types that are sorted internally as strings | |
340 | + and on numeric types. | |
341 | + This includes "string","boolean", and, as of 3.5 (and 4.x), | |
342 | + int, float, long, date, double, including the "Trie" variants. | |
343 | + - If sortMissingLast="true", then a sort on this field will cause documents | |
344 | + without the field to come after documents with the field, | |
345 | + regardless of the requested sort order (asc or desc). | |
346 | + - If sortMissingFirst="true", then a sort on this field will cause documents | |
347 | + without the field to come before documents with the field, | |
348 | + regardless of the requested sort order. | |
349 | + - If sortMissingLast="false" and sortMissingFirst="false" (the default), | |
350 | + then default lucene sorting will be used which places docs without the | |
351 | + field first in an ascending sort and last in a descending sort. | |
352 | + --> | |
353 | + | |
354 | + <!-- | |
355 | + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types. | |
356 | + --> | |
357 | + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> | |
358 | + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/> | |
359 | + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/> | |
360 | + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/> | |
361 | + | |
362 | + <!-- | |
363 | + Numeric field types that index each value at various levels of precision | |
364 | + to accelerate range queries when the number of values between the range | |
365 | + endpoints is large. See the javadoc for NumericRangeQuery for internal | |
366 | + implementation details. | |
367 | + | |
368 | + Smaller precisionStep values (specified in bits) will lead to more tokens | |
369 | + indexed per value, slightly larger index size, and faster range queries. | |
370 | + A precisionStep of 0 disables indexing at different precision levels. | |
371 | + --> | |
372 | + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/> | |
373 | + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/> | |
374 | + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/> | |
375 | + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/> | |
376 | + | |
377 | + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and | |
378 | + is a more restricted form of the canonical representation of dateTime | |
379 | + http://www.w3.org/TR/xmlschema-2/#dateTime | |
380 | + The trailing "Z" designates UTC time and is mandatory. | |
381 | + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z | |
382 | + All other components are mandatory. | |
383 | + | |
384 | + Expressions can also be used to denote calculations that should be | |
385 | + performed relative to "NOW" to determine the value, ie... | |
386 | + | |
387 | + NOW/HOUR | |
388 | + ... Round to the start of the current hour | |
389 | + NOW-1DAY | |
390 | + ... Exactly 1 day prior to now | |
391 | + NOW/DAY+6MONTHS+3DAYS | |
392 | + ... 6 months and 3 days in the future from the start of | |
393 | + the current day | |
394 | + | |
395 | + Consult the DateField javadocs for more information. | |
396 | + | |
397 | + Note: For faster range queries, consider the tdate type | |
398 | + --> | |
399 | +<!-- Disabled: the "date" type is declared earlier in this file using solr.DateField. | |
400 | + <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> --> | |
401 | + <!-- A Trie based date field for faster date range queries and date faceting. --> | |
402 | + <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/> | |
403 | + | |
404 | + | |
405 | + <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings --> | |
406 | + <fieldType name="binary" class="solr.BinaryField"/> | |
407 | + | |
408 | + <!-- | |
409 | + Note: | |
410 | + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions). | |
411 | + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last | |
412 | + | |
413 | + Plain numeric field types that store and index the text | |
414 | + value verbatim (and hence don't correctly support range queries, since the | |
415 | + lexicographic ordering isn't equal to the numeric ordering) | |
416 | + --> | |
417 | + <fieldType name="pint" class="solr.IntField"/> | |
418 | + <fieldType name="plong" class="solr.LongField"/> | |
419 | + <fieldType name="pfloat" class="solr.FloatField"/> | |
420 | + <fieldType name="pdouble" class="solr.DoubleField"/> | |
421 | + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/> | |
422 | + | |
423 | + <!-- The "RandomSortField" is not used to store or search any | |
424 | + data. You can declare fields of this type in your schema | |
425 | + to generate pseudo-random orderings of your docs for sorting | |
426 | + or function purposes. The ordering is generated based on the field | |
427 | + name and the version of the index. As long as the index version | |
428 | + remains unchanged, and the same field name is reused, | |
429 | + the ordering of the docs will be consistent. | |
430 | + If you want different pseudo-random orderings of documents, | |
431 | + for the same version of the index, use a dynamicField and | |
432 | + change the field name in the request. | |
433 | + --> | |
434 | + <fieldType name="random" class="solr.RandomSortField" indexed="true" /> | |
435 | + | |
436 | + <!-- solr.TextField allows the specification of custom text analyzers | |
437 | + specified as a tokenizer and a list of token filters. Different | |
438 | + analyzers may be specified for indexing and querying. | |
439 | + | |
440 | + The optional positionIncrementGap puts space between multiple fields of | |
441 | + this type on the same document, with the purpose of preventing false phrase | |
442 | + matching across fields. | |
443 | + | |
444 | + For more info on customizing your analyzer chain, please see | |
445 | + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters | |
446 | + --> | |
447 | + | |
448 | + <!-- One can also specify an existing Analyzer class that has a | |
449 | + default constructor via the class attribute on the analyzer element. | |
450 | + Example: | |
451 | + <fieldType name="text_greek" class="solr.TextField"> | |
452 | + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> | |
453 | + </fieldType> | |
454 | + --> | |
455 | + | |
456 | + <!-- A text field that only splits on whitespace for exact matching of words --> | |
457 | + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |
458 | + <analyzer> | |
459 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
460 | + </analyzer> | |
461 | + </fieldType> | |
462 | + | |
463 | + <!-- A general text field: it tokenizes with StandardTokenizer, | |
464 | + removes stop words listed in "lang/stopwords_pt.txt" (matched | |
465 | + case-insensitively), down cases, and folds accented characters | |
466 | + to their ASCII equivalents. The same chain is applied at index | |
467 | + time and at query time. Synonyms are currently disabled. --> | |
468 | + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> | |
469 | + <analyzer type="index"> | |
470 | + | |
471 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
472 | + | |
473 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
474 | + <!-- in this example, we will only use synonyms at query time | |
475 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
476 | + --> | |
477 | + <filter class="solr.LowerCaseFilterFactory"/> | |
478 | + <!-- Fold accented characters to ASCII so indexed terms are accent-insensitive. --> | |
479 | + <!-- mairon --> | |
480 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
481 | + <!-- mairon --> | |
482 | + | |
483 | + | |
484 | + </analyzer> | |
485 | + | |
486 | + <analyzer type="query"> | |
487 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
488 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" /> | |
489 | + <filter class="solr.LowerCaseFilterFactory"/> | |
490 | + <!-- Must mirror the index-time folding: otherwise an accented query term can never match the folded terms stored in the index. --> | |
491 | + <!-- mairon --> | |
492 | + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> --> | |
493 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
494 | + <!-- <filter class="solr.BrazilianStemFilterFactory"/> --> | |
495 | + <!-- mairon --> | |
496 | + | |
497 | + </analyzer> | |
498 | + | |
499 | + </fieldType> | |
500 | + | |
501 | + <!-- A text field with defaults appropriate for English: it | |
502 | + tokenizes with StandardTokenizer, removes English stop words | |
503 | + (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and | |
504 | + finally applies Porter's stemming. The query time analyzer | |
505 | + also applies synonyms from synonyms.txt. --> | |
506 | + <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |
507 | + <analyzer type="index"> | |
508 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
509 | + <!-- in this example, we will only use synonyms at query time | |
510 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
511 | + --> | |
512 | + <!-- Case insensitive stop word removal. | |
513 | + add enablePositionIncrements=true in both the index and query | |
514 | + analyzers to leave a 'gap' for more accurate phrase queries. | |
515 | + --> | |
516 | + <filter class="solr.StopFilterFactory" | |
517 | + ignoreCase="true" | |
518 | + words="lang/stopwords_en.txt" | |
519 | + enablePositionIncrements="true" | |
520 | + /> | |
521 | + <filter class="solr.LowerCaseFilterFactory"/> | |
522 | + <filter class="solr.EnglishPossessiveFilterFactory"/> | |
523 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
524 | + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | |
525 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
526 | + --> | |
527 | + <filter class="solr.PorterStemFilterFactory"/> | |
528 | + </analyzer> | |
529 | + <analyzer type="query"> | |
530 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
531 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
532 | + <filter class="solr.StopFilterFactory" | |
533 | + ignoreCase="true" | |
534 | + words="lang/stopwords_en.txt" | |
535 | + enablePositionIncrements="true" | |
536 | + /> | |
537 | + <filter class="solr.LowerCaseFilterFactory"/> | |
538 | + <filter class="solr.EnglishPossessiveFilterFactory"/> | |
539 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
540 | + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | |
541 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
542 | + --> | |
543 | + <filter class="solr.PorterStemFilterFactory"/> | |
544 | + </analyzer> | |
545 | + </fieldType> | |
546 | + | |
547 | + <!-- A text field with defaults appropriate for English, plus | |
548 | + aggressive word-splitting and autophrase features enabled. | |
549 | + This field is just like text_en, except it adds | |
550 | + WordDelimiterFilter to enable splitting and matching of | |
551 | + words on case-change, alpha numeric boundaries, and | |
552 | + non-alphanumeric chars. This means certain compound word | |
553 | + cases will work, for example query "wi fi" will match | |
554 | + document "WiFi" or "wi-fi". | |
555 | + --> | |
556 | + <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
557 | + <analyzer type="index"> | |
558 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
559 | + <!-- in this example, we will only use synonyms at query time | |
560 | + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
561 | + --> | |
562 | + <!-- Case insensitive stop word removal. | |
563 | + add enablePositionIncrements=true in both the index and query | |
564 | + analyzers to leave a 'gap' for more accurate phrase queries. | |
565 | + --> | |
566 | + <filter class="solr.StopFilterFactory" | |
567 | + ignoreCase="true" | |
568 | + words="lang/stopwords_en.txt" | |
569 | + enablePositionIncrements="true" | |
570 | + /> | |
571 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> | |
572 | + <filter class="solr.LowerCaseFilterFactory"/> | |
573 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
574 | + <filter class="solr.PorterStemFilterFactory"/> | |
575 | + | |
576 | + | |
577 | + </analyzer> | |
578 | + <analyzer type="query"> | |
579 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
580 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
581 | + <filter class="solr.StopFilterFactory" | |
582 | + ignoreCase="true" | |
583 | + words="lang/stopwords_en.txt" | |
584 | + enablePositionIncrements="true" | |
585 | + /> | |
586 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> | |
587 | + <filter class="solr.LowerCaseFilterFactory"/> | |
588 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
589 | + <filter class="solr.PorterStemFilterFactory"/> | |
590 | + </analyzer> | |
591 | + </fieldType> | |
592 | + | |
593 | + <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names, | |
594 | + but may be good for SKUs. Can insert dashes in the wrong place and still match. --> | |
595 | + <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
596 | + <analyzer> | |
597 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
598 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | |
599 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> | |
600 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> | |
601 | + <filter class="solr.LowerCaseFilterFactory"/> | |
602 | + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
603 | + <filter class="solr.EnglishMinimalStemFilterFactory"/> | |
604 | + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes | |
605 | + possible with WordDelimiterFilter in conjunction with stemming. --> | |
606 | + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
607 | + </analyzer> | |
608 | + </fieldType> | |
609 | + | |
610 | + <!-- Just like text_general except it reverses the characters of | |
611 | + each token, to enable more efficient leading wildcard queries. --> | |
612 | + <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |
613 | + <analyzer type="index"> | |
614 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
615 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
616 | + <filter class="solr.LowerCaseFilterFactory"/> | |
617 | + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" | |
618 | + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> | |
619 | + </analyzer> | |
620 | + <analyzer type="query"> | |
621 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
622 | + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
623 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
624 | + <filter class="solr.LowerCaseFilterFactory"/> | |
625 | + </analyzer> | |
626 | + </fieldType> | |
627 | + | |
628 | + <!-- charFilter + WhitespaceTokenizer --> | |
629 | + <!-- | |
630 | + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > | |
631 | + <analyzer> | |
632 | + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> | |
633 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
634 | + </analyzer> | |
635 | + </fieldType> | |
636 | + --> | |
637 | + | |
638 | + <!-- This is an example of using the KeywordTokenizer along | |
639 | + with various TokenFilterFactories to produce a sortable field | |
640 | + that does not include some properties of the source text | |
641 | + --> | |
642 | + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> | |
643 | + <analyzer> | |
644 | + <!-- KeywordTokenizer does no actual tokenizing, so the entire | |
645 | + input string is preserved as a single token | |
646 | + --> | |
647 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
648 | + <!-- The LowerCase TokenFilter does what you expect, which can be | |
649 | + useful when you want your sorting to be case insensitive | |
650 | + --> | |
651 | + <filter class="solr.LowerCaseFilterFactory" /> | |
652 | + <!-- The TrimFilter removes any leading or trailing whitespace --> | |
653 | + <filter class="solr.TrimFilterFactory" /> | |
654 | + <!-- The PatternReplaceFilter gives you the flexibility to use | |
655 | + Java Regular expression to replace any sequence of characters | |
656 | + matching a pattern with an arbitrary replacement string, | |
657 | + which may include back references to portions of the original | |
658 | + string matched by the pattern. The pattern used below strips every character other than a-z (digits and accented letters included). | |
659 | + | |
660 | + See the Java Regular Expression documentation for more | |
661 | + information on pattern and replacement string syntax. | |
662 | + | |
663 | + https://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html | |
664 | + --> | |
665 | + <filter class="solr.PatternReplaceFilterFactory" | |
666 | + pattern="([^a-z])" replacement="" replace="all" | |
667 | + /> | |
668 | + </analyzer> | |
669 | + </fieldType> | |
670 | + | |
671 | + <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > | |
672 | + <analyzer> | |
673 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
674 | + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |
675 | + </analyzer> | |
676 | + </fieldType> | |
677 | + | |
678 | + <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > | |
679 | + <analyzer> | |
680 | + <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
681 | + <!-- | |
682 | + The DelimitedPayloadTokenFilter can put payloads on tokens... for example, | |
683 | + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f | |
684 | + Attributes of the DelimitedPayloadTokenFilterFactory : | |
685 | + "delimiter" - a one character delimiter. Default is | (pipe) | |
686 | + "encoder" - how to encode the following value into a payload | |
687 | + float -> org.apache.lucene.analysis.payloads.FloatEncoder, | |
688 | + integer -> o.a.l.a.p.IntegerEncoder | |
689 | + identity -> o.a.l.a.p.IdentityEncoder | |
690 | + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. | |
691 | + --> | |
692 | + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> | |
693 | + </analyzer> | |
694 | + </fieldType> | |
695 | + | |
696 | + <!-- lowercases the entire field value, keeping it as a single token. --> | |
697 | + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |
698 | + <analyzer> | |
699 | + <tokenizer class="solr.KeywordTokenizerFactory"/> | |
700 | + <filter class="solr.LowerCaseFilterFactory" /> | |
701 | + </analyzer> | |
702 | + </fieldType> | |
703 | + | |
704 | + <!-- | |
705 | + Example of using PathHierarchyTokenizerFactory at index time, so | |
706 | + queries for paths match documents at that path, or in descendent paths | |
707 | + --> | |
708 | + <fieldType name="descendent_path" class="solr.TextField"> | |
709 | + <analyzer type="index"> | |
710 | + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | |
711 | + </analyzer> | |
712 | + <analyzer type="query"> | |
713 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
714 | + </analyzer> | |
715 | + </fieldType> | |
716 | + <!-- | |
717 | + Example of using PathHierarchyTokenizerFactory at query time, so | |
718 | + queries for paths match documents at that path, or in ancestor paths | |
719 | + --> | |
720 | + <fieldType name="ancestor_path" class="solr.TextField"> | |
721 | + <analyzer type="index"> | |
722 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
723 | + </analyzer> | |
724 | + <analyzer type="query"> | |
725 | + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | |
726 | + </analyzer> | |
727 | + </fieldType> | |
728 | + | |
729 | + <!-- since fields of this type are by default not stored or indexed, | |
730 | + any data added to them will be ignored outright. --> | |
731 | + <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> | |
732 | + | |
733 | + <!-- This point type indexes the coordinates as separate fields (subFields) | |
734 | + If subFieldType is defined, it references a type, and a dynamic field | |
735 | + definition is created matching *___<typename>. Alternately, if | |
736 | + subFieldSuffix is defined, that is used to create the subFields. | |
737 | + Example: if subFieldType="double", then the coordinates would be | |
738 | + indexed in fields myloc_0___double,myloc_1___double. | |
739 | + Example: if subFieldSuffix="_d" then the coordinates would be indexed | |
740 | + in fields myloc_0_d,myloc_1_d | |
741 | + The subFields are an implementation detail of the fieldType, and end | |
742 | + users normally should not need to know about them. | |
743 | + --> | |
744 | + <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> | |
745 | + | |
746 | + <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. --> | |
747 | + <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |
748 | + | |
749 | + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes. | |
750 | + For more information about this and other Spatial fields new to Solr 4, see: | |
751 | + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 | |
752 | + --> | |
753 | + <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" | |
754 | + geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" /> | |
755 | + | |
756 | + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType | |
757 | + Parameters: | |
758 | + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD" | |
759 | + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount | |
760 | + providerClass: Lets you plug in other exchange provider backend: | |
761 | + solr.FileExchangeRateProvider is the default and takes one parameter: | |
762 | + currencyConfig: name of an xml file holding exchange rates | |
763 | + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org: | |
764 | + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web) | |
765 | + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60) | |
766 | + --> | |
767 | + <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" /> | |
768 | + | |
769 | + | |
770 | + | |
771 | + <!-- some examples for different languages (generally ordered by ISO code) --> | |
772 | + | |
773 | + <!-- Arabic --> | |
774 | + <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |
775 | + <analyzer> | |
776 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
777 | + <!-- for any non-arabic --> | |
778 | + <filter class="solr.LowerCaseFilterFactory"/> | |
779 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/> | |
780 | + <!-- normalizes ﻯ to ﻱ, etc --> | |
781 | + <filter class="solr.ArabicNormalizationFilterFactory"/> | |
782 | + <filter class="solr.ArabicStemFilterFactory"/> | |
783 | + </analyzer> | |
784 | + </fieldType> | |
785 | + | |
786 | + <!-- Bulgarian --> | |
787 | + <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |
788 | + <analyzer> | |
789 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
790 | + <filter class="solr.LowerCaseFilterFactory"/> | |
791 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/> | |
792 | + <filter class="solr.BulgarianStemFilterFactory"/> | |
793 | + </analyzer> | |
794 | + </fieldType> | |
795 | + | |
796 | + <!-- Catalan --> | |
797 | + <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |
798 | + <analyzer> | |
799 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
800 | + <!-- removes l', etc --> | |
801 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> | |
802 | + <filter class="solr.LowerCaseFilterFactory"/> | |
803 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/> | |
804 | + <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |
805 | + </analyzer> | |
806 | + </fieldType> | |
807 | + | |
808 | + <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> | |
809 | + <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |
810 | + <analyzer> | |
811 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
812 | + <!-- normalize width before bigram, as e.g. half-width dakuten combine --> | |
813 | + <filter class="solr.CJKWidthFilterFactory"/> | |
814 | + <!-- for any non-CJK --> | |
815 | + <filter class="solr.LowerCaseFilterFactory"/> | |
816 | + <filter class="solr.CJKBigramFilterFactory"/> | |
817 | + </analyzer> | |
818 | + </fieldType> | |
819 | + | |
820 | + <!-- Czech --> | |
821 | + <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |
822 | + <analyzer> | |
823 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
824 | + <filter class="solr.LowerCaseFilterFactory"/> | |
825 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/> | |
826 | + <filter class="solr.CzechStemFilterFactory"/> | |
827 | + </analyzer> | |
828 | + </fieldType> | |
829 | + | |
830 | + <!-- Danish --> | |
831 | + <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |
832 | + <analyzer> | |
833 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
834 | + <filter class="solr.LowerCaseFilterFactory"/> | |
835 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/> | |
836 | + <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |
837 | + </analyzer> | |
838 | + </fieldType> | |
839 | + | |
840 | + <!-- German --> | |
841 | + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |
842 | + <analyzer> | |
843 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
844 | + <filter class="solr.LowerCaseFilterFactory"/> | |
845 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/> | |
846 | + <filter class="solr.GermanNormalizationFilterFactory"/> | |
847 | + <filter class="solr.GermanLightStemFilterFactory"/> | |
848 | + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> | |
849 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> | |
850 | + </analyzer> | |
851 | + </fieldType> | |
852 | + | |
853 | + <!-- Greek --> | |
854 | + <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |
855 | + <analyzer> | |
856 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
857 | + <!-- greek specific lowercase for sigma --> | |
858 | + <filter class="solr.GreekLowerCaseFilterFactory"/> | |
859 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/> | |
860 | + <filter class="solr.GreekStemFilterFactory"/> | |
861 | + </analyzer> | |
862 | + </fieldType> | |
863 | + | |
864 | + <!-- Spanish --> | |
865 | + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |
866 | + <analyzer> | |
867 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
868 | + <filter class="solr.LowerCaseFilterFactory"/> | |
869 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/> | |
870 | + <filter class="solr.SpanishLightStemFilterFactory"/> | |
871 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> | |
872 | + </analyzer> | |
873 | + </fieldType> | |
874 | + | |
875 | + <!-- Basque --> | |
876 | + <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |
877 | + <analyzer> | |
878 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
879 | + <filter class="solr.LowerCaseFilterFactory"/> | |
880 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/> | |
881 | + <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |
882 | + </analyzer> | |
883 | + </fieldType> | |
884 | + | |
885 | + <!-- Persian --> | |
886 | + <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |
887 | + <analyzer> | |
888 | + <!-- for ZWNJ --> | |
889 | + <charFilter class="solr.PersianCharFilterFactory"/> | |
890 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
891 | + <filter class="solr.LowerCaseFilterFactory"/> | |
892 | + <filter class="solr.ArabicNormalizationFilterFactory"/> | |
893 | + <filter class="solr.PersianNormalizationFilterFactory"/> | |
894 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/> | |
895 | + </analyzer> | |
896 | + </fieldType> | |
897 | + | |
898 | + <!-- Finnish --> | |
899 | + <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |
900 | + <analyzer> | |
901 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
902 | + <filter class="solr.LowerCaseFilterFactory"/> | |
903 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/> | |
904 | + <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |
905 | + <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> | |
906 | + </analyzer> | |
907 | + </fieldType> | |
908 | + | |
909 | + <!-- French --> | |
910 | + <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |
911 | + <analyzer> | |
912 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
913 | + <!-- removes l', etc --> | |
914 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> | |
915 | + <filter class="solr.LowerCaseFilterFactory"/> | |
916 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/> | |
917 | + <filter class="solr.FrenchLightStemFilterFactory"/> | |
918 | + <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> | |
919 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> | |
920 | + </analyzer> | |
921 | + </fieldType> | |
922 | + | |
923 | + <!-- Irish --> | |
924 | + <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |
925 | + <analyzer> | |
926 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
927 | + <!-- removes d', etc --> | |
928 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> | |
929 | + <!-- removes n-, etc. position increments is intentionally false! NOTE(review): Lucene appears to reject enablePositionIncrements="false" when luceneMatchVersion is 4.4 or later; TODO confirm against the luceneMatchVersion set in solrconfig.xml --> | |
930 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/> | |
931 | + <filter class="solr.IrishLowerCaseFilterFactory"/> | |
932 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/> | |
933 | + <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |
934 | + </analyzer> | |
935 | + </fieldType> | |
936 | + | |
937 | + <!-- Galician --> | |
938 | + <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |
939 | + <analyzer> | |
940 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
941 | + <filter class="solr.LowerCaseFilterFactory"/> | |
942 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/> | |
943 | + <filter class="solr.GalicianStemFilterFactory"/> | |
944 | + <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> | |
945 | + </analyzer> | |
946 | + </fieldType> | |
947 | + | |
948 | + <!-- Hindi --> | |
949 | + <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |
950 | + <analyzer> | |
951 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
952 | + <filter class="solr.LowerCaseFilterFactory"/> | |
953 | + <!-- normalizes unicode representation --> | |
954 | + <filter class="solr.IndicNormalizationFilterFactory"/> | |
955 | + <!-- normalizes variation in spelling --> | |
956 | + <filter class="solr.HindiNormalizationFilterFactory"/> | |
957 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/> | |
958 | + <filter class="solr.HindiStemFilterFactory"/> | |
959 | + </analyzer> | |
960 | + </fieldType> | |
961 | + | |
962 | + <!-- Hungarian --> | |
963 | + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |
964 | + <analyzer> | |
965 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
966 | + <filter class="solr.LowerCaseFilterFactory"/> | |
967 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/> | |
968 | + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |
969 | + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> | |
970 | + </analyzer> | |
971 | + </fieldType> | |
972 | + | |
973 | + <!-- Armenian --> | |
974 | + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |
975 | + <analyzer> | |
976 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
977 | + <filter class="solr.LowerCaseFilterFactory"/> | |
978 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/> | |
979 | + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |
980 | + </analyzer> | |
981 | + </fieldType> | |
982 | + | |
983 | + <!-- Indonesian --> | |
984 | + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |
985 | + <analyzer> | |
986 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
987 | + <filter class="solr.LowerCaseFilterFactory"/> | |
988 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/> | |
989 | + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> | |
990 | + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |
991 | + </analyzer> | |
992 | + </fieldType> | |
993 | + | |
994 | + <!-- Italian --> | |
995 | + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |
996 | + <analyzer> | |
997 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
998 | + <!-- removes l', etc --> | |
999 | + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> | |
1000 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1001 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/> | |
1002 | + <filter class="solr.ItalianLightStemFilterFactory"/> | |
1003 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> | |
1004 | + </analyzer> | |
1005 | + </fieldType> | |
1006 | + | |
1007 | + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming) | |
1008 | + | |
1009 | + NOTE: If you want to optimize search for precision, use default operator AND in your query | |
1010 | + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use | |
1011 | + OR if you would like to optimize for recall (default). | |
1012 | + --> | |
1013 | + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> | |
1014 | + <analyzer> | |
1015 | + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) | |
1016 | + | |
1017 | + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic | |
1018 | + is used to segment compounds into its parts and the compound itself is kept as synonym. | |
1019 | + | |
1020 | + Valid values for attribute mode are: | |
1021 | + normal: regular segmentation | |
1022 | + search: segmentation useful for search with synonyms compounds (default) | |
1023 | + extended: same as search mode, but unigrams unknown words (experimental) | |
1024 | + | |
1025 | + For some applications it might be good to use search mode for indexing and normal mode for | |
1026 | + queries to reduce recall and prevent parts of compounds from being matched and highlighted. | |
1027 | + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. | |
1028 | + | |
1029 | + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical | |
1030 | + model with your own entries for segmentation, part-of-speech tags and readings without a need | |
1031 | + to specify weights. Notice that user dictionaries have not been subject to extensive testing. | |
1032 | + | |
1033 | + User dictionary attributes are: | |
1034 | + userDictionary: user dictionary filename | |
1035 | + userDictionaryEncoding: user dictionary encoding (default is UTF-8) | |
1036 | + | |
1037 | + See lang/userdict_ja.txt for a sample user dictionary file. | |
1038 | + | |
1039 | + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. | |
1040 | + | |
1041 | + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. | |
1042 | + --> | |
1043 | + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |
1044 | + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> | |
1045 | + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> | |
1046 | + <filter class="solr.JapaneseBaseFormFilterFactory"/> | |
1047 | + <!-- Removes tokens with certain part-of-speech tags --> | |
1048 | + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/> | |
1049 | + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> | |
1050 | + <filter class="solr.CJKWidthFilterFactory"/> | |
1051 | + <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> | |
1052 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" /> | |
1053 | + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> | |
1054 | + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |
1055 | + <!-- Lower-cases romaji characters --> | |
1056 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1057 | + </analyzer> | |
1058 | + </fieldType> | |
1059 | + | |
1060 | + <!-- Latvian --> | |
1061 | + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |
1062 | + <analyzer> | |
1063 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1064 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1065 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/> | |
1066 | + <filter class="solr.LatvianStemFilterFactory"/> | |
1067 | + </analyzer> | |
1068 | + </fieldType> | |
1069 | + | |
1070 | + <!-- Dutch --> | |
1071 | + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |
1072 | + <analyzer> | |
1073 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1074 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1075 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/> | |
1076 | + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |
1077 | + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |
1078 | + </analyzer> | |
1079 | + </fieldType> | |
1080 | + | |
1081 | + <!-- Norwegian --> | |
1082 | + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |
1083 | + <analyzer> | |
1084 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1085 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1086 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/> | |
1087 | + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |
1088 | + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> | |
1089 | + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> | |
1090 | + </analyzer> | |
1091 | + </fieldType> | |
1092 | + | |
1093 | + <!-- Portuguese --> | |
1094 | + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |
1095 | + <analyzer> | |
1096 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1097 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1098 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/> | |
1099 | + | |
1100 | + <!-- start of customization (mairon): PortugueseLightStemFilterFactory is disabled; BrazilianStemFilterFactory followed by ASCIIFoldingFilterFactory is used instead --> | |
1101 | + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> --> | |
1102 | + <filter class="solr.BrazilianStemFilterFactory"/> | |
1103 | + <filter class="solr.ASCIIFoldingFilterFactory"/> | |
1104 | + <!-- end of customization (mairon) --> | |
1105 | + | |
1106 | + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> | |
1107 | + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> | |
1108 | + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> | |
1109 | + </analyzer> | |
1110 | + </fieldType> | |
1111 | + | |
1112 | + <!-- Romanian --> | |
1113 | + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |
1114 | + <analyzer> | |
1115 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1116 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1117 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/> | |
1118 | + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |
1119 | + </analyzer> | |
1120 | + </fieldType> | |
1121 | + | |
1122 | + <!-- Russian: standard tokenizer, lowercasing, Snowball-format stop words read from lang/stopwords_ru.txt, then Snowball stemming with language="Russian" --> | |
1123 | + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |
1124 | + <analyzer> | |
1125 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1126 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1127 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/> | |
1128 | + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |
1129 | + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> | |
1130 | + </analyzer> | |
1131 | + </fieldType> | |
1132 | + | |
1133 | + <!-- Swedish: standard tokenizer, lowercasing, Snowball-format stop words read from lang/stopwords_sv.txt, then Snowball stemming with language="Swedish" --> | |
1134 | + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |
1135 | + <analyzer> | |
1136 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1137 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1138 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/> | |
1139 | + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |
1140 | + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> | |
1141 | + </analyzer> | |
1142 | + </fieldType> | |
1143 | + | |
1144 | + <!-- Thai: standard tokenizer, lowercasing, ThaiWordFilterFactory, then stop words read from lang/stopwords_th.txt. NOTE(review): ThaiWordFilterFactory is believed to be deprecated since Lucene/Solr 4.8 in favour of solr.ThaiTokenizerFactory; confirm before upgrading Solr --> | |
1145 | + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |
1146 | + <analyzer> | |
1147 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1148 | + <filter class="solr.LowerCaseFilterFactory"/> | |
1149 | + <filter class="solr.ThaiWordFilterFactory"/> | |
1150 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/> | |
1151 | + </analyzer> | |
1152 | + </fieldType> | |
1153 | + | |
1154 | + <!-- Turkish: standard tokenizer, TurkishLowerCaseFilterFactory for lowercasing, case-sensitive stop words (ignoreCase="false") read from lang/stopwords_tr.txt, then Snowball stemming with language="Turkish" --> | |
1155 | + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |
1156 | + <analyzer> | |
1157 | + <tokenizer class="solr.StandardTokenizerFactory"/> | |
1158 | + <filter class="solr.TurkishLowerCaseFilterFactory"/> | |
1159 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/> | |
1160 | + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |
1161 | + </analyzer> | |
1162 | + </fieldType> | |
1163 | + | |
1164 | + </types> | |
1165 | + | |
1166 | + <!-- Similarity is the scoring routine for each document vs. a query. | |
1167 | + A custom Similarity or SimilarityFactory may be specified here, but | |
1168 | + the default is fine for most applications. | |
1169 | + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity | |
1170 | + --> | |
1171 | + <!-- | |
1172 | + <similarity class="com.example.solr.CustomSimilarityFactory"> | |
1173 | + <str name="paramkey">param value</str> | |
1174 | + </similarity> | |
1175 | + --> | |
1176 | + | |
1177 | +</schema> | |
0 | 1178 | \ No newline at end of file | ... | ... |