Commit eae469285098a323fd86383f6fa0f3b4d21c6b19

Authored by guilherme.cantoni
0 parents

Commit inicial do projeto

Dockerfile 0 → 100644
  1 +++ a/Dockerfile
... ... @@ -0,0 +1,111 @@
  1 +###########################################################
  2 +# Dockerfile de construção do container Solr
  3 +# utilizado pelo SEI e pelo SIP
  4 +#
  5 +# Baseado no CentOS 6.X
  6 +############################################################
  7 +
  8 +# Define a imagem de base para o CentOS 6.X
  9 +FROM centos:centos6
  10 +
  11 +# Autor e mantenedor do container
  12 +MAINTAINER MPOG\Guilherme Andrade Del Cantoni
  13 +
  14 +# Atualização do source list do repositório
  15 +RUN yum update -y && yum upgrade -y
  16 +
  17 +################## INICIO DA INSTALACAO ######################
  18 +# Instalação do Solr seguindo as recomendações descritas na documentação do SEI
  19 +# Ref: http://<WIKI DO PROJETO>#<SESSAO DE INSTALAÇÃO DO SOLR>
  20 +
  21 +# Instalação do pacote Java JDK
  22 +RUN yum install -y java-1.7.0-openjdk-devel
  23 +
  24 +# Instalação dos utilitários utilizados no provisionamento
  25 +RUN yum install -y wget tar which curl
  26 +
  27 +# Criação de usuário de execução do serviço
  28 +RUN useradd -r -s /bin/bash solr
  29 +
  30 +# Obtenção do pacote de instalação do Solr 4.9
  31 +RUN wget -q -O /opt/solr-4.9.0.tgz http://archive.apache.org/dist/lucene/solr/4.9.0/solr-4.9.0.tgz
  32 +
  33 +# Instalação do Solr
  34 +RUN tar xvz -C /opt -f /opt/solr-4.9.0.tgz && chown -R solr:solr /opt/solr-4.9.0 && rm /opt/solr-4.9.0.tgz
  35 +
  36 +# Configuração dos parâmetros default
  37 +RUN echo "JAVA_OPTIONS='-Dsolr.solr.home=/opt/solr-4.9.0/example/solr $JAVA_OPTIONS'" | tee -a /etc/default/jetty && \
  38 + echo "JETTY_HOME=/opt/solr-4.9.0/example" | tee -a /etc/default/jetty && \
  39 + echo "JETTY_USER=solr" | tee -a /etc/default/jetty
  40 +
  41 +# Configuração do script de inicialização automática
  42 +RUN wget -q -O /etc/init.d/solr http://dev.eclipse.org/svnroot/rt/org.eclipse.jetty/jetty/trunk/jetty-distribution/src/main/resources/bin/jetty.sh
  43 +RUN chmod +x /etc/init.d/solr
  44 +
  45 +# Criação dos diretório de armazenamento de índices
  46 +RUN mkdir /var/opt/sei && \
  47 + mkdir /var/opt/sei/sei-protocolos && ln -s /var/opt/sei/sei-protocolos /opt/solr-4.9.0/example/solr/ && \
  48 + mkdir /var/opt/sei/sei-bases-conhecimento && ln -s /var/opt/sei/sei-bases-conhecimento /opt/solr-4.9.0/example/solr/ && \
  49 + mkdir /var/opt/sei/sei-publicacoes && ln -s /var/opt/sei/sei-publicacoes /opt/solr-4.9.0/example/solr/
  50 +
  51 +# Copia do conjunto de configurações padrão do Solr
  52 +RUN cp -R /opt/solr-4.9.0/example/solr/collection1/conf /var/opt/sei/sei-protocolos/ && \
  53 + cp -R /opt/solr-4.9.0/example/solr/collection1/conf /var/opt/sei/sei-bases-conhecimento/ && \
  54 + cp -R /opt/solr-4.9.0/example/solr/collection1/conf /var/opt/sei/sei-publicacoes/
  55 +
  56 +# Removeção dos arquivos schema.xml e solrconfig.xml das cópias
  57 +RUN rm /var/opt/sei/sei-protocolos/conf/schema.xml && \
  58 + rm /var/opt/sei/sei-bases-conhecimento/conf/schema.xml && \
  59 + rm /var/opt/sei/sei-publicacoes/conf/schema.xml && \
  60 + rm /var/opt/sei/sei-protocolos/conf/solrconfig.xml && \
  61 + rm /var/opt/sei/sei-bases-conhecimento/conf/solrconfig.xml && \
  62 + rm /var/opt/sei/sei-publicacoes/conf/solrconfig.xml
  63 +
  64 +# Configuração dos índices do SEI
  65 +ADD index/sei-protocolos-schema.xml /var/opt/sei/sei-protocolos/conf/sei-protocolos-schema.xml
  66 +ADD index/sei-bases-conhecimento-schema.xml /var/opt/sei/sei-bases-conhecimento/conf/sei-bases-conhecimento-schema.xml
  67 +ADD index/sei-publicacoes-schema.xml /var/opt/sei/sei-publicacoes/conf/sei-publicacoes-schema.xml
  68 +ADD index/sei-protocolos-config.xml /var/opt/sei/sei-protocolos/conf/sei-protocolos-config.xml
  69 +ADD index/sei-bases-conhecimento-config.xml /var/opt/sei/sei-bases-conhecimento/conf/sei-bases-conhecimento-config.xml
  70 +ADD index/sei-publicacoes-config.xml /var/opt/sei/sei-publicacoes/conf/sei-publicacoes-config.xml
  71 +
  72 +# Criação de diretório de conteúdo para os índices
  73 +RUN mkdir /var/opt/sei/sei-protocolos/conteudo && \
  74 + mkdir /var/opt/sei/sei-bases-conhecimento/conteudo && \
  75 + mkdir /var/opt/sei/sei-publicacoes/conteudo
  76 +
  77 +# Configuração dos links a instalação do Solr e o índice Protocolo
  78 +RUN ln -s /opt/solr-4.9.0/contrib/ /var/opt/sei/sei-protocolos/contrib && \
  79 + ln -s /opt/solr-4.9.0/dist/ /var/opt/sei/sei-protocolos/dist && \
  80 + ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-protocolos/lib
  81 +
  82 +# Configuração dos links a instalação do Solr e o índice Base de Conhecimento
  83 +RUN ln -s /opt/solr-4.9.0/contrib/ /var/opt/sei/sei-bases-conhecimento/contrib && \
  84 + ln -s /opt/solr-4.9.0/dist/ /var/opt/sei/sei-bases-conhecimento/dist && \
  85 + ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-bases-conhecimento/lib
  86 +
  87 +# Configuração dos links a instalação do Solr e o índice Publicações
  88 +RUN ln -s /opt/solr-4.9.0/contrib/ /var/opt/sei/sei-publicacoes/contrib && \
  89 + ln -s /opt/solr-4.9.0/dist/ /var/opt/sei/sei-publicacoes/dist && \
  90 + ln -s /opt/solr-4.9.0/example/lib/ /var/opt/sei/sei-publicacoes/lib
  91 +
  92 +# Configuração de log e permissões de pastas
  93 +RUN mkdir /var/log/solr
  94 +RUN chown solr:solr -R /opt/solr-4.9.0 /var/opt/sei /var/log/solr
  95 +
  96 +# Construção dos índices do SEI
  97 +RUN service solr start && sleep 6 && \
  98 + curl http://localhost/solr/admin/cores?action=CREATE&name=seiprotocolos&instanceDir=/var/opt/sei/sei-protocolos&config=sei-protocolosconfig.xml&schema=sei-protocolos-schema.xml&dataDir=/var/opt/sei/sei-protocolos/conteudo && \
  99 + curl http://localhost/solr/admin/cores?action=CREATE&name=sei-basesconhecimento&instanceDir=/var/opt/sei/sei-bases-conhecimento&config=sei-bases-conhecimentoconfig.xml&schema=sei-bases-conhecimento-schema.xml&dataDir=/var/opt/sei/sei-basesconhecimento/conteudo && \
  100 + curl http://localhost/solr/admin/cores?action=CREATE&name=seipublicacoes&instanceDir=/var/opt/sei/sei-publicacoes&config=sei-publicacoesconfig.xml&schema=sei-publicacoes-schema.xml&dataDir=/var/opt/sei/sei-publicacoes/conteudo
  101 +
  102 +#RUN service solr start && sleep 6 && curl http://localhost/solr/admin/cores?action=RELOAD
  103 +
  104 +##################### FIM DA INSTALACAO #####################
  105 +WORKDIR /opt/solr-4.9.0/example
  106 +
  107 +# Expõe a porta padrão do Solr
  108 +EXPOSE 8983
  109 +
  110 +# Default command: starts Solr through the bundled Jetty launcher (start.jar in WORKDIR)
  111 +CMD ["java", "-jar", "start.jar"]
0 112 \ No newline at end of file
... ...
index/sei-bases-conhecimento-config.xml 0 → 100755
  1 +++ a/index/sei-bases-conhecimento-config.xml
... ... @@ -0,0 +1,1773 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + For more details about configurations options that may appear in
  21 + this file, see http://wiki.apache.org/solr/SolrConfigXml.
  22 +-->
  23 +<config>
  24 + <!-- In all configuration below, a prefix of "solr." for class names
  25 + is an alias that causes solr to search appropriate packages,
  26 + including org.apache.solr.(search|update|request|core|analysis)
  27 +
  28 + You may also specify a fully qualified Java classname if you
  29 + have your own custom plugins.
  30 + -->
  31 +
  32 + <!-- Controls what version of Lucene various components of Solr
  33 + adhere to. Generally, you want to use the latest version to
  34 + get all bug fixes and improvements. It is highly recommended
  35 + that you fully re-index after changing this setting as it can
  36 + affect both how text is indexed and queried.
  37 + -->
  38 + <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
  39 +
  40 + <!-- <lib/> directives can be used to instruct Solr to load any Jars
  41 + identified and use them to resolve any "plugins" specified in
  42 + your solrconfig.xml or schema.xml (ie: Analyzers, Request
  43 + Handlers, etc...).
  44 +
  45 + All directories and paths are resolved relative to the
  46 + instanceDir.
  47 +
  48 + Please note that <lib/> directives are processed in the order
  49 + that they appear in your solrconfig.xml file, and are "stacked"
  50 + on top of each other when building a ClassLoader - so if you have
  51 + plugin jars with dependencies on other jars, the "lower level"
  52 + dependency jars should be loaded first.
  53 +
  54 + If a "./lib" directory exists in your instanceDir, all files
  55 + found in it are included as if you had used the following
  56 + syntax...
  57 +
  58 +-->
  59 + <lib dir="./lib" />
  60 +
  61 +
  62 + <!-- A 'dir' option by itself adds any files found in the directory
  63 + to the classpath, this is useful for including all jars in a
  64 + directory.
  65 +
  66 + When a 'regex' is specified in addition to a 'dir', only the
  67 + files in that directory which completely match the regex
  68 + (anchored on both ends) will be included.
  69 +
  70 + The examples below can be used to load some solr-contribs along
  71 + with their external dependencies.
  72 + -->
  73 + <lib dir="./contrib/extraction/lib" regex=".*\.jar" />
  74 + <lib dir="./dist/" regex="apache-solr-cell-\d.*\.jar" />
  75 +
  76 + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" />
  77 + <lib dir="./dist/" regex="apache-solr-clustering-\d.*\.jar" />
  78 +
  79 + <lib dir="./contrib/langid/lib/" regex=".*\.jar" />
  80 + <lib dir="./dist/" regex="apache-solr-langid-\d.*\.jar" />
  81 +
  82 + <lib dir="./contrib/velocity/lib" regex=".*\.jar" />
  83 + <lib dir="./dist/" regex="apache-solr-velocity-\d.*\.jar" />
  84 +
  85 + <!-- If a 'dir' option (with or without a regex) is used and nothing
  86 + is found that matches, it will be ignored
  87 + -->
  88 + <lib dir="/total/crap/dir/ignored" />
  89 +
  90 + <!-- an exact 'path' can be used instead of a 'dir' to specify a
  91 + specific jar file. This will cause a serious error to be logged
  92 + if it can't be loaded.
  93 + -->
  94 + <!--
  95 + <lib path="../a-jar-that-does-not-exist.jar" />
  96 + -->
  97 +
  98 + <!-- Data Directory
  99 +
  100 + Used to specify an alternate directory to hold all index data
  101 + other than the default ./data under the Solr home. If
  102 + replication is in use, this should match the replication
  103 + configuration.
  104 + -->
  105 + <dataDir>${solr.data.dir:}</dataDir>
  106 +
  107 +
  108 + <!-- The DirectoryFactory to use for indexes.
  109 +
  110 + solr.StandardDirectoryFactory is filesystem
  111 + based and tries to pick the best implementation for the current
  112 + JVM and platform. solr.NRTCachingDirectoryFactory, the default,
  113 + wraps solr.StandardDirectoryFactory and caches small files in memory
  114 + for better NRT performance.
  115 +
  116 + One can force a particular implementation via solr.MMapDirectoryFactory,
  117 + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
  118 +
  119 + solr.RAMDirectoryFactory is memory based, not
  120 + persistent, and doesn't work with replication.
  121 + -->
  122 + <directoryFactory name="DirectoryFactory"
  123 + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
  124 +
  125 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  126 + Index Config - These settings control low-level behavior of indexing
  127 + Most example settings here show the default value, but are commented
  128 + out, to more easily see where customizations have been made.
  129 +
  130 + Note: This replaces <indexDefaults> and <mainIndex> from older versions
  131 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  132 + <indexConfig>
  133 + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
  134 + LimitTokenCountFilterFactory in your fieldType definition. E.g.
  135 + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
  136 + -->
  137 + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
  138 + <!-- <writeLockTimeout>1000</writeLockTimeout> -->
  139 +
  140 + <!-- Expert: Enabling compound file will use less files for the index,
  141 + using fewer file descriptors on the expense of performance decrease.
  142 + Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
  143 + <!-- <useCompoundFile>false</useCompoundFile> -->
  144 +
  145 + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
  146 + indexing for buffering added documents and deletions before they are
  147 + flushed to the Directory.
  148 + maxBufferedDocs sets a limit on the number of documents buffered
  149 + before flushing.
  150 + If both ramBufferSizeMB and maxBufferedDocs is set, then
  151 + Lucene will flush based on whichever limit is hit first. -->
  152 + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> -->
  153 + <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
  154 +
  155 + <!-- Expert: Merge Policy
  156 + The Merge Policy in Lucene controls how merging of segments is done.
  157 + The default since Solr/Lucene 3.3 is TieredMergePolicy.
  158 + The default since Lucene 2.3 was the LogByteSizeMergePolicy,
  159 + Even older versions of Lucene used LogDocMergePolicy.
  160 + -->
  161 + <!--
  162 + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
  163 + <int name="maxMergeAtOnce">10</int>
  164 + <int name="segmentsPerTier">10</int>
  165 + </mergePolicy>
  166 + -->
  167 +
  168 + <!-- Merge Factor
  169 + The merge factor controls how many segments will get merged at a time.
  170 + For TieredMergePolicy, mergeFactor is a convenience parameter which
  171 + will set both MaxMergeAtOnce and SegmentsPerTier at once.
  172 + For LogByteSizeMergePolicy, mergeFactor decides how many new segments
  173 + will be allowed before they are merged into one.
  174 + Default is 10 for both merge policies.
  175 + -->
  176 + <!--
  177 + <mergeFactor>10</mergeFactor>
  178 + -->
  179 +
  180 + <!-- Expert: Merge Scheduler
  181 + The Merge Scheduler in Lucene controls how merges are
  182 + performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
  183 + can perform merges in the background using separate threads.
  184 + The SerialMergeScheduler (Lucene 2.2 default) does not.
  185 + -->
  186 + <!--
  187 + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
  188 + -->
  189 +
  190 + <!-- LockFactory
  191 +
  192 + This option specifies which Lucene LockFactory implementation
  193 + to use.
  194 +
  195 + single = SingleInstanceLockFactory - suggested for a
  196 + read-only index or when there is no possibility of
  197 + another process trying to modify the index.
  198 + native = NativeFSLockFactory - uses OS native file locking.
  199 + Do not use when multiple solr webapps in the same
  200 + JVM are attempting to share a single index.
  201 + simple = SimpleFSLockFactory - uses a plain file for locking
  202 +
  203 + Defaults: 'native' is default for Solr3.6 and later, otherwise
  204 + 'simple' is the default
  205 +
  206 + More details on the nuances of each LockFactory...
  207 + http://wiki.apache.org/lucene-java/AvailableLockFactories
  208 + -->
  209 + <!-- <lockType>native</lockType> -->
  210 +
  211 + <!-- Unlock On Startup
  212 +
  213 + If true, unlock any held write or commit locks on startup.
  214 + This defeats the locking mechanism that allows multiple
  215 + processes to safely access a lucene index, and should be used
  216 + with care. Default is "false".
  217 +
  218 + This is not needed if lock type is 'none' or 'single'
  219 + -->
  220 + <!--
  221 + <unlockOnStartup>false</unlockOnStartup>
  222 + -->
  223 +
  224 + <!-- Expert: Controls how often Lucene loads terms into memory
  225 + Default is 128 and is likely good for most everyone.
  226 + -->
  227 + <!-- <termIndexInterval>128</termIndexInterval> -->
  228 +
  229 + <!-- If true, IndexReaders will be reopened (often more efficient)
  230 + instead of closed and then opened. Default: true
  231 + -->
  232 + <!--
  233 + <reopenReaders>true</reopenReaders>
  234 + -->
  235 +
  236 + <!-- Commit Deletion Policy
  237 +
  238 + Custom deletion policies can be specified here. The class must
  239 + implement org.apache.lucene.index.IndexDeletionPolicy.
  240 +
  241 + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html
  242 +
  243 + The default Solr IndexDeletionPolicy implementation supports
  244 + deleting index commit points on number of commits, age of
  245 + commit point and optimized status.
  246 +
  247 + The latest commit point should always be preserved regardless
  248 + of the criteria.
  249 + -->
  250 + <!--
  251 + <deletionPolicy class="solr.SolrDeletionPolicy">
  252 + -->
  253 + <!-- The number of commit points to be kept -->
  254 + <!-- <str name="maxCommitsToKeep">1</str> -->
  255 + <!-- The number of optimized commit points to be kept -->
  256 + <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
  257 + <!--
  258 + Delete all commit points once they have reached the given age.
  259 + Supports DateMathParser syntax e.g.
  260 + -->
  261 + <!--
  262 + <str name="maxCommitAge">30MINUTES</str>
  263 + <str name="maxCommitAge">1DAY</str>
  264 + -->
  265 + <!--
  266 + </deletionPolicy>
  267 + -->
  268 +
  269 + <!-- Lucene Infostream
  270 +
  271 + To aid in advanced debugging, Lucene provides an "InfoStream"
  272 + of detailed information when indexing.
  273 +
  274 + Setting The value to true will instruct the underlying Lucene
  275 + IndexWriter to write its debugging info the specified file
  276 + -->
  277 + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
  278 + </indexConfig>
  279 +
  280 +
  281 + <!-- JMX
  282 +
  283 + This example enables JMX if and only if an existing MBeanServer
  284 + is found, use this if you want to configure JMX through JVM
  285 + parameters. Remove this to disable exposing Solr configuration
  286 + and statistics to JMX.
  287 +
  288 + For more details see http://wiki.apache.org/solr/SolrJmx
  289 + -->
  290 + <jmx />
  291 + <!-- If you want to connect to a particular server, specify the
  292 + agentId
  293 + -->
  294 + <!-- <jmx agentId="myAgent" /> -->
  295 + <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
  296 + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  297 + -->
  298 +
  299 + <!-- The default high-performance update handler -->
  300 + <updateHandler class="solr.DirectUpdateHandler2">
  301 +
  302 + <!-- AutoCommit
  303 +
  304 + Perform a hard commit automatically under certain conditions.
  305 + Instead of enabling autoCommit, consider using "commitWithin"
  306 + when adding documents.
  307 +
  308 + http://wiki.apache.org/solr/UpdateXmlMessages
  309 +
  310 + maxDocs - Maximum number of documents to add since the last
  311 + commit before automatically triggering a new commit.
  312 +
  313 + maxTime - Maximum amount of time in ms that is allowed to pass
  314 + since a document was added before automatically
  315 + triggering a new commit.
  316 + openSearcher - if false, the commit causes recent index changes
  317 + to be flushed to stable storage, but does not cause a new
  318 + searcher to be opened to make those changes visible.
  319 + -->
  320 + <autoCommit>
  321 + <maxTime>300000</maxTime>
  322 + <openSearcher>false</openSearcher>
  323 + </autoCommit>
  324 +
  325 + <!-- softAutoCommit is like autoCommit except it causes a
  326 + 'soft' commit which only ensures that changes are visible
  327 + but does not ensure that data is synced to disk. This is
  328 + faster and more near-realtime friendly than a hard commit.
  329 + -->
  330 + <!--
  331 + <autoSoftCommit>
  332 + <maxTime>1000</maxTime>
  333 + </autoSoftCommit>
  334 + -->
  335 +
  336 + <!-- Update Related Event Listeners
  337 +
  338 + Various IndexWriter related events can trigger Listeners to
  339 + take actions.
  340 +
  341 + postCommit - fired after every commit or optimize command
  342 + postOptimize - fired after every optimize command
  343 + -->
  344 + <!-- The RunExecutableListener executes an external command from a
  345 + hook such as postCommit or postOptimize.
  346 +
  347 + exe - the name of the executable to run
  348 + dir - dir to use as the current working directory. (default=".")
  349 + wait - the calling thread waits until the executable returns.
  350 + (default="true")
  351 + args - the arguments to pass to the program. (default is none)
  352 + env - environment variables to set. (default is none)
  353 + -->
  354 + <!-- This example shows how RunExecutableListener could be used
  355 + with the script based replication...
  356 + http://wiki.apache.org/solr/CollectionDistribution
  357 + -->
  358 + <!--
  359 + <listener event="postCommit" class="solr.RunExecutableListener">
  360 + <str name="exe">solr/bin/snapshooter</str>
  361 + <str name="dir">.</str>
  362 + <bool name="wait">true</bool>
  363 + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
  364 + <arr name="env"> <str>MYVAR=val1</str> </arr>
  365 + </listener>
  366 + -->
  367 +
  368 + <!-- Enables a transaction log, currently used for real-time get.
  369 + "dir" - the target directory for transaction logs, defaults to the
  370 + solr data directory. -->
  371 + <updateLog>
  372 + <str name="dir">${solr.data.dir:}</str>
  373 + </updateLog>
  374 +
  375 +
  376 + </updateHandler>
  377 +
  378 + <!-- IndexReaderFactory
  379 +
  380 + Use the following format to specify a custom IndexReaderFactory,
  381 + which allows for alternate IndexReader implementations.
  382 +
  383 + ** Experimental Feature **
  384 +
  385 + Please note - Using a custom IndexReaderFactory may prevent
  386 + certain other features from working. The API to
  387 + IndexReaderFactory may change without warning or may even be
  388 + removed from future releases if the problems cannot be
  389 + resolved.
  390 +
  391 +
  392 + ** Features that may not work with custom IndexReaderFactory **
  393 +
  394 + The ReplicationHandler assumes a disk-resident index. Using a
  395 + custom IndexReader implementation may cause incompatibility
  396 + with ReplicationHandler and may cause replication to not work
  397 + correctly. See SOLR-1366 for details.
  398 +
  399 + -->
  400 + <!--
  401 + <indexReaderFactory name="IndexReaderFactory" class="package.class">
  402 + <str name="someArg">Some Value</str>
  403 + </indexReaderFactory >
  404 + -->
  405 + <!-- By explicitly declaring the Factory, the termIndexDivisor can
  406 + be specified.
  407 + -->
  408 + <!--
  409 + <indexReaderFactory name="IndexReaderFactory"
  410 + class="solr.StandardIndexReaderFactory">
  411 + <int name="setTermIndexDivisor">12</int>
  412 + </indexReaderFactory >
  413 + -->
  414 +
  415 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  416 + Query section - these settings control query time things like caches
  417 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  418 + <query>
  419 + <!-- Max Boolean Clauses
  420 +
  421 + Maximum number of clauses in each BooleanQuery, an exception
  422 + is thrown if exceeded.
  423 +
  424 + ** WARNING **
  425 +
  426 + This option actually modifies a global Lucene property that
  427 + will affect all SolrCores. If multiple solrconfig.xml files
  428 + disagree on this property, the value at any given moment will
  429 + be based on the last SolrCore to be initialized.
  430 +
  431 + -->
  432 + <maxBooleanClauses>1024</maxBooleanClauses>
  433 +
  434 +
  435 + <!-- Solr Internal Query Caches
  436 +
  437 + There are two implementations of cache available for Solr,
  438 + LRUCache, based on a synchronized LinkedHashMap, and
  439 + FastLRUCache, based on a ConcurrentHashMap.
  440 +
  441 + FastLRUCache has faster gets and slower puts in single
  442 + threaded operation and thus is generally faster than LRUCache
  443 + when the hit ratio of the cache is high (> 75%), and may be
  444 + faster under other scenarios on multi-cpu systems.
  445 + -->
  446 +
  447 + <!-- Filter Cache
  448 +
  449 + Cache used by SolrIndexSearcher for filters (DocSets),
  450 + unordered sets of *all* documents that match a query. When a
  451 + new searcher is opened, its caches may be prepopulated or
  452 + "autowarmed" using data from caches in the old searcher.
  453 + autowarmCount is the number of items to prepopulate. For
  454 + LRUCache, the autowarmed items will be the most recently
  455 + accessed items.
  456 +
  457 + Parameters:
  458 + class - the SolrCache implementation to use
  459 + (LRUCache or FastLRUCache)
  460 + size - the maximum number of entries in the cache
  461 + initialSize - the initial capacity (number of entries) of
  462 + the cache. (see java.util.HashMap)
  463 + autowarmCount - the number of entries to prepopulate from
  464 + an old cache.
  465 + -->
  466 + <filterCache class="solr.FastLRUCache"
  467 + size="512"
  468 + initialSize="512"
  469 + autowarmCount="0"/>
  470 +
  471 + <!-- Query Result Cache
  472 +
  473 + Caches results of searches - ordered lists of document ids
  474 + (DocList) based on a query, a sort, and the range of documents requested.
  475 + -->
  476 + <queryResultCache class="solr.LRUCache"
  477 + size="512"
  478 + initialSize="512"
  479 + autowarmCount="0"/>
  480 +
  481 + <!-- Document Cache
  482 +
  483 + Caches Lucene Document objects (the stored fields for each
  484 + document). Since Lucene internal document ids are transient,
  485 + this cache will not be autowarmed.
  486 + -->
  487 + <documentCache class="solr.LRUCache"
  488 + size="512"
  489 + initialSize="512"
  490 + autowarmCount="0"/>
  491 +
  492 + <!-- Field Value Cache
  493 +
  494 + Cache used to hold field values that are quickly accessible
  495 + by document id. The fieldValueCache is created by default
  496 + even if not configured here.
  497 + -->
  498 + <!--
  499 + <fieldValueCache class="solr.FastLRUCache"
  500 + size="512"
  501 + autowarmCount="128"
  502 + showItems="32" />
  503 + -->
  504 +
  505 + <!-- Custom Cache
  506 +
  507 + Example of a generic cache. These caches may be accessed by
  508 + name through SolrIndexSearcher.getCache(),cacheLookup(), and
  509 + cacheInsert(). The purpose is to enable easy caching of
  510 + user/application level data. The regenerator argument should
  511 + be specified as an implementation of solr.CacheRegenerator
  512 + if autowarming is desired.
  513 + -->
  514 + <!--
  515 + <cache name="myUserCache"
  516 + class="solr.LRUCache"
  517 + size="4096"
  518 + initialSize="1024"
  519 + autowarmCount="1024"
  520 + regenerator="com.mycompany.MyRegenerator"
  521 + />
  522 + -->
  523 +
  524 +
  525 + <!-- Lazy Field Loading
  526 +
  527 + If true, stored fields that are not requested will be loaded
  528 + lazily. This can result in a significant speed improvement
  529 + if the usual case is to not load all stored fields,
  530 + especially if the skipped fields are large compressed text
  531 + fields.
  532 + -->
  533 + <enableLazyFieldLoading>true</enableLazyFieldLoading>
  534 +
  535 + <!-- Use Filter For Sorted Query
  536 +
  537 + A possible optimization that attempts to use a filter to
  538 + satisfy a search. If the requested sort does not include
  539 + score, then the filterCache will be checked for a filter
  540 + matching the query. If found, the filter will be used as the
  541 + source of document ids, and then the sort will be applied to
  542 + that.
  543 +
  544 + For most situations, this will not be useful unless you
  545 + frequently get the same search repeatedly with different sort
  546 + options, and none of them ever use "score"
  547 + -->
  548 + <!--
  549 + <useFilterForSortedQuery>true</useFilterForSortedQuery>
  550 + -->
  551 +
  552 + <!-- Result Window Size
  553 +
  554 + An optimization for use with the queryResultCache. When a search
  555 + is requested, a superset of the requested number of document ids
  556 + are collected. For example, if a search for a particular query
  557 + requests matching documents 10 through 19, and queryWindowSize is 50,
  558 + then documents 0 through 49 will be collected and cached. Any further
  559 + requests in that range can be satisfied via the cache.
  560 + -->
  561 + <queryResultWindowSize>20</queryResultWindowSize>
  562 +
  563 + <!-- Maximum number of documents to cache for any entry in the
  564 + queryResultCache.
  565 + -->
  566 + <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
  567 +
  568 + <!-- Query Related Event Listeners
  569 +
  570 + Various IndexSearcher related events can trigger Listeners to
  571 + take actions.
  572 +
  573 + newSearcher - fired whenever a new searcher is being prepared
  574 + and there is a current searcher handling requests (aka
  575 + registered). It can be used to prime certain caches to
  576 + prevent long request times for certain requests.
  577 +
  578 + firstSearcher - fired whenever a new searcher is being
  579 + prepared but there is no current registered searcher to handle
  580 + requests or to gain autowarming data from.
  581 +
  582 +
  583 + -->
  584 + <!-- QuerySenderListener takes an array of NamedList and executes a
  585 + local query request for each NamedList in sequence.
  586 + -->
  587 + <listener event="newSearcher" class="solr.QuerySenderListener">
  588 + <arr name="queries">
  589 + <!--
  590 + <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
  591 + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
  592 + -->
  593 + </arr>
  594 + </listener>
  595 + <listener event="firstSearcher" class="solr.QuerySenderListener">
  596 + <arr name="queries">
  597 + <lst>
  598 + <str name="q">static firstSearcher warming in solrconfig.xml</str>
  599 + </lst>
  600 + </arr>
  601 + </listener>
  602 +
  603 + <!-- Use Cold Searcher
  604 +
  605 + If a search request comes in and there is no current
  606 + registered searcher, then immediately register the still
  607 + warming searcher and use it. If "false" then all requests
  608 + will block until the first searcher is done warming.
  609 + -->
  610 + <useColdSearcher>false</useColdSearcher>
  611 +
  612 + <!-- Max Warming Searchers
  613 +
  614 + Maximum number of searchers that may be warming in the
  615 + background concurrently. An error is returned if this limit
  616 + is exceeded.
  617 +
  618 + Recommend values of 1-2 for read-only slaves, higher for
  619 + masters w/o cache warming.
  620 + -->
  621 + <maxWarmingSearchers>2</maxWarmingSearchers>
  622 +
  623 + </query>
  624 +
  625 +
  626 + <!-- Request Dispatcher
  627 +
  628 + This section contains instructions for how the SolrDispatchFilter
  629 + should behave when processing requests for this SolrCore.
  630 +
  631 + handleSelect is a legacy option that affects the behavior of requests
  632 + such as /select?qt=XXX
  633 +
  634 + handleSelect="true" will cause the SolrDispatchFilter to process
  635 + the request and dispatch the query to a handler specified by the
  636 + "qt" param, assuming "/select" isn't already registered.
  637 +
  638 + handleSelect="false" will cause the SolrDispatchFilter to
  639 + ignore "/select" requests, resulting in a 404 unless a handler
  640 + is explicitly registered with the name "/select"
  641 +
  642 + handleSelect="true" is not recommended for new users, but is the default
  643 + for backwards compatibility
  644 + -->
  645 + <requestDispatcher handleSelect="false" >
  646 + <!-- Request Parsing
  647 +
  648 + These settings indicate how Solr Requests may be parsed, and
  649 + what restrictions may be placed on the ContentStreams from
  650 + those requests
  651 +
  652 + enableRemoteStreaming - enables use of the stream.file
  653 + and stream.url parameters for specifying remote streams.
  654 +
  655 + multipartUploadLimitInKB - specifies the max size of
  656 + Multipart File Uploads that Solr will allow in a Request.
  657 +
  658 + *** WARNING ***
  659 + The settings below authorize Solr to fetch remote files. You
  660 + should make sure your system has some authentication before
  661 + using enableRemoteStreaming="true"
  662 +
  663 + -->
  664 + <requestParsers enableRemoteStreaming="true"
  665 + multipartUploadLimitInKB="2048000" />
  666 +
  667 + <!-- HTTP Caching
  668 +
  669 + Set HTTP caching related parameters (for proxy caches and clients).
  670 +
  671 + The options below instruct Solr not to output any HTTP Caching
  672 + related headers
  673 + -->
  674 + <httpCaching never304="true" />
  675 + <!-- If you include a <cacheControl> directive, it will be used to
  676 + generate a Cache-Control header (as well as an Expires header
  677 + if the value contains "max-age=")
  678 +
  679 + By default, no Cache-Control header is generated.
  680 +
  681 + You can use the <cacheControl> option even if you have set
  682 + never304="true"
  683 + -->
  684 + <!--
  685 + <httpCaching never304="true" >
  686 + <cacheControl>max-age=30, public</cacheControl>
  687 + </httpCaching>
  688 + -->
  689 + <!-- To enable Solr to respond with automatically generated HTTP
  690 + Caching headers, and to respond to Cache Validation requests
  691 + correctly, set the value of never304="false"
  692 +
  693 + This will cause Solr to generate Last-Modified and ETag
  694 + headers based on the properties of the Index.
  695 +
  696 + The following options can also be specified to affect the
  697 + values of these headers...
  698 +
  699 + lastModFrom - the default value is "openTime" which means the
  700 + Last-Modified value (and validation against If-Modified-Since
  701 + requests) will all be relative to when the current Searcher
  702 + was opened. You can change it to lastModFrom="dirLastMod" if
  703 + you want the value to exactly correspond to when the physical
  704 + index was last modified.
  705 +
  706 + etagSeed="..." is an option you can change to force the ETag
  707 + header (and validation against If-None-Match requests) to be
  708 + different even if the index has not changed (ie: when making
  709 + significant changes to your config file)
  710 +
  711 + (lastModFrom and etagSeed are both ignored if you use
  712 + the never304="true" option)
  713 + -->
  714 + <!--
  715 + <httpCaching lastModFrom="openTime"
  716 + etagSeed="Solr">
  717 + <cacheControl>max-age=30, public</cacheControl>
  718 + </httpCaching>
  719 + -->
  720 + </requestDispatcher>
  721 +
  722 + <!-- Request Handlers
  723 +
  724 + http://wiki.apache.org/solr/SolrRequestHandler
  725 +
  726 + Incoming queries will be dispatched to a specific handler by name
  727 + based on the path specified in the request.
  728 +
  729 + Legacy behavior: If the request path uses "/select" but no Request
  730 + Handler has that name, and if handleSelect="true" has been specified in
  731 + the requestDispatcher, then the Request Handler is dispatched based on
  732 + the qt parameter. Handlers without a leading '/' are accessed this way
  733 + like so: http://host/app/[core/]select?qt=name If no qt is
  734 + given, then the requestHandler that declares default="true" will be
  735 + used or the one named "standard".
  736 +
  737 + If a Request Handler is declared with startup="lazy", then it will
  738 + not be initialized until the first request that uses it.
  739 +
  740 + -->
  741 + <!-- SearchHandler
  742 +
  743 + http://wiki.apache.org/solr/SearchHandler
  744 +
  745 + For processing Search Queries, the primary Request Handler
  746 + provided with Solr is "SearchHandler". It delegates to a sequence
  747 + of SearchComponents (see below) and supports distributed
  748 + queries across multiple shards
  749 + -->
  750 + <requestHandler name="/select" class="solr.SearchHandler">
  751 + <!-- default values for query parameters can be specified, these
  752 + will be overridden by parameters in the request
  753 + -->
  754 + <lst name="defaults">
  755 + <str name="echoParams">explicit</str>
  756 + <int name="rows">10</int>
  757 + <str name="df">text</str>
  758 + </lst>
  759 + <!-- In addition to defaults, "appends" params can be specified
  760 + to identify values which should be appended to the list of
  761 + multi-val params from the query (or the existing "defaults").
  762 + -->
  763 + <!-- In this example, the param "fq=instock:true" would be appended to
  764 + any query time fq params the user may specify, as a mechanism for
  765 + partitioning the index, independent of any user selected filtering
  766 + that may also be desired (perhaps as a result of faceted searching).
  767 +
  768 + NOTE: there is *absolutely* nothing a client can do to prevent these
  769 + "appends" values from being used, so don't use this mechanism
  770 + unless you are sure you always want it.
  771 + -->
  772 + <!--
  773 + <lst name="appends">
  774 + <str name="fq">inStock:true</str>
  775 + </lst>
  776 + -->
  777 + <!-- "invariants" are a way of letting the Solr maintainer lock down
  778 + the options available to Solr clients. Any params values
  779 + specified here are used regardless of what values may be specified
  780 + in either the query, the "defaults", or the "appends" params.
  781 +
  782 + In this example, the facet.field and facet.query params would
  783 + be fixed, limiting the facets clients can use. Faceting is
  784 + not turned on by default - but if the client does specify
  785 + facet=true in the request, these are the only facets they
  786 + will be able to see counts for; regardless of what other
  787 + facet.field or facet.query params they may specify.
  788 +
  789 + NOTE: there is *absolutely* nothing a client can do to prevent these
  790 + "invariants" values from being used, so don't use this mechanism
  791 + unless you are sure you always want it.
  792 + -->
  793 + <!--
  794 + <lst name="invariants">
  795 + <str name="facet.field">cat</str>
  796 + <str name="facet.field">manu_exact</str>
  797 + <str name="facet.query">price:[* TO 500]</str>
  798 + <str name="facet.query">price:[500 TO *]</str>
  799 + </lst>
  800 + -->
  801 + <!-- If the default list of SearchComponents is not desired, that
  802 + list can either be overridden completely, or components can be
  803 + prepended or appended to the default list. (see below)
  804 + -->
  805 + <!--
  806 + <arr name="components">
  807 + <str>nameOfCustomComponent1</str>
  808 + <str>nameOfCustomComponent2</str>
  809 + </arr>
  810 + -->
  811 +
  812 + </requestHandler>
  813 +
  814 + <!-- A request handler that returns indented JSON by default -->
  815 + <requestHandler name="/query" class="solr.SearchHandler">
  816 + <lst name="defaults">
  817 + <str name="echoParams">explicit</str>
  818 + <str name="wt">json</str>
  819 + <str name="indent">true</str>
  820 + <str name="df">text</str>
  821 + </lst>
  822 + </requestHandler>
  823 +
  824 +
  825 + <!-- realtime get handler, guaranteed to return the latest stored fields of
  826 + any document, without the need to commit or open a new searcher. The
  827 + current implementation relies on the updateLog feature being enabled. -->
  828 + <requestHandler name="/get" class="solr.RealTimeGetHandler">
  829 + <lst name="defaults">
  830 + <str name="omitHeader">true</str>
  831 + <str name="wt">json</str>
  832 + <str name="indent">true</str>
  833 + </lst>
  834 + </requestHandler>
  835 +
  836 +
  837 + <!-- A Robust Example
  838 +
  839 + This example SearchHandler declaration shows off usage of the
  840 + SearchHandler with many defaults declared
  841 +
  842 + Note that multiple instances of the same Request Handler
  843 + (SearchHandler) can be registered multiple times with different
  844 + names (and different init parameters)
  845 + -->
  846 + <requestHandler name="/browse" class="solr.SearchHandler">
  847 + <lst name="defaults">
  848 + <str name="echoParams">explicit</str>
  849 +
  850 + <!-- VelocityResponseWriter settings -->
  851 + <str name="wt">velocity</str>
  852 + <str name="v.template">browse</str>
  853 + <str name="v.layout">layout</str>
  854 + <str name="title">Solritas</str>
  855 +
  856 + <!-- Query settings -->
  857 + <str name="defType">edismax</str>
  858 + <str name="qf">
  859 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  860 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  861 + </str>
  862 + <str name="df">text</str>
  863 + <str name="mm">100%</str>
  864 + <str name="q.alt">*:*</str>
  865 + <str name="rows">10</str>
  866 + <str name="fl">*,score</str>
  867 +
  868 + <str name="mlt.qf">
  869 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  870 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  871 + </str>
  872 + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
  873 + <int name="mlt.count">3</int>
  874 +
  875 + <!-- Faceting defaults -->
  876 + <str name="facet">on</str>
  877 + <str name="facet.field">cat</str>
  878 + <str name="facet.field">manu_exact</str>
  879 + <str name="facet.field">content_type</str>
  880 + <str name="facet.field">author_s</str>
  881 + <str name="facet.query">ipod</str>
  882 + <str name="facet.query">GB</str>
  883 + <str name="facet.mincount">1</str>
  884 + <str name="facet.pivot">cat,inStock</str>
  885 + <str name="facet.range.other">after</str>
  886 + <str name="facet.range">price</str>
  887 + <int name="f.price.facet.range.start">0</int>
  888 + <int name="f.price.facet.range.end">600</int>
  889 + <int name="f.price.facet.range.gap">50</int>
  890 + <str name="facet.range">popularity</str>
  891 + <int name="f.popularity.facet.range.start">0</int>
  892 + <int name="f.popularity.facet.range.end">10</int>
  893 + <int name="f.popularity.facet.range.gap">3</int>
  894 + <str name="facet.range">manufacturedate_dt</str>
  895 + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
  896 + <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
  897 + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
  898 + <str name="f.manufacturedate_dt.facet.range.other">before</str>
  899 + <str name="f.manufacturedate_dt.facet.range.other">after</str>
  900 +
  901 + <!-- Highlighting defaults -->
  902 + <str name="hl">on</str>
  903 + <str name="hl.fl">content</str>
  904 + <str name="hl.encoder">html</str>
  905 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  906 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  907 + <str name="f.title.hl.fragsize">0</str>
  908 + <str name="f.title.hl.alternateField">title</str>
  909 + <str name="f.name.hl.fragsize">0</str>
  910 + <str name="f.name.hl.alternateField">name</str>
  911 + <str name="f.content.hl.snippets">3</str>
  912 + <str name="f.content.hl.fragsize">1000</str>
  913 + <str name="f.content.hl.alternateField">content</str>
  914 + <str name="f.content.hl.maxAlternateFieldLength">250</str>
  915 +
  916 +
  917 + <!-- Spell checking defaults -->
  918 + <str name="spellcheck">on</str>
  919 + <str name="spellcheck.extendedResults">false</str>
  920 + <str name="spellcheck.count">5</str>
  921 + <str name="spellcheck.alternativeTermCount">2</str>
  922 + <str name="spellcheck.maxResultsForSuggest">5</str>
  923 + <str name="spellcheck.collate">true</str>
  924 + <str name="spellcheck.collateExtendedResults">true</str>
  925 + <str name="spellcheck.maxCollationTries">5</str>
  926 + <str name="spellcheck.maxCollations">3</str>
  927 + </lst>
  928 +
  929 + <!-- append spellchecking to our list of components -->
  930 + <arr name="last-components">
  931 + <str>spellcheck</str>
  932 + </arr>
  933 + </requestHandler>
  934 +
  935 +
  936 + <!-- Update Request Handler.
  937 +
  938 + http://wiki.apache.org/solr/UpdateXmlMessages
  939 +
  940 + The canonical Request Handler for Modifying the Index through
  941 + commands specified using XML, JSON, CSV, or JAVABIN
  942 +
  943 + Note: Since Solr 1.1, requestHandlers require a valid content
  944 + type header if posted in the body. For example, curl now
  945 + requires: -H 'Content-type:text/xml; charset=utf-8'
  946 +
  947 + To override the request content type and force a specific
  948 + Content-type, use the request parameter:
  949 + ?update.contentType=text/csv
  950 +
  951 + This handler will pick a response format to match the input
  952 + if the 'wt' parameter is not explicit
  953 + -->
  954 + <requestHandler name="/update" class="solr.UpdateRequestHandler">
  955 + <!-- See below for information on defining
  956 + updateRequestProcessorChains that can be used by name
  957 + on each Update Request
  958 + -->
  959 + <!--
  960 + <lst name="defaults">
  961 + <str name="update.chain">dedupe</str>
  962 + </lst>
  963 + -->
  964 + </requestHandler>
  965 +
  966 + <!-- for back compat with clients using /update/json and /update/csv -->
  967 + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
  968 + <lst name="defaults">
  969 + <str name="stream.contentType">application/json</str>
  970 + </lst>
  971 + </requestHandler>
  972 + <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
  973 + <lst name="defaults">
  974 + <str name="stream.contentType">application/csv</str>
  975 + </lst>
  976 + </requestHandler>
  977 +
  978 + <!-- Solr Cell Update Request Handler
  979 +
  980 + http://wiki.apache.org/solr/ExtractingRequestHandler
  981 +
  982 + -->
  983 + <requestHandler name="/update/extract"
  984 + startup="lazy"
  985 + class="solr.extraction.ExtractingRequestHandler" >
  986 + <lst name="defaults">
  987 + <str name="lowernames">true</str>
  988 + <str name="uprefix">ignored_</str>
  989 +
  990 + <!-- capture link hrefs but ignore div attributes -->
  991 + <str name="captureAttr">true</str>
  992 + <str name="fmap.a">links</str>
  993 + <str name="fmap.div">ignored_</str>
  994 + </lst>
  995 + </requestHandler>
  996 +
  997 +
  998 + <!-- Field Analysis Request Handler
  999 +
  1000 + RequestHandler that provides much the same functionality as
  1001 + analysis.jsp. Provides the ability to specify multiple field
  1002 + types and field names in the same request and outputs
  1003 + index-time and query-time analysis for each of them.
  1004 +
  1005 + Request parameters are:
  1006 + analysis.fieldname - field name whose analyzers are to be used
  1007 +
  1008 + analysis.fieldtype - field type whose analyzers are to be used
  1009 + analysis.fieldvalue - text for index-time analysis
  1010 + q (or analysis.q) - text for query time analysis
  1011 + analysis.showmatch (true|false) - When set to true and when
  1012 + query analysis is performed, the produced tokens of the
  1013 + field value analysis will be marked as "matched" for every
  1014 + token that is produced by the query analysis
  1015 + -->
  1016 + <requestHandler name="/analysis/field"
  1017 + startup="lazy"
  1018 + class="solr.FieldAnalysisRequestHandler" />
  1019 +
  1020 +
  1021 + <!-- Document Analysis Handler
  1022 +
  1023 + http://wiki.apache.org/solr/AnalysisRequestHandler
  1024 +
  1025 + An analysis handler that provides a breakdown of the analysis
  1026 + process of provided documents. This handler expects a (single)
  1027 + content stream with the following format:
  1028 +
  1029 + <docs>
  1030 + <doc>
  1031 + <field name="id">1</field>
  1032 + <field name="name">The Name</field>
  1033 + <field name="text">The Text Value</field>
  1034 + </doc>
  1035 + <doc>...</doc>
  1036 + <doc>...</doc>
  1037 + ...
  1038 + </docs>
  1039 +
  1040 + Note: Each document must contain a field which serves as the
  1041 + unique key. This key is used in the returned response to associate
  1042 + an analysis breakdown to the analyzed document.
  1043 +
  1044 + Like the FieldAnalysisRequestHandler, this handler also supports
  1045 + query analysis by sending either an "analysis.query" or "q"
  1046 + request parameter that holds the query text to be analyzed. It
  1047 + also supports the "analysis.showmatch" parameter which when set to
  1048 + true, all field tokens that match the query tokens will be marked
  1049 + as a "match".
  1050 + -->
  1051 + <requestHandler name="/analysis/document"
  1052 + class="solr.DocumentAnalysisRequestHandler"
  1053 + startup="lazy" />
  1054 +
  1055 + <!-- Admin Handlers
  1056 +
  1057 + Admin Handlers - This will register all the standard admin
  1058 + RequestHandlers.
  1059 + -->
  1060 + <requestHandler name="/admin/"
  1061 + class="solr.admin.AdminHandlers" />
  1062 + <!-- This single handler is equivalent to the following... -->
  1063 + <!--
  1064 + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
  1065 + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
  1066 + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
  1067 + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
  1068 + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
  1069 + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
  1070 + -->
  1071 + <!-- If you wish to hide files under ${solr.home}/conf, explicitly
  1072 + register the ShowFileRequestHandler using:
  1073 + -->
  1074 + <!--
  1075 + <requestHandler name="/admin/file"
  1076 + class="solr.admin.ShowFileRequestHandler" >
  1077 + <lst name="invariants">
  1078 + <str name="hidden">synonyms.txt</str>
  1079 + <str name="hidden">anotherfile.txt</str>
  1080 + </lst>
  1081 + </requestHandler>
  1082 + -->
  1083 +
  1084 + <!-- ping/healthcheck -->
  1085 + <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
  1086 + <lst name="invariants">
  1087 + <str name="q">solrpingquery</str>
  1088 + </lst>
  1089 + <lst name="defaults">
  1090 + <str name="echoParams">all</str>
  1091 + </lst>
  1092 + <!-- An optional feature of the PingRequestHandler is to configure the
  1093 + handler with a "healthcheckFile" which can be used to enable/disable
  1094 + the PingRequestHandler.
  1095 + relative paths are resolved against the data dir
  1096 + -->
  1097 + <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
  1098 + </requestHandler>
  1099 +
  1100 + <!-- Echo the request contents back to the client -->
  1101 + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
  1102 + <lst name="defaults">
  1103 + <str name="echoParams">explicit</str>
  1104 + <str name="echoHandler">true</str>
  1105 + </lst>
  1106 + </requestHandler>
  1107 +
  1108 + <!-- Solr Replication
  1109 +
  1110 + The SolrReplicationHandler supports replicating indexes from a
  1111 + "master" used for indexing and "slaves" used for queries.
  1112 +
  1113 + http://wiki.apache.org/solr/SolrReplication
  1114 +
  1115 + It is also necessary for SolrCloud to function (in Cloud mode, the
  1116 + replication handler is used to bulk transfer segments when nodes
  1117 + are added or need to recover).
  1118 +
  1119 + https://wiki.apache.org/solr/SolrCloud/
  1120 + -->
  1121 + <requestHandler name="/replication" class="solr.ReplicationHandler" >
  1122 + <!--
  1123 + To enable simple master/slave replication, uncomment one of the
  1124 + sections below, depending on whether this solr instance should be
  1125 + the "master" or a "slave". If this instance is a "slave" you will
  1126 + also need to fill in the masterUrl to point to a real machine.
  1127 + -->
  1128 + <!--
  1129 + <lst name="master">
  1130 + <str name="replicateAfter">commit</str>
  1131 + <str name="replicateAfter">startup</str>
  1132 + <str name="confFiles">schema.xml,stopwords.txt</str>
  1133 + </lst>
  1134 + -->
  1135 + <!--
  1136 + <lst name="slave">
  1137 + <str name="masterUrl">http://your-master-hostname:8983/solr</str>
  1138 + <str name="pollInterval">00:00:60</str>
  1139 + </lst>
  1140 + -->
  1141 + </requestHandler>
  1142 +
  1143 + <!-- Search Components
  1144 +
  1145 + Search components are registered to SolrCore and used by
  1146 + instances of SearchHandler (which can access them by name)
  1147 +
  1148 + By default, the following components are available:
  1149 +
  1150 + <searchComponent name="query" class="solr.QueryComponent" />
  1151 + <searchComponent name="facet" class="solr.FacetComponent" />
  1152 + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
  1153 + <searchComponent name="highlight" class="solr.HighlightComponent" />
  1154 + <searchComponent name="stats" class="solr.StatsComponent" />
  1155 + <searchComponent name="debug" class="solr.DebugComponent" />
  1156 +
  1157 + Default configuration in a requestHandler would look like:
  1158 +
  1159 + <arr name="components">
  1160 + <str>query</str>
  1161 + <str>facet</str>
  1162 + <str>mlt</str>
  1163 + <str>highlight</str>
  1164 + <str>stats</str>
  1165 + <str>debug</str>
  1166 + </arr>
  1167 +
  1168 + If you register a searchComponent to one of the standard names,
  1169 + that will be used instead of the default.
  1170 +
  1171 + To insert components before or after the 'standard' components, use:
  1172 +
  1173 + <arr name="first-components">
  1174 + <str>myFirstComponentName</str>
  1175 + </arr>
  1176 +
  1177 + <arr name="last-components">
  1178 + <str>myLastComponentName</str>
  1179 + </arr>
  1180 +
  1181 + NOTE: The component registered with the name "debug" will
  1182 + always be executed after the "last-components"
  1183 +
  1184 + -->
  1185 +
  1186 + <!-- Spell Check
  1187 +
  1188 + The spell check component can return a list of alternative spelling
  1189 + suggestions.
  1190 +
  1191 + http://wiki.apache.org/solr/SpellCheckComponent
  1192 + -->
  1193 + <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  1194 +
  1195 + <str name="queryAnalyzerFieldType">textSpell</str>
  1196 +
  1197 + <!-- Multiple "Spell Checkers" can be declared and used by this
  1198 + component
  1199 + -->
  1200 +
  1201 + <!-- a spellchecker built from a field of the main index -->
  1202 + <lst name="spellchecker">
  1203 + <str name="name">default</str>
  1204 + <str name="field">name</str>
  1205 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1206 + <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
  1207 + <str name="distanceMeasure">internal</str>
  1208 + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
  1209 + <float name="accuracy">0.5</float>
  1210 + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
  1211 + <int name="maxEdits">2</int>
  1212 + <!-- the minimum shared prefix when enumerating terms -->
  1213 + <int name="minPrefix">1</int>
  1214 + <!-- maximum number of inspections per result. -->
  1215 + <int name="maxInspections">5</int>
  1216 + <!-- minimum length of a query term to be considered for correction -->
  1217 + <int name="minQueryLength">4</int>
  1218 + <!-- maximum threshold of documents a query term can appear to be considered for correction -->
  1219 + <float name="maxQueryFrequency">0.01</float>
  1220 + <!-- uncomment this to require suggestions to occur in 1% of the documents
  1221 + <float name="thresholdTokenFrequency">.01</float>
  1222 + -->
  1223 + </lst>
  1224 +
  1225 + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
  1226 + <lst name="spellchecker">
  1227 + <str name="name">wordbreak</str>
  1228 + <str name="classname">solr.WordBreakSolrSpellChecker</str>
  1229 + <str name="field">name</str>
  1230 + <str name="combineWords">true</str>
  1231 + <str name="breakWords">true</str>
  1232 + <int name="maxChanges">10</int>
  1233 + </lst>
  1234 +
  1235 + <!-- a spellchecker that uses a different distance measure -->
  1236 + <!--
  1237 + <lst name="spellchecker">
  1238 + <str name="name">jarowinkler</str>
  1239 + <str name="field">spell</str>
  1240 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1241 + <str name="distanceMeasure">
  1242 + org.apache.lucene.search.spell.JaroWinklerDistance
  1243 + </str>
  1244 + </lst>
  1245 + -->
  1246 +
  1247 + <!-- a spellchecker that uses an alternate comparator
  1248 +
  1249 + comparatorClass can be one of:
  1250 + 1. score (default)
  1251 + 2. freq (Frequency first, then score)
  1252 + 3. A fully qualified class name
  1253 + -->
  1254 + <!--
  1255 + <lst name="spellchecker">
  1256 + <str name="name">freq</str>
  1257 + <str name="field">lowerfilt</str>
  1258 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1259 + <str name="comparatorClass">freq</str>
  1260 + -->
  1261 +
  1262 + <!-- A spellchecker that reads the list of words from a file -->
  1263 + <!--
  1264 + <lst name="spellchecker">
  1265 + <str name="classname">solr.FileBasedSpellChecker</str>
  1266 + <str name="name">file</str>
  1267 + <str name="sourceLocation">spellings.txt</str>
  1268 + <str name="characterEncoding">UTF-8</str>
  1269 + <str name="spellcheckIndexDir">spellcheckerFile</str>
  1270 + </lst>
  1271 + -->
  1272 + </searchComponent>
  1273 +
  1274 + <!-- A request handler for demonstrating the spellcheck component.
  1275 +
  1276 + NOTE: This is purely as an example. The whole purpose of the
  1277 + SpellCheckComponent is to hook it into the request handler that
  1278 + handles your normal user queries so that a separate request is
  1279 + not needed to get suggestions.
  1280 +
  1281 + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
  1282 + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
  1283 +
  1284 + See http://wiki.apache.org/solr/SpellCheckComponent for details
  1285 + on the request parameters.
  1286 + -->
  1287 + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  1288 + <lst name="defaults">
  1289 + <str name="df">text</str>
  1290 + <!-- Solr will use suggestions from both the 'default' spellchecker
  1291 + and from the 'wordbreak' spellchecker and combine them.
  1292 + collations (re-written queries) can include a combination of
  1293 + corrections from both spellcheckers -->
  1294 + <str name="spellcheck.dictionary">default</str>
  1295 + <str name="spellcheck.dictionary">wordbreak</str>
  1296 + <str name="spellcheck">on</str>
  1297 + <str name="spellcheck.extendedResults">true</str>
  1298 + <str name="spellcheck.count">10</str>
  1299 + <str name="spellcheck.alternativeTermCount">5</str>
  1300 + <str name="spellcheck.maxResultsForSuggest">5</str>
  1301 + <str name="spellcheck.collate">true</str>
  1302 + <str name="spellcheck.collateExtendedResults">true</str>
  1303 + <str name="spellcheck.maxCollationTries">10</str>
  1304 + <str name="spellcheck.maxCollations">5</str>
  1305 + </lst>
  1306 + <arr name="last-components">
  1307 + <str>spellcheck</str>
  1308 + </arr>
  1309 + </requestHandler>
  1310 +
  1311 + <!-- Term Vector Component
  1312 +
  1313 + http://wiki.apache.org/solr/TermVectorComponent
  1314 + -->
  1315 + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
  1316 +
  1317 + <!-- A request handler for demonstrating the term vector component
  1318 +
  1319 + This is purely as an example.
  1320 +
  1321 + In reality you will likely want to add the component to your
  1322 + already specified request handlers.
  1323 + -->
  1324 + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
  1325 + <lst name="defaults">
  1326 + <str name="df">text</str>
  1327 + <bool name="tv">true</bool>
  1328 + </lst>
  1329 + <arr name="last-components">
  1330 + <str>tvComponent</str>
  1331 + </arr>
  1332 + </requestHandler>
  1333 +
  1334 + <!-- Clustering Component
  1335 +
  1336 + http://wiki.apache.org/solr/ClusteringComponent
  1337 +
  1338 + You'll need to set the solr.clustering.enabled system property
  1339 + when running solr to run with clustering enabled:
  1340 +
  1341 + java -Dsolr.clustering.enabled=true -jar start.jar
  1342 +
  1343 + -->
  1344 + <searchComponent name="clustering"
  1345 + enable="${solr.clustering.enabled:false}"
  1346 + class="solr.clustering.ClusteringComponent" >
  1347 + <!-- Declare an engine -->
  1348 + <lst name="engine">
  1349 + <!-- The name, only one can be named "default" -->
  1350 + <str name="name">default</str>
  1351 +
  1352 + <!-- Class name of Carrot2 clustering algorithm.
  1353 +
  1354 + Currently available algorithms are:
  1355 +
  1356 + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
  1357 + * org.carrot2.clustering.stc.STCClusteringAlgorithm
  1358 + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
  1359 +
  1360 + See http://project.carrot2.org/algorithms.html for the
  1361 + algorithm's characteristics.
  1362 + -->
  1363 + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
  1364 +
  1365 + <!-- Overriding values for Carrot2 default algorithm attributes.
  1366 +
  1367 + For a description of all available attributes, see:
  1368 + http://download.carrot2.org/stable/manual/#chapter.components.
  1369 + Use attribute key as name attribute of str elements
  1370 + below. These can be further overridden for individual
  1371 + requests by specifying attribute key as request parameter
  1372 + name and attribute value as parameter value.
  1373 + -->
  1374 + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
  1375 +
  1376 + <!-- Location of Carrot2 lexical resources.
  1377 +
  1378 + A directory from which to load Carrot2-specific stop words
  1379 + and stop labels. Absolute or relative to Solr config directory.
  1380 + If a specific resource (e.g. stopwords.en) is present in the
  1381 + specified dir, it will completely override the corresponding
  1382 + default one that ships with Carrot2.
  1383 +
  1384 + For an overview of Carrot2 lexical resources, see:
  1385 + http://download.carrot2.org/head/manual/#chapter.lexical-resources
  1386 + -->
  1387 + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
  1388 +
  1389 + <!-- The language to assume for the documents.
  1390 +
  1391 + For a list of allowed values, see:
  1392 + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
  1393 + -->
  1394 + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str>
  1395 + </lst>
  1396 + <lst name="engine">
  1397 + <str name="name">stc</str>
  1398 + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
  1399 + </lst>
  1400 + </searchComponent>
  1401 +
  1402 + <!-- A request handler for demonstrating the clustering component
  1403 +
  1404 + This is purely as an example.
  1405 +
  1406 + In reality you will likely want to add the component to your
  1407 + already specified request handlers.
  1408 + -->
  1409 + <requestHandler name="/clustering"
  1410 + startup="lazy"
  1411 + enable="${solr.clustering.enabled:false}"
  1412 + class="solr.SearchHandler">
  1413 + <lst name="defaults">
  1414 + <bool name="clustering">true</bool>
  1415 + <str name="clustering.engine">default</str>
  1416 + <bool name="clustering.results">true</bool>
  1417 + <!-- The title field -->
  1418 + <str name="carrot.title">name</str>
  1419 + <str name="carrot.url">id</str>
  1420 + <!-- The field to cluster on -->
  1421 + <str name="carrot.snippet">features</str>
  1422 + <!-- produce summaries -->
  1423 + <bool name="carrot.produceSummary">true</bool>
  1424 + <!-- the maximum number of labels per cluster -->
  1425 + <!--<int name="carrot.numDescriptions">5</int>-->
  1426 + <!-- produce sub clusters -->
  1427 + <bool name="carrot.outputSubClusters">false</bool>
  1428 +
  1429 + <str name="defType">edismax</str>
  1430 + <str name="qf">
  1431 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  1432 + </str>
  1433 + <str name="q.alt">*:*</str>
  1434 + <str name="rows">10</str>
  1435 + <str name="fl">*,score</str>
  1436 + </lst>
  1437 + <arr name="last-components">
  1438 + <str>clustering</str>
  1439 + </arr>
  1440 + </requestHandler>
  1441 +
  1442 + <!-- Terms Component
  1443 +
  1444 + http://wiki.apache.org/solr/TermsComponent
  1445 +
  1446 + A component to return terms and document frequency of those
  1447 + terms
  1448 + -->
  1449 + <searchComponent name="terms" class="solr.TermsComponent"/>
  1450 +
  1451 + <!-- A request handler for demonstrating the terms component -->
  1452 + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
  1453 + <lst name="defaults">
  1454 + <bool name="terms">true</bool>
  1455 + <bool name="distrib">false</bool>
  1456 + </lst>
  1457 + <arr name="components">
  1458 + <str>terms</str>
  1459 + </arr>
  1460 + </requestHandler>
  1461 +
  1462 +
  1463 + <!-- Query Elevation Component
  1464 +
  1465 + http://wiki.apache.org/solr/QueryElevationComponent
  1466 +
  1467 + a search component that enables you to configure the top
  1468 + results for a given query regardless of the normal lucene
  1469 + scoring.
  1470 + -->
  1471 + <searchComponent name="elevator" class="solr.QueryElevationComponent" >
  1472 + <!-- pick a fieldType to analyze queries -->
  1473 + <str name="queryFieldType">string</str>
  1474 + <str name="config-file">elevate.xml</str>
  1475 + </searchComponent>
  1476 +
  1477 + <!-- A request handler for demonstrating the elevator component -->
  1478 + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
  1479 + <lst name="defaults">
  1480 + <str name="echoParams">explicit</str>
  1481 + <str name="df">text</str>
  1482 + </lst>
  1483 + <arr name="last-components">
  1484 + <str>elevator</str>
  1485 + </arr>
  1486 + </requestHandler>
  1487 +
  1488 + <!-- Highlighting Component
  1489 +
  1490 + http://wiki.apache.org/solr/HighlightingParameters
  1491 + -->
  1492 + <searchComponent class="solr.HighlightComponent" name="highlight">
  1493 + <highlighting>
  1494 + <!-- Configure the standard fragmenter -->
  1495 + <!-- This could most likely be commented out in the "default" case -->
  1496 + <fragmenter name="gap"
  1497 + default="true"
  1498 + class="solr.highlight.GapFragmenter">
  1499 + <lst name="defaults">
  1500 + <int name="hl.fragsize">250</int>
  1501 + </lst>
  1502 + </fragmenter>
  1503 +
  1504 + <!-- A regular-expression-based fragmenter
  1505 + (for sentence extraction)
  1506 + -->
  1507 + <fragmenter name="regex"
  1508 + class="solr.highlight.RegexFragmenter">
  1509 + <lst name="defaults">
  1510 + <!-- slightly smaller fragsizes work better because of slop -->
  1511 + <int name="hl.fragsize">250</int>
  1512 + <!-- allow 50% slop on fragment sizes -->
  1513 + <float name="hl.regex.slop">0.5</float>
  1514 + <!-- a basic sentence pattern -->
  1515 + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
  1516 + </lst>
  1517 + </fragmenter>
  1518 +
  1519 + <!-- Configure the standard formatter -->
  1520 + <formatter name="html"
  1521 + default="true"
  1522 + class="solr.highlight.HtmlFormatter">
  1523 + <lst name="defaults">
  1524 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  1525 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  1526 + </lst>
  1527 + </formatter>
  1528 +
  1529 + <!-- Configure the standard encoder -->
  1530 + <encoder name="html"
  1531 + class="solr.highlight.HtmlEncoder" />
  1532 +
  1533 + <!-- Configure the standard fragListBuilder -->
  1534 + <fragListBuilder name="simple"
  1535 + class="solr.highlight.SimpleFragListBuilder"/>
  1536 +
  1537 + <!-- Configure the single fragListBuilder -->
  1538 + <fragListBuilder name="single"
  1539 + class="solr.highlight.SingleFragListBuilder"/>
  1540 +
  1541 + <!-- Configure the weighted fragListBuilder -->
  1542 + <fragListBuilder name="weighted"
  1543 + default="true"
  1544 + class="solr.highlight.WeightedFragListBuilder"/>
  1545 +
  1546 + <!-- default tag FragmentsBuilder -->
  1547 + <fragmentsBuilder name="default"
  1548 + default="true"
  1549 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1550 + <!--
  1551 + <lst name="defaults">
  1552 + <str name="hl.multiValuedSeparatorChar">/</str>
  1553 + </lst>
  1554 + -->
  1555 + </fragmentsBuilder>
  1556 +
  1557 + <!-- multi-colored tag FragmentsBuilder -->
  1558 + <fragmentsBuilder name="colored"
  1559 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1560 + <lst name="defaults">
  1561 + <str name="hl.tag.pre"><![CDATA[
  1562 + <b style="background:yellow">,<b style="background:lawgreen">,
  1563 + <b style="background:aquamarine">,<b style="background:magenta">,
  1564 + <b style="background:palegreen">,<b style="background:coral">,
  1565 + <b style="background:wheat">,<b style="background:khaki">,
  1566 + <b style="background:lime">,<b style="background:deepskyblue">]]></str>
  1567 + <str name="hl.tag.post"><![CDATA[</b>]]></str>
  1568 + </lst>
  1569 + </fragmentsBuilder>
  1570 +
  1571 + <boundaryScanner name="default"
  1572 + default="true"
  1573 + class="solr.highlight.SimpleBoundaryScanner">
  1574 + <lst name="defaults">
  1575 + <str name="hl.bs.maxScan">10</str>
  1576 + <str name="hl.bs.chars">.,!?
  1577 +
  1578 +</str>
  1579 + </lst>
  1580 + </boundaryScanner>
  1581 +
  1582 + <boundaryScanner name="breakIterator"
  1583 + class="solr.highlight.BreakIteratorBoundaryScanner">
  1584 + <lst name="defaults">
  1585 + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
  1586 + <str name="hl.bs.type">WORD</str>
  1587 + <!-- language and country are used when constructing Locale object. -->
  1588 + <!-- And the Locale object will be used when getting instance of BreakIterator -->
  1589 + <str name="hl.bs.language">pt</str>
  1590 + <str name="hl.bs.country">BR</str>
  1591 + </lst>
  1592 + </boundaryScanner>
  1593 + </highlighting>
  1594 + </searchComponent>
  1595 +
  1596 + <!-- Update Processors
  1597 +
  1598 + Chains of Update Processor Factories for dealing with Update
  1599 + Requests can be declared, and then used by name in Update
  1600 + Request Processors
  1601 +
  1602 + http://wiki.apache.org/solr/UpdateRequestProcessor
  1603 +
  1604 + -->
  1605 + <!-- Deduplication
  1606 +
  1607 + An example dedup update processor that creates the "id" field
  1608 + on the fly based on the hash code of some other fields. This
  1609 + example has overwriteDupes set to false since we are using the
  1610 + id field as the signatureField and Solr will maintain
  1611 + uniqueness based on that anyway.
  1612 +
  1613 + -->
  1614 + <!--
  1615 + <updateRequestProcessorChain name="dedupe">
  1616 + <processor class="solr.processor.SignatureUpdateProcessorFactory">
  1617 + <bool name="enabled">true</bool>
  1618 + <str name="signatureField">id</str>
  1619 + <bool name="overwriteDupes">false</bool>
  1620 + <str name="fields">name,features,cat</str>
  1621 + <str name="signatureClass">solr.processor.Lookup3Signature</str>
  1622 + </processor>
  1623 + <processor class="solr.LogUpdateProcessorFactory" />
  1624 + <processor class="solr.RunUpdateProcessorFactory" />
  1625 + </updateRequestProcessorChain>
  1626 + -->
  1627 +
  1628 + <!-- Language identification
  1629 +
  1630 + This example update chain identifies the language of the incoming
  1631 + documents using the langid contrib. The detected language is
  1632 + written to field language_s. No field name mapping is done.
  1633 + The fields used for detection are text, title, subject and description,
  1634 + making this example suitable for detecting languages from full-text
  1635 + rich documents injected via ExtractingRequestHandler.
  1636 + See more about langId at http://wiki.apache.org/solr/LanguageDetection
  1637 + -->
  1638 + <!--
  1639 + <updateRequestProcessorChain name="langid">
  1640 + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
  1641 + <str name="langid.fl">text,title,subject,description</str>
  1642 + <str name="langid.langField">language_s</str>
  1643 + <str name="langid.fallback">en</str>
  1644 + </processor>
  1645 + <processor class="solr.LogUpdateProcessorFactory" />
  1646 + <processor class="solr.RunUpdateProcessorFactory" />
  1647 + </updateRequestProcessorChain>
  1648 + -->
  1649 +
  1650 + <!-- Script update processor
  1651 +
  1652 + This example hooks in an update processor implemented using JavaScript.
  1653 +
  1654 + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
  1655 + -->
  1656 + <!--
  1657 + <updateRequestProcessorChain name="script">
  1658 + <processor class="solr.StatelessScriptUpdateProcessorFactory">
  1659 + <str name="script">update-script.js</str>
  1660 + <lst name="params">
  1661 + <str name="config_param">example config parameter</str>
  1662 + </lst>
  1663 + </processor>
  1664 + <processor class="solr.RunUpdateProcessorFactory" />
  1665 + </updateRequestProcessorChain>
  1666 + -->
  1667 +
  1668 + <!-- Response Writers
  1669 +
  1670 + http://wiki.apache.org/solr/QueryResponseWriter
  1671 +
  1672 + Request responses will be written using the writer specified by
  1673 + the 'wt' request parameter matching the name of a registered
  1674 + writer.
  1675 +
  1676 + The "default" writer is the default and will be used if 'wt' is
  1677 + not specified in the request.
  1678 + -->
  1679 + <!-- The following response writers are implicitly configured unless
  1680 + overridden...
  1681 + -->
  1682 + <!--
  1683 + <queryResponseWriter name="xml"
  1684 + default="true"
  1685 + class="solr.XMLResponseWriter" />
  1686 + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
  1687 + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
  1688 + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
  1689 + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
  1690 + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
  1691 + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
  1692 + -->
  1693 +
  1694 + <queryResponseWriter name="json" class="solr.JSONResponseWriter">
  1695 + <!-- For the purposes of the tutorial, JSON responses are written as
  1696 + plain text so that they are easy to read in *any* browser.
  1697 + If you expect a MIME type of "application/json" just remove this override.
  1698 + -->
  1699 + <str name="content-type">text/plain; charset=UTF-8</str>
  1700 + </queryResponseWriter>
  1701 +
  1702 + <!--
  1703 + Custom response writers can be declared as needed...
  1704 + -->
  1705 + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
  1706 +
  1707 +
  1708 + <!-- XSLT response writer transforms the XML output by any xslt file found
  1709 + in Solr's conf/xslt directory. Changes to xslt files are checked for
  1710 + every xsltCacheLifetimeSeconds.
  1711 + -->
  1712 + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
  1713 + <int name="xsltCacheLifetimeSeconds">5</int>
  1714 + </queryResponseWriter>
  1715 +
  1716 + <!-- Query Parsers
  1717 +
  1718 + http://wiki.apache.org/solr/SolrQuerySyntax
  1719 +
  1720 + Multiple QParserPlugins can be registered by name, and then
  1721 + used in either the "defType" param for the QueryComponent (used
  1722 + by SearchHandler) or in LocalParams
  1723 + -->
  1724 + <!-- example of registering a query parser -->
  1725 + <!--
  1726 + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
  1727 + -->
  1728 +
  1729 + <!-- Function Parsers
  1730 +
  1731 + http://wiki.apache.org/solr/FunctionQuery
  1732 +
  1733 + Multiple ValueSourceParsers can be registered by name, and then
  1734 + used as function names when using the "func" QParser.
  1735 + -->
  1736 + <!-- example of registering a custom function parser -->
  1737 + <!--
  1738 + <valueSourceParser name="myfunc"
  1739 + class="com.mycompany.MyValueSourceParser" />
  1740 + -->
  1741 +
  1742 +
  1743 + <!-- Document Transformers
  1744 + http://wiki.apache.org/solr/DocTransformers
  1745 + -->
  1746 + <!--
  1747 + Could be something like:
  1748 + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
  1749 + <str name="connection">jdbc://....</str>
  1750 + </transformer>
  1751 +
  1752 + To add a constant value to all docs, use:
  1753 + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1754 + <int name="value">5</int>
  1755 + </transformer>
  1756 +
  1757 + If you want the user to still be able to change it with _value:something_ use this:
  1758 + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1759 + <double name="defaultValue">5</double>
  1760 + </transformer>
  1761 +
  1762 + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
  1763 + EditorialMarkerFactory will do exactly that:
  1764 + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
  1765 + -->
  1766 +
  1767 +
  1768 + <!-- Legacy config for the admin interface -->
  1769 + <admin>
  1770 + <defaultQuery>*:*</defaultQuery>
  1771 + </admin>
  1772 +
  1773 +</config>
... ...
index/sei-bases-conhecimento-schema.xml 0 → 100755
  1 +++ a/index/sei-bases-conhecimento-schema.xml
... ... @@ -0,0 +1,1147 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + This is the Solr schema file. This file should be named "schema.xml" and
  21 + should be in the conf directory under the solr home
  22 + (i.e. ./solr/conf/schema.xml by default)
  23 + or located where the classloader for the Solr webapp can find it.
  24 +
  25 + This example schema is the recommended starting point for users.
  26 + It should be kept correct and concise, usable out-of-the-box.
  27 +
  28 + For more information, on how to customize this file, please see
  29 + http://wiki.apache.org/solr/SchemaXml
  30 +
  31 + PERFORMANCE NOTE: this schema includes many optional features and should not
  32 + be used for benchmarking. To improve performance one could
  33 + - set stored="false" for all fields possible (esp large fields) when you
  34 + only need to search on the field but don't need to return the original
  35 + value.
  36 + - set indexed="false" if you don't need to search on the field, but only
  37 + return the field as a result of searching on other indexed fields.
  38 + - remove all unneeded copyField statements
  39 + - for best index size and searching performance, set indexed="false"
  40 + for all general text fields, use copyField to copy them to the
  41 + catchall "text" field, and use that for searching.
  42 + - For maximum indexing performance, use the ConcurrentUpdateSolrServer
  43 + java client.
  44 + - Remember to run the JVM in server mode, and use a higher logging level
  45 + that avoids logging every request
  46 +-->
  47 +
  48 +<schema name="sei-protocolos" version="1.5">
  49 + <!-- attribute "name" is the name of this schema and is only used for display purposes.
  50 + version="x.y" is Solr's version number for the schema syntax and
  51 + semantics. It should not normally be changed by applications.
  52 +
  53 + 1.0: multiValued attribute did not exist, all fields are multiValued
  54 + by nature
  55 + 1.1: multiValued attribute introduced, false by default
  56 + 1.2: omitTermFreqAndPositions attribute introduced, true by default
  57 + except for text fields.
  58 + 1.3: removed optional field compress feature
  59 + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
  60 + behavior when a single string produces multiple tokens. Defaults
  61 + to off for version >= 1.4
  62 + 1.5: omitNorms defaults to true for primitive field types
  63 + (int, float, boolean, string...)
  64 + -->
  65 +
  66 + <fields>
  67 +
  68 + <field name="id_base_conhecimento" type="string" indexed="false" stored="true" />
  69 + <field name="id_documento_edoc" type="string" indexed="false" stored="true" />
  70 + <field name="descricao" type="string" indexed="false" stored="true" />
  71 + <field name="id_unidade" type="string" indexed="false" stored="true" />
  72 + <field name="sigla_unidade" type="string" indexed="false" stored="true" />
  73 + <field name="descricao_unidade" type="string" indexed="false" stored="true" />
  74 + <field name="dta_geracao" type="date" indexed="true" stored="true" />
  75 + <field name="id_anexo" type="string" indexed="false" stored="true" />
  76 + <field name="nome_anexo" type="string" indexed="false" stored="true" />
  77 + <field name="link_base_conhecimento" type="string" indexed="false" stored="true" />
  78 +
  79 + <!-- Valid attributes for fields:
  80 + name: mandatory - the name for the field
  81 + type: mandatory - the name of a field type from the
  82 + <types> fieldType section
  83 + indexed: true if this field should be indexed (searchable or sortable)
  84 + stored: true if this field should be retrievable
  85 + multiValued: true if this field may contain multiple values per document
  86 + omitNorms: (expert) set to true to omit the norms associated with
  87 + this field (this disables length normalization and index-time
  88 + boosting for the field, and saves some memory). Only full-text
  89 + fields or fields that need an index-time boost need norms.
  90 + Norms are omitted for primitive (non-analyzed) types by default.
  91 + termVectors: [false] set to true to store the term vector for a
  92 + given field.
  93 + When using MoreLikeThis, fields used for similarity should be
  94 + stored for best performance.
  95 + termPositions: Store position information with the term vector.
  96 + This will increase storage costs.
  97 + termOffsets: Store offset information with the term vector. This
  98 + will increase storage costs.
  99 + required: The field is required. It will throw an error if the
  100 + value does not exist
  101 + default: a value that should be used if no value is specified
  102 + when adding a document.
  103 + -->
  104 +
  105 + <!-- field names should consist of alphanumeric or underscore characters only and
  106 + not start with a digit. This is not currently strictly enforced,
  107 + but other field names will not have first class support from all components
  108 + and back compatibility is not guaranteed. Names with both leading and
  109 + trailing underscores (e.g. _version_) are reserved.
  110 + -->
  111 + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
  112 +
  113 + <!-- mairon
  114 + <field name="documento" type="string" indexed="true" stored="true" />
  115 + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
  116 + <field name="name" type="text_general" indexed="true" stored="true"/>
  117 + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
  118 + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
  119 + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
  120 + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
  121 +
  122 + <field name="weight" type="float" indexed="true" stored="true"/>
  123 + <field name="price" type="float" indexed="true" stored="true"/>
  124 + <field name="popularity" type="int" indexed="true" stored="true" />
  125 + <field name="inStock" type="boolean" indexed="true" stored="true" />
  126 +
  127 + <field name="store" type="location" indexed="true" stored="true"/>
  128 + -->
  129 +
  130 + <!-- Common metadata fields, named specifically to match up with
  131 + SolrCell metadata when parsing rich documents such as Word, PDF.
  132 + Some fields are multiValued only because Tika currently may return
  133 + multiple values for them. Some metadata is parsed from the documents,
  134 + but there are some which come from the client context:
  135 + "content_type": From the HTTP headers of incoming stream
  136 + "resourcename": From SolrCell request param resource.name
  137 + -->
  138 + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
  139 + <field name="subject" type="text_general" indexed="true" stored="true"/>
  140 + <field name="description" type="text_general" indexed="true" stored="true"/>
  141 + <field name="comments" type="text_general" indexed="true" stored="true"/>
  142 + <field name="author" type="text_general" indexed="true" stored="true"/>
  143 + <field name="keywords" type="text_general" indexed="true" stored="true"/>
  144 + <field name="category" type="text_general" indexed="true" stored="true"/>
  145 + <field name="resourcename" type="text_general" indexed="true" stored="true"/>
  146 + <field name="url" type="text_general" indexed="true" stored="true"/>
  147 + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
  148 + <field name="last_modified" type="date" indexed="true" stored="true"/>
  149 + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
  150 +
  151 + <!-- Main body of document extracted by SolrCell.
  152 + NOTE: This field is not indexed by default, since it is also copied to "text"
  153 + using copyField below. This is to save space. Use this field for returning and
  154 + highlighting document content. Use the "text" field to search the content. -->
  155 + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
  156 +
  157 +
  158 + <!-- catchall field, containing all other searchable text fields (implemented
  159 + via copyField further on in this schema) -->
  160 + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
  161 +
  162 + <!-- catchall text field that indexes tokens both normally and in reverse for efficient
  163 + leading wildcard queries. -->
  164 + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
  165 +
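  166 + <!-- non-tokenized version of manufacturer to make it easier to sort or group
  167 + results by manufacturer. NOTE: the "manu" field and the manu -> manu_exact copyField are commented out in this schema, so no copyField currently fills this field -->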
  168 + <field name="manu_exact" type="string" indexed="true" stored="false"/>
  169 +
  170 + <field name="payloads" type="payloads" indexed="true" stored="true"/>
  171 +
  172 + <field name="_version_" type="long" indexed="true" stored="true"/>
  173 +
  174 + <!-- Uncommenting the following will create a "timestamp" field using
  175 + a default value of "NOW" to indicate when each document was indexed.
  176 + -->
  177 + <!--
  178 + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
  179 + -->
  180 +
  181 + <!-- Dynamic field definitions allow using convention over configuration
  182 + for fields via the specification of patterns to match field names.
  183 + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
  184 + RESTRICTION: the glob-like pattern in the name attribute must have
  185 + a "*" only at the start or the end. -->
  186 +
  187 + <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
  188 + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
  189 + <dynamicField name="*_s" type="string" indexed="true" stored="true" />
  190 + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
  191 + <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
  192 + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
  193 + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
  194 + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
  195 + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
  196 + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
  197 + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
  198 + <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
  199 + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
  200 + <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
  201 + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
  202 +
  203 + <!-- Type used to index the lat and lon components for the "location" FieldType -->
  204 + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
  205 +
  206 + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
  207 + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
  208 + <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
  209 +
  210 + <!-- some trie-coded dynamic fields for faster range queries -->
  211 + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
  212 + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
  213 + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
  214 + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
  215 + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
  216 +
  217 + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
  218 + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
  219 +
  220 + <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
  221 + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
  222 +
  223 + <dynamicField name="random_*" type="random" />
  224 +
  225 + <!-- uncomment the following to ignore any fields that don't already match an existing
  226 + field name or dynamic field, rather than reporting them as an error.
  227 + alternately, change the type="ignored" to some other type e.g. "text" if you want
  228 + unknown fields indexed and/or stored by default -->
  229 + <!--dynamicField name="*" type="ignored" multiValued="true" /-->
  230 +
  231 + </fields>
  232 +
  233 +
  234 + <!-- Field to use to determine and enforce document uniqueness.
  235 + Unless this field is marked with required="false", it will be a required field
  236 + -->
  237 + <uniqueKey>id</uniqueKey>
  238 +
  239 + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
  240 + parsing a query string that isn't explicit about the field. Machine (non-user)
  241 + generated queries are best made explicit, or they can use the "df" request parameter
  242 + which takes precedence over this.
  243 + Note: Un-commenting defaultSearchField will be insufficient if your request handler
  244 + in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
  245 + <defaultSearchField>text</defaultSearchField> -->
  246 +
  247 + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
  248 + when parsing a query string to determine if a clause of the query should be marked as
  249 + required or optional, assuming the clause isn't already marked by some operator.
  250 + The default is OR, which is generally assumed so it is not a good idea to change it
  251 + globally here. The "q.op" request parameter takes precedence over this.
  252 + <solrQueryParser defaultOperator="OR"/> -->
  253 +
  254 + <!-- copyField commands copy one field to another at the time a document
  255 + is added to the index. It's used either to index the same field differently,
  256 + or to add multiple fields to the same field for easier/faster searching. -->
  257 +
  258 + <!-- mairon
  259 + <copyField source="cat" dest="text"/>
  260 + <copyField source="name" dest="text"/>
  261 + <copyField source="manu" dest="text"/>
  262 + <copyField source="features" dest="text"/>
  263 + <copyField source="includes" dest="text"/>
  264 + <copyField source="manu" dest="manu_exact"/>
  265 + <copyField source="price" dest="price_c"/>
  266 + -->
  267 +
  268 + <!-- Text fields from SolrCell to search by default in our catch-all field -->
  269 + <copyField source="title" dest="text"/>
  270 + <copyField source="author" dest="text"/>
  271 + <copyField source="description" dest="text"/>
  272 + <copyField source="keywords" dest="text"/>
  273 + <copyField source="content" dest="text"/>
  274 + <copyField source="content_type" dest="text"/>
  275 + <copyField source="resourcename" dest="text"/>
  276 + <copyField source="url" dest="text"/>
  277 +
  278 + <!-- Create a string version of author for faceting -->
  279 + <copyField source="author" dest="author_s"/>
  280 +
  281 + <!-- Above, multiple source fields are copied to the [text] field.
  282 + Another way to map multiple source fields to the same
  283 + destination field is to use the dynamic field syntax.
  284 + copyField also supports a maxChars to copy setting. -->
  285 +
  286 + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
  287 +
  288 + <!-- copy name to alphaNameSort, a field designed for sorting by name -->
  289 + <!-- <copyField source="name" dest="alphaNameSort"/> -->
  290 +
  291 + <types>
  292 + <!-- field type definitions. The "name" attribute is
  293 + just a label to be used by field definitions. The "class"
  294 + attribute and any other attributes determine the real
  295 + behavior of the fieldType.
  296 + Class names starting with "solr" refer to java classes in a
  297 + standard package such as org.apache.solr.analysis
  298 + -->
  299 +
  300 + <fieldType name="date" class="solr.DateField" omitNorms="true"/> <!-- NOTE(review): solr.DateField appears to be a legacy class superseded by solr.TrieDateField in later Solr 4.x releases; switching would require a reindex, confirm before changing -->
  301 +
  302 + <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
  303 + <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
  304 +
  305 + <!-- boolean type: "true" or "false" -->
  306 + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
  307 +
  308 + <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
  309 + currently supported on types that are sorted internally as strings
  310 + and on numeric types.
  311 + This includes "string","boolean", and, as of 3.5 (and 4.x),
  312 + int, float, long, date, double, including the "Trie" variants.
  313 + - If sortMissingLast="true", then a sort on this field will cause documents
  314 + without the field to come after documents with the field,
  315 + regardless of the requested sort order (asc or desc).
  316 + - If sortMissingFirst="true", then a sort on this field will cause documents
  317 + without the field to come before documents with the field,
  318 + regardless of the requested sort order.
  319 + - If sortMissingLast="false" and sortMissingFirst="false" (the default),
  320 + then default lucene sorting will be used which places docs without the
  321 + field first in an ascending sort and last in a descending sort.
  322 + -->
  323 +
  324 + <!--
  325 + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
  326 + -->
  327 + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
  328 + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
  329 + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  330 + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
  331 +
  332 + <!--
  333 + Numeric field types that index each value at various levels of precision
  334 + to accelerate range queries when the number of values between the range
  335 + endpoints is large. See the javadoc for NumericRangeQuery for internal
  336 + implementation details.
  337 +
  338 + Smaller precisionStep values (specified in bits) will lead to more tokens
  339 + indexed per value, slightly larger index size, and faster range queries.
  340 + A precisionStep of 0 disables indexing at different precision levels.
  341 + -->
  342 + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
  343 + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
  344 + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
  345 + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
  346 +
  347 + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
  348 + is a more restricted form of the canonical representation of dateTime
  349 + http://www.w3.org/TR/xmlschema-2/#dateTime
  350 + The trailing "Z" designates UTC time and is mandatory.
  351 + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
  352 + All other components are mandatory.
  353 +
  354 + Expressions can also be used to denote calculations that should be
  355 + performed relative to "NOW" to determine the value, ie...
  356 +
  357 + NOW/HOUR
  358 + ... Round to the start of the current hour
  359 + NOW-1DAY
  360 + ... Exactly 1 day prior to now
  361 + NOW/DAY+6MONTHS+3DAYS
  362 + ... 6 months and 3 days in the future from the start of
  363 + the current day
  364 +
  365 + Consult the DateField javadocs for more information.
  366 +
  367 + Note: For faster range queries, consider the tdate type
  368 + -->
  369 + <!-- Disabled alternative for the date type declared earlier as solr.DateField: <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> -->
  370 +
  371 + <!-- A Trie based date field for faster date range queries and date faceting. -->
  372 + <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
  373 +
  374 +
  375 + <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
  376 + <fieldtype name="binary" class="solr.BinaryField"/>
  377 +
  378 + <!--
  379 + Note:
  380 + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
  381 + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
  382 +
  383 + Plain numeric field types that store and index the text
  384 + value verbatim (and hence don't correctly support range queries, since the
  385 + lexicographic ordering isn't equal to the numeric ordering)
  386 + -->
  387 + <fieldType name="pint" class="solr.IntField"/>
  388 + <fieldType name="plong" class="solr.LongField"/>
  389 + <fieldType name="pfloat" class="solr.FloatField"/>
  390 + <fieldType name="pdouble" class="solr.DoubleField"/>
  391 + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
  392 +
  393 + <!-- The "RandomSortField" is not used to store or search any
  394 + data. You can declare fields of this type in your schema
  395 + to generate pseudo-random orderings of your docs for sorting
  396 + or function purposes. The ordering is generated based on the field
  397 + name and the version of the index. As long as the index version
  398 + remains unchanged, and the same field name is reused,
  399 + the ordering of the docs will be consistent.
  400 + If you want different pseudo-random orderings of documents,
  401 + for the same version of the index, use a dynamicField and
  402 + change the field name in the request.
  403 + -->
  404 + <fieldType name="random" class="solr.RandomSortField" indexed="true" />
  405 +
  406 + <!-- solr.TextField allows the specification of custom text analyzers
  407 + specified as a tokenizer and a list of token filters. Different
  408 + analyzers may be specified for indexing and querying.
  409 +
  410 + The optional positionIncrementGap puts space between multiple fields of
  411 + this type on the same document, with the purpose of preventing false phrase
  412 + matching across fields.
  413 +
  414 + For more info on customizing your analyzer chain, please see
  415 + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
  416 + -->
  417 +
  418 + <!-- One can also specify an existing Analyzer class that has a
  419 + default constructor via the class attribute on the analyzer element.
  420 + Example:
  421 + <fieldType name="text_greek" class="solr.TextField">
  422 + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
  423 + </fieldType>
  424 + -->
  425 +
  426 + <!-- A text field that only splits on whitespace for exact matching of words -->
  427 + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  428 + <analyzer>
  429 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  430 + </analyzer>
  431 + </fieldType>
  432 +
  433 + <!-- A general text field that has reasonable, generic
  434 + cross-language defaults: it tokenizes with StandardTokenizer,
  435 + removes stop words from case-insensitive "stopwords.txt"
  436 + (empty by default), and down cases. At query time only, it
  437 + also applies synonyms. -->
  438 + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  439 + <analyzer type="index">
  440 +
  441 + <tokenizer class="solr.StandardTokenizerFactory"/>
  442 +
  443 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  444 + <!-- in this example, we will only use synonyms at query time
  445 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  446 + -->
  447 + <filter class="solr.LowerCaseFilterFactory"/>
  448 +
  449 + <!-- mairon -->
  450 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  451 + <!-- mairon -->
  452 +
  453 +
  454 + </analyzer>
  455 +
  456 + <analyzer type="query">
  457 + <tokenizer class="solr.StandardTokenizerFactory"/>
  458 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  459 + <filter class="solr.LowerCaseFilterFactory"/>
  460 +
  461 + <!-- mairon -->
  462 + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
  463 + <!-- <filter class="solr.ASCIIFoldingFilterFactory"/> -->
  464 + <!-- <filter class="solr.BrazilianStemFilterFactory"/> -->
  465 + <!-- mairon -->
  466 +
  467 + </analyzer>
  468 +
  469 + </fieldType>
  470 +
  471 + <!-- A text field with defaults appropriate for English: it
  472 + tokenizes with StandardTokenizer, removes English stop words
  473 + (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
  474 + finally applies Porter's stemming. The query time analyzer
  475 + also applies synonyms from synonyms.txt. -->
  476 + <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  477 + <analyzer type="index">
  478 + <tokenizer class="solr.StandardTokenizerFactory"/>
  479 + <!-- in this example, we will only use synonyms at query time
  480 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  481 + -->
  482 + <!-- Case insensitive stop word removal.
  483 + add enablePositionIncrements=true in both the index and query
  484 + analyzers to leave a 'gap' for more accurate phrase queries.
  485 + -->
  486 + <filter class="solr.StopFilterFactory"
  487 + ignoreCase="true"
  488 + words="lang/stopwords_en.txt"
  489 + enablePositionIncrements="true"
  490 + />
  491 + <filter class="solr.LowerCaseFilterFactory"/>
  492 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  493 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  494 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  495 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  496 + -->
  497 + <filter class="solr.PorterStemFilterFactory"/>
  498 + </analyzer>
  499 + <analyzer type="query">
  500 + <tokenizer class="solr.StandardTokenizerFactory"/>
  501 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  502 + <filter class="solr.StopFilterFactory"
  503 + ignoreCase="true"
  504 + words="lang/stopwords_en.txt"
  505 + enablePositionIncrements="true"
  506 + />
  507 + <filter class="solr.LowerCaseFilterFactory"/>
  508 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  509 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  510 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  511 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  512 + -->
  513 + <filter class="solr.PorterStemFilterFactory"/>
  514 + </analyzer>
  515 + </fieldType>
  516 +
  517 + <!-- A text field with defaults appropriate for English, plus
  518 + aggressive word-splitting and autophrase features enabled.
  519 + This field is just like text_en, except it adds
  520 + WordDelimiterFilter to enable splitting and matching of
  521 + words on case-change, alpha numeric boundaries, and
  522 + non-alphanumeric chars. This means certain compound word
  523 + cases will work, for example query "wi fi" will match
  524 + document "WiFi" or "wi-fi".
  525 + -->
  526 + <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  527 + <analyzer type="index">
  528 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  529 + <!-- in this example, we will only use synonyms at query time
  530 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  531 + -->
  532 + <!-- Case insensitive stop word removal.
  533 + add enablePositionIncrements=true in both the index and query
  534 + analyzers to leave a 'gap' for more accurate phrase queries.
  535 + -->
  536 + <filter class="solr.StopFilterFactory"
  537 + ignoreCase="true"
  538 + words="lang/stopwords_en.txt"
  539 + enablePositionIncrements="true"
  540 + />
  541 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
  542 + <filter class="solr.LowerCaseFilterFactory"/>
  543 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  544 + <filter class="solr.PorterStemFilterFactory"/>
  545 +
  546 +
  547 + </analyzer>
  548 + <analyzer type="query">
  549 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  550 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  551 + <filter class="solr.StopFilterFactory"
  552 + ignoreCase="true"
  553 + words="lang/stopwords_en.txt"
  554 + enablePositionIncrements="true"
  555 + />
  556 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
  557 + <filter class="solr.LowerCaseFilterFactory"/>
  558 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  559 + <filter class="solr.PorterStemFilterFactory"/>
  560 + </analyzer>
  561 + </fieldType>
  562 +
  563 + <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
  564 + but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
  565 + <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  566 + <analyzer>
  567 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  568 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
  569 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
  570 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
  571 + <filter class="solr.LowerCaseFilterFactory"/>
  572 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  573 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  574 + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
  575 + possible with WordDelimiterFilter in conjunction with stemming. -->
  576 + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  577 + </analyzer>
  578 + </fieldType>
  579 +
  580 + <!-- Just like text_general except it reverses the characters of
  581 + each token, to enable more efficient leading wildcard queries. -->
  582 + <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  583 + <analyzer type="index">
  584 + <tokenizer class="solr.StandardTokenizerFactory"/>
  585 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  586 + <filter class="solr.LowerCaseFilterFactory"/>
  587 + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
  588 + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  589 + </analyzer>
  590 + <analyzer type="query">
  591 + <tokenizer class="solr.StandardTokenizerFactory"/>
  592 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  593 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  594 + <filter class="solr.LowerCaseFilterFactory"/>
  595 + </analyzer>
  596 + </fieldType>
  597 +
  598 + <!-- charFilter + WhitespaceTokenizer -->
  599 + <!--
  600 + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
  601 + <analyzer>
  602 + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
  603 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  604 + </analyzer>
  605 + </fieldType>
  606 + -->
  607 +
  608 + <!-- This is an example of using the KeywordTokenizer along
  609 + with various TokenFilterFactories to produce a sortable field
  610 + that does not include some properties of the source text
  611 + -->
  612 + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  613 + <analyzer>
  614 + <!-- KeywordTokenizer does no actual tokenizing, so the entire
  615 + input string is preserved as a single token
  616 + -->
  617 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  618 + <!-- The LowerCase TokenFilter does what you expect, which can be
  619 + useful when you want your sorting to be case insensitive
  620 + -->
  621 + <filter class="solr.LowerCaseFilterFactory" />
  622 + <!-- The TrimFilter removes any leading or trailing whitespace -->
  623 + <filter class="solr.TrimFilterFactory" />
  624 + <!-- The PatternReplaceFilter gives you the flexibility to use
  625 + a Java regular expression to replace any sequence of characters
  626 + matching a pattern with an arbitrary replacement string,
  627 + which may include back references to portions of the original
  628 + string matched by the pattern.
  629 + Here every character outside a-z is replaced with the empty string.
  630 + See the Java Regular Expression documentation for more
  631 + information on pattern and replacement string syntax.
  632 +
  633 + http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
  634 + -->
  635 + <filter class="solr.PatternReplaceFilterFactory"
  636 + pattern="([^a-z])" replacement="" replace="all"
  637 + />
  638 + </analyzer>
  639 + </fieldType>
  640 +
  641 + <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
  642 + <analyzer>
  643 + <tokenizer class="solr.StandardTokenizerFactory"/>
  644 + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  645 + </analyzer>
  646 + </fieldtype>
  647 +
  648 + <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
  649 + <analyzer>
  650 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  651 + <!--
  652 + The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
  653 + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
  654 + Attributes of the DelimitedPayloadTokenFilterFactory :
  655 + "delimiter" - a one character delimiter. Default is | (pipe)
  656 + "encoder" - how to encode the following value into a playload
  657 + float -> org.apache.lucene.analysis.payloads.FloatEncoder,
  658 + integer -> o.a.l.a.p.IntegerEncoder
  659 + identity -> o.a.l.a.p.IdentityEncoder
  660 + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
  661 + -->
  662 + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  663 + </analyzer>
  664 + </fieldtype>
  665 +
  666 + <!-- lowercases the entire field value, keeping it as a single token. -->
  667 + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  668 + <analyzer>
  669 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  670 + <filter class="solr.LowerCaseFilterFactory" />
  671 + </analyzer>
  672 + </fieldType>
  673 +
  674 + <!--
  675 + Example of using PathHierarchyTokenizerFactory at index time, so
  676 + queries for paths match documents at that path, or in descendent paths
  677 + -->
  678 + <fieldType name="descendent_path" class="solr.TextField">
  679 + <analyzer type="index">
  680 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  681 + </analyzer>
  682 + <analyzer type="query">
  683 + <tokenizer class="solr.KeywordTokenizerFactory" />
  684 + </analyzer>
  685 + </fieldType>
  686 + <!--
  687 + Example of using PathHierarchyTokenizerFactory at query time, so
  688 + queries for paths match documents at that path, or in ancestor paths
  689 + -->
  690 + <fieldType name="ancestor_path" class="solr.TextField">
  691 + <analyzer type="index">
  692 + <tokenizer class="solr.KeywordTokenizerFactory" />
  693 + </analyzer>
  694 + <analyzer type="query">
  695 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  696 + </analyzer>
  697 + </fieldType>
  698 +
  699 + <!-- since fields of this type are by default not stored or indexed,
  700 + any data added to them will be ignored outright. -->
  701 + <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
  702 +
  703 + <!-- This point type indexes the coordinates as separate fields (subFields)
  704 + If subFieldType is defined, it references a type, and a dynamic field
  705 + definition is created matching *___<typename>. Alternately, if
  706 + subFieldSuffix is defined, that is used to create the subFields.
  707 + Example: if subFieldType="double", then the coordinates would be
  708 + indexed in fields myloc_0___double,myloc_1___double.
  709 + Example: if subFieldSuffix="_d" then the coordinates would be indexed
  710 + in fields myloc_0_d,myloc_1_d
  711 + The subFields are an implementation detail of the fieldType, and end
  712 + users normally should not need to know about them.
  713 + -->
  714 + <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
  715 +
  716 + <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
  717 + <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
  718 +
  719 + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
  720 + For more information about this and other Spatial fields new to Solr 4, see:
  721 + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
  722 + -->
  723 + <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
  724 + geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
  725 +
  726 + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
  727 + Parameters:
  728 + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
  729 + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
  730 + providerClass: Lets you plug in other exchange provider backend:
  731 + solr.FileExchangeRateProvider is the default and takes one parameter:
  732 + currencyConfig: name of an xml file holding exchange rates
  733 + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
  734 + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
  735 + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
  736 + -->
  737 + <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
  738 +
  739 +
  740 +
  741 + <!-- some examples for different languages (generally ordered by ISO code) -->
  742 +
  743 + <!-- Arabic -->
  744 + <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
  745 + <analyzer>
  746 + <tokenizer class="solr.StandardTokenizerFactory"/>
  747 + <!-- for any non-arabic -->
  748 + <filter class="solr.LowerCaseFilterFactory"/>
  749 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
  750 + <!-- normalizes ﻯ to ﻱ, etc -->
  751 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  752 + <filter class="solr.ArabicStemFilterFactory"/>
  753 + </analyzer>
  754 + </fieldType>
  755 +
  756 + <!-- Bulgarian -->
  757 + <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
  758 + <analyzer>
  759 + <tokenizer class="solr.StandardTokenizerFactory"/>
  760 + <filter class="solr.LowerCaseFilterFactory"/>
  761 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
  762 + <filter class="solr.BulgarianStemFilterFactory"/>
  763 + </analyzer>
  764 + </fieldType>
  765 +
  766 + <!-- Catalan -->
  767 + <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
  768 + <analyzer>
  769 + <tokenizer class="solr.StandardTokenizerFactory"/>
  770 + <!-- removes l', etc -->
  771 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
  772 + <filter class="solr.LowerCaseFilterFactory"/>
  773 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
  774 + <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
  775 + </analyzer>
  776 + </fieldType>
  777 +
  778 + <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
  779 + <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  780 + <analyzer>
  781 + <tokenizer class="solr.StandardTokenizerFactory"/>
  782 + <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
  783 + <filter class="solr.CJKWidthFilterFactory"/>
  784 + <!-- for any non-CJK -->
  785 + <filter class="solr.LowerCaseFilterFactory"/>
  786 + <filter class="solr.CJKBigramFilterFactory"/>
  787 + </analyzer>
  788 + </fieldType>
  789 +
  790 + <!-- Czech -->
  791 + <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
  792 + <analyzer>
  793 + <tokenizer class="solr.StandardTokenizerFactory"/>
  794 + <filter class="solr.LowerCaseFilterFactory"/>
  795 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
  796 + <filter class="solr.CzechStemFilterFactory"/>
  797 + </analyzer>
  798 + </fieldType>
  799 +
  800 + <!-- Danish -->
  801 + <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
  802 + <analyzer>
  803 + <tokenizer class="solr.StandardTokenizerFactory"/>
  804 + <filter class="solr.LowerCaseFilterFactory"/>
  805 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
  806 + <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
  807 + </analyzer>
  808 + </fieldType>
  809 +
  810 + <!-- German -->
  811 + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  812 + <analyzer>
  813 + <tokenizer class="solr.StandardTokenizerFactory"/>
  814 + <filter class="solr.LowerCaseFilterFactory"/>
  815 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
  816 + <filter class="solr.GermanNormalizationFilterFactory"/>
  817 + <filter class="solr.GermanLightStemFilterFactory"/>
  818 + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
  819 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
  820 + </analyzer>
  821 + </fieldType>
  822 +
  823 + <!-- Greek -->
  824 + <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
  825 + <analyzer>
  826 + <tokenizer class="solr.StandardTokenizerFactory"/>
  827 + <!-- greek specific lowercase for sigma -->
  828 + <filter class="solr.GreekLowerCaseFilterFactory"/>
  829 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
  830 + <filter class="solr.GreekStemFilterFactory"/>
  831 + </analyzer>
  832 + </fieldType>
  833 +
  834 + <!-- Spanish -->
  835 + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
  836 + <analyzer>
  837 + <tokenizer class="solr.StandardTokenizerFactory"/>
  838 + <filter class="solr.LowerCaseFilterFactory"/>
  839 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
  840 + <filter class="solr.SpanishLightStemFilterFactory"/>
  841 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
  842 + </analyzer>
  843 + </fieldType>
  844 +
  845 + <!-- Basque -->
  846 + <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
  847 + <analyzer>
  848 + <tokenizer class="solr.StandardTokenizerFactory"/>
  849 + <filter class="solr.LowerCaseFilterFactory"/>
  850 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
  851 + <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
  852 + </analyzer>
  853 + </fieldType>
  854 +
  855 + <!-- Persian -->
  856 + <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
  857 + <analyzer>
  858 + <!-- for ZWNJ -->
  859 + <charFilter class="solr.PersianCharFilterFactory"/>
  860 + <tokenizer class="solr.StandardTokenizerFactory"/>
  861 + <filter class="solr.LowerCaseFilterFactory"/>
  862 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  863 + <filter class="solr.PersianNormalizationFilterFactory"/>
  864 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
  865 + </analyzer>
  866 + </fieldType>
  867 +
  868 + <!-- Finnish -->
  869 + <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
  870 + <analyzer>
  871 + <tokenizer class="solr.StandardTokenizerFactory"/>
  872 + <filter class="solr.LowerCaseFilterFactory"/>
  873 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
  874 + <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
  875 + <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
  876 + </analyzer>
  877 + </fieldType>
  878 +
  879 + <!-- French -->
  880 + <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  881 + <analyzer>
  882 + <tokenizer class="solr.StandardTokenizerFactory"/>
  883 + <!-- removes l', etc -->
  884 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
  885 + <filter class="solr.LowerCaseFilterFactory"/>
  886 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
  887 + <filter class="solr.FrenchLightStemFilterFactory"/>
  888 + <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
  889 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
  890 + </analyzer>
  891 + </fieldType>
  892 +
  893 + <!-- Irish -->
  894 + <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  895 + <analyzer>
  896 + <tokenizer class="solr.StandardTokenizerFactory"/>
  897 + <!-- removes d', etc -->
  898 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
  899 + <!-- removes n-, etc. Position increments are intentionally disabled here. NOTE(review): Lucene 4.4 and later reportedly reject enablePositionIncrements=false when luceneMatchVersion is 4.4 or newer; confirm against solrconfig.xml before using this type -->
  900 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
  901 + <filter class="solr.IrishLowerCaseFilterFactory"/>
  902 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
  903 + <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  904 + </analyzer>
  905 + </fieldType>
  906 +
  907 + <!-- Galician -->
  908 + <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  909 + <analyzer>
  910 + <tokenizer class="solr.StandardTokenizerFactory"/>
  911 + <filter class="solr.LowerCaseFilterFactory"/>
  912 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
  913 + <filter class="solr.GalicianStemFilterFactory"/>
  914 + <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
  915 + </analyzer>
  916 + </fieldType>
  917 +
  918 + <!-- Hindi -->
  919 + <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  920 + <analyzer>
  921 + <tokenizer class="solr.StandardTokenizerFactory"/>
  922 + <filter class="solr.LowerCaseFilterFactory"/>
  923 + <!-- normalizes unicode representation -->
  924 + <filter class="solr.IndicNormalizationFilterFactory"/>
  925 + <!-- normalizes variation in spelling -->
  926 + <filter class="solr.HindiNormalizationFilterFactory"/>
  927 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
  928 + <filter class="solr.HindiStemFilterFactory"/>
  929 + </analyzer>
  930 + </fieldType>
  931 +
  932 + <!-- Hungarian -->
  933 + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  934 + <analyzer>
  935 + <tokenizer class="solr.StandardTokenizerFactory"/>
  936 + <filter class="solr.LowerCaseFilterFactory"/>
  937 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
  938 + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
  939 + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
  940 + </analyzer>
  941 + </fieldType>
  942 +
  943 + <!-- Armenian -->
  944 + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  945 + <analyzer>
  946 + <tokenizer class="solr.StandardTokenizerFactory"/>
  947 + <filter class="solr.LowerCaseFilterFactory"/>
  948 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
  949 + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  950 + </analyzer>
  951 + </fieldType>
  952 +
  953 + <!-- Indonesian -->
  954 + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  955 + <analyzer>
  956 + <tokenizer class="solr.StandardTokenizerFactory"/>
  957 + <filter class="solr.LowerCaseFilterFactory"/>
  958 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
  959 + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
  960 + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  961 + </analyzer>
  962 + </fieldType>
  963 +
  964 + <!-- Italian -->
  965 + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  966 + <analyzer>
  967 + <tokenizer class="solr.StandardTokenizerFactory"/>
  968 + <!-- removes l', etc -->
  969 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
  970 + <filter class="solr.LowerCaseFilterFactory"/>
  971 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
  972 + <filter class="solr.ItalianLightStemFilterFactory"/>
  973 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
  974 + </analyzer>
  975 + </fieldType>
  976 +
  977 + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
  978 +
  979 + NOTE: If you want to optimize search for precision, use default operator AND in your query
  980 + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
  981 + OR if you would like to optimize for recall (default).
  982 + -->
  983 + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
  984 + <analyzer>
  985 + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
  986 +
  987 + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
  988 + is used to segment compounds into their parts, and the compound itself is kept as a synonym.
  989 +
  990 + Valid values for attribute mode are:
  991 + normal: regular segmentation
  992 + search: segmentation useful for search, with compounds kept as synonyms (default)
  993 + extended: same as search mode, but unigrams unknown words (experimental)
  994 +
  995 + For some applications it might be good to use search mode for indexing and normal mode for
  996 + queries to reduce recall and prevent parts of compounds from being matched and highlighted.
  997 + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
  998 +
  999 + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
  1000 + model with your own entries for segmentation, part-of-speech tags and readings without a need
  1001 + to specify weights. Notice that user dictionaries have not been subject to extensive testing.
  1002 +
  1003 + User dictionary attributes are:
  1004 + userDictionary: user dictionary filename
  1005 + userDictionaryEncoding: user dictionary encoding (default is UTF-8)
  1006 +
  1007 + See lang/userdict_ja.txt for a sample user dictionary file.
  1008 +
  1009 + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
  1010 +
  1011 + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
  1012 + -->
  1013 + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
  1014 + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
  1015 + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
  1016 + <filter class="solr.JapaneseBaseFormFilterFactory"/>
  1017 + <!-- Removes tokens with certain part-of-speech tags -->
  1018 + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
  1019 + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
  1020 + <filter class="solr.CJKWidthFilterFactory"/>
  1021 + <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
  1022 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
  1023 + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
  1024 + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
  1025 + <!-- Lower-cases romaji characters -->
  1026 + <filter class="solr.LowerCaseFilterFactory"/>
  1027 + </analyzer>
  1028 + </fieldType>
  1029 +
  1030 + <!-- Latvian -->
  1031 + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  1032 + <analyzer>
  1033 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1034 + <filter class="solr.LowerCaseFilterFactory"/>
  1035 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
  1036 + <filter class="solr.LatvianStemFilterFactory"/>
  1037 + </analyzer>
  1038 + </fieldType>
  1039 +
  1040 + <!-- Dutch -->
  1041 + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  1042 + <analyzer>
  1043 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1044 + <filter class="solr.LowerCaseFilterFactory"/>
  1045 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
  1046 + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
  1047 + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  1048 + </analyzer>
  1049 + </fieldType>
  1050 +
  1051 + <!-- Norwegian -->
  1052 + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  1053 + <analyzer>
  1054 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1055 + <filter class="solr.LowerCaseFilterFactory"/>
  1056 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
  1057 + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
  1058 + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
  1059 + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
  1060 + </analyzer>
  1061 + </fieldType>
  1062 +
  1063 + <!-- Portuguese -->
  1064 + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  1065 + <analyzer>
  1066 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1067 + <filter class="solr.LowerCaseFilterFactory"/>
  1068 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
  1069 +
  1070 + <!-- mairon -->
  1071 + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> -->
  1072 + <filter class="solr.BrazilianStemFilterFactory"/>
  1073 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  1074 + <!-- mairon -->
  1075 +
  1076 + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
  1077 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
  1078 + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
  1079 + </analyzer>
  1080 + </fieldType>
  1081 +
  1082 + <!-- Romanian -->
  1083 + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  1084 + <analyzer>
  1085 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1086 + <filter class="solr.LowerCaseFilterFactory"/>
  1087 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
  1088 + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  1089 + </analyzer>
  1090 + </fieldType>
  1091 +
  1092 + <!-- Russian -->
  1093 + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  1094 + <analyzer>
  1095 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1096 + <filter class="solr.LowerCaseFilterFactory"/>
  1097 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
  1098 + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
  1099 + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
  1100 + </analyzer>
  1101 + </fieldType>
  1102 +
  1103 + <!-- Swedish -->
  1104 + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  1105 + <analyzer>
  1106 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1107 + <filter class="solr.LowerCaseFilterFactory"/>
  1108 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
  1109 + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
  1110 + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  1111 + </analyzer>
  1112 + </fieldType>
  1113 +
  1114 + <!-- Thai -->
  1115 + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
  1116 + <analyzer>
  1117 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1118 + <filter class="solr.LowerCaseFilterFactory"/>
  1119 + <filter class="solr.ThaiWordFilterFactory"/>
  1120 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
  1121 + </analyzer>
  1122 + </fieldType>
  1123 +
  1124 + <!-- Turkish -->
  1125 + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  1126 + <analyzer>
  1127 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1128 + <filter class="solr.TurkishLowerCaseFilterFactory"/>
  1129 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
  1130 + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  1131 + </analyzer>
  1132 + </fieldType>
  1133 +
  1134 + </types>
  1135 +
  1136 + <!-- Similarity is the scoring routine for each document vs. a query.
  1137 + A custom Similarity or SimilarityFactory may be specified here, but
  1138 + the default is fine for most applications.
  1139 + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
  1140 + -->
  1141 + <!--
  1142 + <similarity class="com.example.solr.CustomSimilarityFactory">
  1143 + <str name="paramkey">param value</str>
  1144 + </similarity>
  1145 + -->
  1146 +
  1147 +</schema>
0 1148 \ No newline at end of file
... ...
index/sei-protocolos-config.xml 0 → 100755
  1 +++ a/index/sei-protocolos-config.xml
... ... @@ -0,0 +1,1773 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + For more details about configurations options that may appear in
  21 + this file, see http://wiki.apache.org/solr/SolrConfigXml.
  22 +-->
  23 +<config>
  24 + <!-- In all configuration below, a prefix of "solr." for class names
  25 + is an alias that causes solr to search appropriate packages,
  26 + including org.apache.solr.(search|update|request|core|analysis)
  27 +
  28 + You may also specify a fully qualified Java classname if you
  29 + have your own custom plugins.
  30 + -->
  31 +
  32 + <!-- Controls what version of Lucene various components of Solr
  33 + adhere to. Generally, you want to use the latest version to
  34 + get all bug fixes and improvements. It is highly recommended
  35 + that you fully re-index after changing this setting as it can
  36 + affect both how text is indexed and queried.
  37 + -->
  38 + <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
  39 +
  40 + <!-- <lib/> directives can be used to instruct Solr to load any Jars
  41 + identified and use them to resolve any "plugins" specified in
  42 + your solrconfig.xml or schema.xml (ie: Analyzers, Request
  43 + Handlers, etc...).
  44 +
  45 + All directories and paths are resolved relative to the
  46 + instanceDir.
  47 +
  48 + Please note that <lib/> directives are processed in the order
  49 + that they appear in your solrconfig.xml file, and are "stacked"
  50 + on top of each other when building a ClassLoader - so if you have
  51 + plugin jars with dependencies on other jars, the "lower level"
  52 + dependency jars should be loaded first.
  53 +
  54 + If a "./lib" directory exists in your instanceDir, all files
  55 + found in it are included as if you had used the following
  56 + syntax...
  57 +
  58 +-->
  59 + <lib dir="./lib" />
  60 +
  61 +
  62 + <!-- A 'dir' option by itself adds any files found in the directory
  63 + to the classpath, this is useful for including all jars in a
  64 + directory.
  65 +
  66 + When a 'regex' is specified in addition to a 'dir', only the
  67 + files in that directory which completely match the regex
  68 + (anchored on both ends) will be included.
  69 +
  70 + The examples below can be used to load some solr-contribs along
  71 + with their external dependencies.
  72 + -->
  73 + <lib dir="./contrib/extraction/lib" regex=".*\.jar" />
  74 + <lib dir="./dist/" regex="apache-solr-cell-\d.*\.jar" />
  75 +
  76 + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" />
  77 + <lib dir="./dist/" regex="apache-solr-clustering-\d.*\.jar" />
  78 +
  79 + <lib dir="./contrib/langid/lib/" regex=".*\.jar" />
  80 + <lib dir="./dist/" regex="apache-solr-langid-\d.*\.jar" />
  81 +
  82 + <lib dir="./contrib/velocity/lib" regex=".*\.jar" />
  83 + <lib dir="./dist/" regex="apache-solr-velocity-\d.*\.jar" />
  84 +
  85 + <!-- If a 'dir' option (with or without a regex) is used and nothing
  86 + is found that matches, it will be ignored
  87 + -->
  88 + <lib dir="/total/crap/dir/ignored" />
  89 +
  90 + <!-- an exact 'path' can be used instead of a 'dir' to specify a
  91 + specific jar file. This will cause a serious error to be logged
  92 + if it can't be loaded.
  93 + -->
  94 + <!--
  95 + <lib path="../a-jar-that-does-not-exist.jar" />
  96 + -->
  97 +
  98 + <!-- Data Directory
  99 +
  100 + Used to specify an alternate directory to hold all index data
  101 + other than the default ./data under the Solr home. If
  102 + replication is in use, this should match the replication
  103 + configuration.
  104 + -->
  105 + <dataDir>${solr.data.dir:}</dataDir>
  106 +
  107 +
  108 + <!-- The DirectoryFactory to use for indexes.
  109 +
  110 + solr.StandardDirectoryFactory is filesystem
  111 + based and tries to pick the best implementation for the current
  112 + JVM and platform. solr.NRTCachingDirectoryFactory, the default,
  113 + wraps solr.StandardDirectoryFactory and caches small files in memory
  114 + for better NRT performance.
  115 +
  116 + One can force a particular implementation via solr.MMapDirectoryFactory,
  117 + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
  118 +
  119 + solr.RAMDirectoryFactory is memory based, not
  120 + persistent, and doesn't work with replication.
  121 + -->
  122 + <directoryFactory name="DirectoryFactory"
  123 + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
  124 +
  125 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  126 + Index Config - These settings control low-level behavior of indexing
  127 + Most example settings here show the default value, but are commented
  128 + out, to more easily see where customizations have been made.
  129 +
  130 + Note: This replaces <indexDefaults> and <mainIndex> from older versions
  131 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  132 + <indexConfig>
  133 + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
  134 + LimitTokenCountFilterFactory in your fieldType definition. E.g.
  135 + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
  136 + -->
  137 + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
  138 + <!-- <writeLockTimeout>1000</writeLockTimeout> -->
  139 +
  140 + <!-- Expert: Enabling compound file will use fewer files for the index,
  141 + using fewer file descriptors at the expense of decreased performance.
  142 + Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
  143 + <!-- <useCompoundFile>false</useCompoundFile> -->
  144 +
  145 + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
  146 + indexing for buffering added documents and deletions before they are
  147 + flushed to the Directory.
  148 + maxBufferedDocs sets a limit on the number of documents buffered
  149 + before flushing.
  150 + If both ramBufferSizeMB and maxBufferedDocs are set, then
  151 + Lucene will flush based on whichever limit is hit first. -->
  152 + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> -->
  153 + <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
  154 +
  155 + <!-- Expert: Merge Policy
  156 + The Merge Policy in Lucene controls how merging of segments is done.
  157 + The default since Solr/Lucene 3.3 is TieredMergePolicy.
  158 + The default since Lucene 2.3 was the LogByteSizeMergePolicy,
  159 + Even older versions of Lucene used LogDocMergePolicy.
  160 + -->
  161 + <!--
  162 + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
  163 + <int name="maxMergeAtOnce">10</int>
  164 + <int name="segmentsPerTier">10</int>
  165 + </mergePolicy>
  166 + -->
  167 +
  168 + <!-- Merge Factor
  169 + The merge factor controls how many segments will get merged at a time.
  170 + For TieredMergePolicy, mergeFactor is a convenience parameter which
  171 + will set both MaxMergeAtOnce and SegmentsPerTier at once.
  172 + For LogByteSizeMergePolicy, mergeFactor decides how many new segments
  173 + will be allowed before they are merged into one.
  174 + Default is 10 for both merge policies.
  175 + -->
  176 + <!--
  177 + <mergeFactor>10</mergeFactor>
  178 + -->
  179 +
  180 + <!-- Expert: Merge Scheduler
  181 + The Merge Scheduler in Lucene controls how merges are
  182 + performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
  183 + can perform merges in the background using separate threads.
  184 + The SerialMergeScheduler (Lucene 2.2 default) does not.
  185 + -->
  186 + <!--
  187 + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
  188 + -->
  189 +
  190 + <!-- LockFactory
  191 +
  192 + This option specifies which Lucene LockFactory implementation
  193 + to use.
  194 +
  195 + single = SingleInstanceLockFactory - suggested for a
  196 + read-only index or when there is no possibility of
  197 + another process trying to modify the index.
  198 + native = NativeFSLockFactory - uses OS native file locking.
  199 + Do not use when multiple solr webapps in the same
  200 + JVM are attempting to share a single index.
  201 + simple = SimpleFSLockFactory - uses a plain file for locking
  202 +
  203 + Defaults: 'native' is default for Solr3.6 and later, otherwise
  204 + 'simple' is the default
  205 +
  206 + More details on the nuances of each LockFactory...
  207 + http://wiki.apache.org/lucene-java/AvailableLockFactories
  208 + -->
  209 + <!-- <lockType>native</lockType> -->
  210 +
  211 + <!-- Unlock On Startup
  212 +
  213 + If true, unlock any held write or commit locks on startup.
  214 + This defeats the locking mechanism that allows multiple
  215 + processes to safely access a lucene index, and should be used
  216 + with care. Default is "false".
  217 +
  218 + This is not needed if lock type is 'none' or 'single'
  219 + -->
  220 + <!--
  221 + <unlockOnStartup>false</unlockOnStartup>
  222 + -->
  223 +
  224 + <!-- Expert: Controls how often Lucene loads terms into memory
  225 + Default is 128 and is likely good for most everyone.
  226 + -->
  227 + <!-- <termIndexInterval>128</termIndexInterval> -->
  228 +
  229 + <!-- If true, IndexReaders will be reopened (often more efficient)
  230 + instead of closed and then opened. Default: true
  231 + -->
  232 + <!--
  233 + <reopenReaders>true</reopenReaders>
  234 + -->
  235 +
  236 + <!-- Commit Deletion Policy
  237 +
  238 + Custom deletion policies can be specified here. The class must
  239 + implement org.apache.lucene.index.IndexDeletionPolicy.
  240 +
  241 + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html
  242 +
  243 + The default Solr IndexDeletionPolicy implementation supports
  244 + deleting index commit points on number of commits, age of
  245 + commit point and optimized status.
  246 +
  247 + The latest commit point should always be preserved regardless
  248 + of the criteria.
  249 + -->
  250 + <!--
  251 + <deletionPolicy class="solr.SolrDeletionPolicy">
  252 + -->
  253 + <!-- The number of commit points to be kept -->
  254 + <!-- <str name="maxCommitsToKeep">1</str> -->
  255 + <!-- The number of optimized commit points to be kept -->
  256 + <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
  257 + <!--
  258 + Delete all commit points once they have reached the given age.
  259 + Supports DateMathParser syntax e.g.
  260 + -->
  261 + <!--
  262 + <str name="maxCommitAge">30MINUTES</str>
  263 + <str name="maxCommitAge">1DAY</str>
  264 + -->
  265 + <!--
  266 + </deletionPolicy>
  267 + -->
  268 +
  269 + <!-- Lucene Infostream
  270 +
  271 + To aid in advanced debugging, Lucene provides an "InfoStream"
  272 + of detailed information when indexing.
  273 +
  274 + Setting the value to true will instruct the underlying Lucene
  275 + IndexWriter to write its debugging info to the specified file
  276 + -->
  277 + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
  278 + </indexConfig>
  279 +
  280 +
  281 + <!-- JMX
  282 +
  283 + This example enables JMX if and only if an existing MBeanServer
  284 + is found, use this if you want to configure JMX through JVM
  285 + parameters. Remove this to disable exposing Solr configuration
  286 + and statistics to JMX.
  287 +
  288 + For more details see http://wiki.apache.org/solr/SolrJmx
  289 + -->
  290 + <jmx />
  291 + <!-- If you want to connect to a particular server, specify the
  292 + agentId
  293 + -->
  294 + <!-- <jmx agentId="myAgent" /> -->
  295 + <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
  296 + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  297 + -->
  298 +
  299 + <!-- The default high-performance update handler -->
  300 + <updateHandler class="solr.DirectUpdateHandler2">
  301 +
  302 + <!-- AutoCommit
  303 +
  304 + Perform a hard commit automatically under certain conditions.
  305 + Instead of enabling autoCommit, consider using "commitWithin"
  306 + when adding documents.
  307 +
  308 + http://wiki.apache.org/solr/UpdateXmlMessages
  309 +
  310 + maxDocs - Maximum number of documents to add since the last
  311 + commit before automatically triggering a new commit.
  312 +
  313 + maxTime - Maximum amount of time in ms that is allowed to pass
  314 + since a document was added before automatically
  315 + triggering a new commit.
  316 + openSearcher - if false, the commit causes recent index changes
  317 + to be flushed to stable storage, but does not cause a new
  318 + searcher to be opened to make those changes visible.
  319 + -->
  320 + <autoCommit>
  321 + <maxTime>300000</maxTime>
  322 + <openSearcher>false</openSearcher>
  323 + </autoCommit>
  324 +
  325 + <!-- softAutoCommit is like autoCommit except it causes a
  326 + 'soft' commit which only ensures that changes are visible
  327 + but does not ensure that data is synced to disk. This is
  328 + faster and more near-realtime friendly than a hard commit.
  329 + -->
  330 + <!--
  331 + <autoSoftCommit>
  332 + <maxTime>1000</maxTime>
  333 + </autoSoftCommit>
  334 + -->
  335 +
  336 + <!-- Update Related Event Listeners
  337 +
  338 + Various IndexWriter related events can trigger Listeners to
  339 + take actions.
  340 +
  341 + postCommit - fired after every commit or optimize command
  342 + postOptimize - fired after every optimize command
  343 + -->
  344 + <!-- The RunExecutableListener executes an external command from a
  345 + hook such as postCommit or postOptimize.
  346 +
  347 + exe - the name of the executable to run
  348 + dir - dir to use as the current working directory. (default=".")
  349 + wait - the calling thread waits until the executable returns.
  350 + (default="true")
  351 + args - the arguments to pass to the program. (default is none)
  352 + env - environment variables to set. (default is none)
  353 + -->
  354 + <!-- This example shows how RunExecutableListener could be used
  355 + with the script based replication...
  356 + http://wiki.apache.org/solr/CollectionDistribution
  357 + -->
  358 + <!--
  359 + <listener event="postCommit" class="solr.RunExecutableListener">
  360 + <str name="exe">solr/bin/snapshooter</str>
  361 + <str name="dir">.</str>
  362 + <bool name="wait">true</bool>
  363 + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
  364 + <arr name="env"> <str>MYVAR=val1</str> </arr>
  365 + </listener>
  366 + -->
  367 +
  368 + <!-- Enables a transaction log, currently used for real-time get.
  369 + "dir" - the target directory for transaction logs, defaults to the
  370 + solr data directory. -->
  371 + <updateLog>
  372 + <str name="dir">${solr.data.dir:}</str>
  373 + </updateLog>
  374 +
  375 +
  376 + </updateHandler>
  377 +
  378 + <!-- IndexReaderFactory
  379 +
  380 + Use the following format to specify a custom IndexReaderFactory,
  381 + which allows for alternate IndexReader implementations.
  382 +
  383 + ** Experimental Feature **
  384 +
  385 + Please note - Using a custom IndexReaderFactory may prevent
  386 + certain other features from working. The API to
  387 + IndexReaderFactory may change without warning or may even be
  388 + removed from future releases if the problems cannot be
  389 + resolved.
  390 +
  391 +
  392 + ** Features that may not work with custom IndexReaderFactory **
  393 +
  394 + The ReplicationHandler assumes a disk-resident index. Using a
  395 + custom IndexReader implementation may cause incompatibility
  396 + with ReplicationHandler and may cause replication to not work
  397 + correctly. See SOLR-1366 for details.
  398 +
  399 + -->
  400 + <!--
  401 + <indexReaderFactory name="IndexReaderFactory" class="package.class">
  402 + <str name="someArg">Some Value</str>
  403 + </indexReaderFactory >
  404 + -->
  405 + <!-- By explicitly declaring the Factory, the termIndexDivisor can
  406 + be specified.
  407 + -->
  408 + <!--
  409 + <indexReaderFactory name="IndexReaderFactory"
  410 + class="solr.StandardIndexReaderFactory">
  411 + <int name="setTermIndexDivisor">12</int>
  412 + </indexReaderFactory >
  413 + -->
  414 +
  415 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  416 + Query section - these settings control query time things like caches
  417 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  418 + <query>
  419 + <!-- Max Boolean Clauses
  420 +
  421 + Maximum number of clauses in each BooleanQuery, an exception
  422 + is thrown if exceeded.
  423 +
  424 + ** WARNING **
  425 +
  426 + This option actually modifies a global Lucene property that
  427 + will affect all SolrCores. If multiple solrconfig.xml files
  428 + disagree on this property, the value at any given moment will
  429 + be based on the last SolrCore to be initialized.
  430 +
  431 + -->
  432 + <maxBooleanClauses>1024</maxBooleanClauses>
  433 +
  434 +
  435 + <!-- Solr Internal Query Caches
  436 +
  437 + There are two implementations of cache available for Solr,
  438 + LRUCache, based on a synchronized LinkedHashMap, and
  439 + FastLRUCache, based on a ConcurrentHashMap.
  440 +
  441 + FastLRUCache has faster gets and slower puts in single
  442 + threaded operation and thus is generally faster than LRUCache
  443 + when the hit ratio of the cache is high (> 75%), and may be
  444 + faster under other scenarios on multi-cpu systems.
  445 + -->
  446 +
  447 + <!-- Filter Cache
  448 +
  449 + Cache used by SolrIndexSearcher for filters (DocSets),
  450 + unordered sets of *all* documents that match a query. When a
  451 + new searcher is opened, its caches may be prepopulated or
  452 + "autowarmed" using data from caches in the old searcher.
  453 + autowarmCount is the number of items to prepopulate. For
  454 + LRUCache, the autowarmed items will be the most recently
  455 + accessed items.
  456 +
  457 + Parameters:
  458 + class - the SolrCache implementation to use
  459 + (LRUCache or FastLRUCache)
  460 + size - the maximum number of entries in the cache
  461 + initialSize - the initial capacity (number of entries) of
  462 + the cache. (see java.util.HashMap)
  463 + autowarmCount - the number of entries to prepopulate from
  464 + an old cache.
  465 + -->
  466 + <filterCache class="solr.FastLRUCache"
  467 + size="512"
  468 + initialSize="512"
  469 + autowarmCount="0"/>
  470 +
  471 + <!-- Query Result Cache
  472 +
  473 + Caches results of searches - ordered lists of document ids
  474 + (DocList) based on a query, a sort, and the range of documents requested.
  475 + -->
  476 + <queryResultCache class="solr.LRUCache"
  477 + size="512"
  478 + initialSize="512"
  479 + autowarmCount="0"/>
  480 +
  481 + <!-- Document Cache
  482 +
  483 + Caches Lucene Document objects (the stored fields for each
  484 + document). Since Lucene internal document ids are transient,
  485 + this cache will not be autowarmed.
  486 + -->
  487 + <documentCache class="solr.LRUCache"
  488 + size="512"
  489 + initialSize="512"
  490 + autowarmCount="0"/>
  491 +
  492 + <!-- Field Value Cache
  493 +
  494 + Cache used to hold field values that are quickly accessible
  495 + by document id. The fieldValueCache is created by default
  496 + even if not configured here.
  497 + -->
  498 + <!--
  499 + <fieldValueCache class="solr.FastLRUCache"
  500 + size="512"
  501 + autowarmCount="128"
  502 + showItems="32" />
  503 + -->
  504 +
  505 + <!-- Custom Cache
  506 +
  507 + Example of a generic cache. These caches may be accessed by
  508 + name through SolrIndexSearcher.getCache(), cacheLookup(), and
  509 + cacheInsert(). The purpose is to enable easy caching of
  510 + user/application level data. The regenerator argument should
  511 + be specified as an implementation of solr.CacheRegenerator
  512 + if autowarming is desired.
  513 + -->
  514 + <!--
  515 + <cache name="myUserCache"
  516 + class="solr.LRUCache"
  517 + size="4096"
  518 + initialSize="1024"
  519 + autowarmCount="1024"
  520 + regenerator="com.mycompany.MyRegenerator"
  521 + />
  522 + -->
  523 +
  524 +
  525 + <!-- Lazy Field Loading
  526 +
  527 + If true, stored fields that are not requested will be loaded
  528 + lazily. This can result in a significant speed improvement
  529 + if the usual case is to not load all stored fields,
  530 + especially if the skipped fields are large compressed text
  531 + fields.
  532 + -->
  533 + <enableLazyFieldLoading>true</enableLazyFieldLoading>
  534 +
  535 + <!-- Use Filter For Sorted Query
  536 +
  537 + A possible optimization that attempts to use a filter to
  538 + satisfy a search. If the requested sort does not include
  539 + score, then the filterCache will be checked for a filter
  540 + matching the query. If found, the filter will be used as the
  541 + source of document ids, and then the sort will be applied to
  542 + that.
  543 +
  544 + For most situations, this will not be useful unless you
  545 + frequently get the same search repeatedly with different sort
  546 + options, and none of them ever use "score"
  547 + -->
  548 + <!--
  549 + <useFilterForSortedQuery>true</useFilterForSortedQuery>
  550 + -->
  551 +
  552 + <!-- Result Window Size
  553 +
  554 + An optimization for use with the queryResultCache. When a search
  555 + is requested, a superset of the requested number of document ids
  556 + are collected. For example, if a search for a particular query
  557 + requests matching documents 10 through 19, and queryResultWindowSize is 50,
  558 + then documents 0 through 49 will be collected and cached. Any further
  559 + requests in that range can be satisfied via the cache.
  560 + -->
  561 + <queryResultWindowSize>20</queryResultWindowSize>
  562 +
  563 + <!-- Maximum number of documents to cache for any entry in the
  564 + queryResultCache.
  565 + -->
  566 + <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
  567 +
  568 + <!-- Query Related Event Listeners
  569 +
  570 + Various IndexSearcher related events can trigger Listeners to
  571 + take actions.
  572 +
  573 + newSearcher - fired whenever a new searcher is being prepared
  574 + and there is a current searcher handling requests (aka
  575 + registered). It can be used to prime certain caches to
  576 + prevent long request times for certain requests.
  577 +
  578 + firstSearcher - fired whenever a new searcher is being
  579 + prepared but there is no current registered searcher to handle
  580 + requests or to gain autowarming data from.
  581 +
  582 +
  583 + -->
  584 + <!-- QuerySenderListener takes an array of NamedList and executes a
  585 + local query request for each NamedList in sequence.
  586 + -->
  587 + <listener event="newSearcher" class="solr.QuerySenderListener">
  588 + <arr name="queries">
  589 + <!--
  590 + <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
  591 + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
  592 + -->
  593 + </arr>
  594 + </listener>
  595 + <listener event="firstSearcher" class="solr.QuerySenderListener">
  596 + <arr name="queries">
  597 + <lst>
  598 + <str name="q">static firstSearcher warming in solrconfig.xml</str>
  599 + </lst>
  600 + </arr>
  601 + </listener>
  602 +
  603 + <!-- Use Cold Searcher
  604 +
  605 + If a search request comes in and there is no current
  606 + registered searcher, then immediately register the still
  607 + warming searcher and use it. If "false" then all requests
  608 + will block until the first searcher is done warming.
  609 + -->
  610 + <useColdSearcher>false</useColdSearcher>
  611 +
  612 + <!-- Max Warming Searchers
  613 +
  614 + Maximum number of searchers that may be warming in the
  615 + background concurrently. An error is returned if this limit
  616 + is exceeded.
  617 +
  618 + Recommend values of 1-2 for read-only slaves, higher for
  619 + masters w/o cache warming.
  620 + -->
  621 + <maxWarmingSearchers>2</maxWarmingSearchers>
  622 +
  623 + </query>
  624 +
  625 +
  626 + <!-- Request Dispatcher
  627 +
  628 + This section contains instructions for how the SolrDispatchFilter
  629 + should behave when processing requests for this SolrCore.
  630 +
  631 + handleSelect is a legacy option that affects the behavior of requests
  632 + such as /select?qt=XXX
  633 +
  634 + handleSelect="true" will cause the SolrDispatchFilter to process
  635 + the request and dispatch the query to a handler specified by the
  636 + "qt" param, assuming "/select" isn't already registered.
  637 +
  638 + handleSelect="false" will cause the SolrDispatchFilter to
  639 + ignore "/select" requests, resulting in a 404 unless a handler
  640 + is explicitly registered with the name "/select"
  641 +
  642 + handleSelect="true" is not recommended for new users, but is the default
  643 + for backwards compatibility
  644 + -->
  645 + <requestDispatcher handleSelect="false" >
  646 + <!-- Request Parsing
  647 +
  648 + These settings indicate how Solr Requests may be parsed, and
  649 + what restrictions may be placed on the ContentStreams from
  650 + those requests
  651 +
  652 + enableRemoteStreaming - enables use of the stream.file
  653 + and stream.url parameters for specifying remote streams.
  654 +
  655 + multipartUploadLimitInKB - specifies the max size of
  656 + Multipart File Uploads that Solr will allow in a Request.
  657 +
  658 + *** WARNING ***
  659 + The settings below authorize Solr to fetch remote files. You
  660 + should make sure your system has some authentication before
  661 + using enableRemoteStreaming="true"
  662 +
  663 + -->
  664 + <requestParsers enableRemoteStreaming="true"
  665 + multipartUploadLimitInKB="2048000" />
  666 +
  667 + <!-- HTTP Caching
  668 +
  669 + Set HTTP caching related parameters (for proxy caches and clients).
  670 +
  671 + The options below instruct Solr not to output any HTTP Caching
  672 + related headers
  673 + -->
  674 + <httpCaching never304="true" />
  675 + <!-- If you include a <cacheControl> directive, it will be used to
  676 + generate a Cache-Control header (as well as an Expires header
  677 + if the value contains "max-age=")
  678 +
  679 + By default, no Cache-Control header is generated.
  680 +
  681 + You can use the <cacheControl> option even if you have set
  682 + never304="true"
  683 + -->
  684 + <!--
  685 + <httpCaching never304="true" >
  686 + <cacheControl>max-age=30, public</cacheControl>
  687 + </httpCaching>
  688 + -->
  689 + <!-- To enable Solr to respond with automatically generated HTTP
  690 + Caching headers, and to respond to Cache Validation requests
  691 + correctly, set the value of never304="false"
  692 +
  693 + This will cause Solr to generate Last-Modified and ETag
  694 + headers based on the properties of the Index.
  695 +
  696 + The following options can also be specified to affect the
  697 + values of these headers...
  698 +
  699 + lastModFrom - the default value is "openTime" which means the
  700 + Last-Modified value (and validation against If-Modified-Since
  701 + requests) will all be relative to when the current Searcher
  702 + was opened. You can change it to lastModFrom="dirLastMod" if
  703 + you want the value to exactly correspond to when the physical
  704 + index was last modified.
  705 +
  706 + etagSeed="..." is an option you can change to force the ETag
  707 + header (and validation against If-None-Match requests) to be
  708 + different even if the index has not changed (ie: when making
  709 + significant changes to your config file)
  710 +
  711 + (lastModFrom and etagSeed are both ignored if you use
  712 + the never304="true" option)
  713 + -->
  714 + <!--
  715 + <httpCaching lastModFrom="openTime"
  716 + etagSeed="Solr">
  717 + <cacheControl>max-age=30, public</cacheControl>
  718 + </httpCaching>
  719 + -->
  720 + </requestDispatcher>
  721 +
  722 + <!-- Request Handlers
  723 +
  724 + http://wiki.apache.org/solr/SolrRequestHandler
  725 +
  726 + Incoming queries will be dispatched to a specific handler by name
  727 + based on the path specified in the request.
  728 +
  729 + Legacy behavior: If the request path uses "/select" but no Request
  730 + Handler has that name, and if handleSelect="true" has been specified in
  731 + the requestDispatcher, then the Request Handler is dispatched based on
  732 + the qt parameter. Handlers without a leading '/' are accessed this way
  733 + like so: http://host/app/[core/]select?qt=name If no qt is
  734 + given, then the requestHandler that declares default="true" will be
  735 + used or the one named "standard".
  736 +
  737 + If a Request Handler is declared with startup="lazy", then it will
  738 + not be initialized until the first request that uses it.
  739 +
  740 + -->
  741 + <!-- SearchHandler
  742 +
  743 + http://wiki.apache.org/solr/SearchHandler
  744 +
  745 + For processing Search Queries, the primary Request Handler
  746 + provided with Solr is "SearchHandler". It delegates to a sequence
  747 + of SearchComponents (see below) and supports distributed
  748 + queries across multiple shards
  749 + -->
  750 + <requestHandler name="/select" class="solr.SearchHandler">
  751 + <!-- default values for query parameters can be specified, these
  752 + will be overridden by parameters in the request
  753 + -->
  754 + <lst name="defaults">
  755 + <str name="echoParams">explicit</str>
  756 + <int name="rows">10</int>
  757 + <str name="df">text</str>
  758 + </lst>
  759 + <!-- In addition to defaults, "appends" params can be specified
  760 + to identify values which should be appended to the list of
  761 + multi-val params from the query (or the existing "defaults").
  762 + -->
  763 + <!-- In this example, the param "fq=inStock:true" would be appended to
  764 + any query time fq params the user may specify, as a mechanism for
  765 + partitioning the index, independent of any user selected filtering
  766 + that may also be desired (perhaps as a result of faceted searching).
  767 +
  768 + NOTE: there is *absolutely* nothing a client can do to prevent these
  769 + "appends" values from being used, so don't use this mechanism
  770 + unless you are sure you always want it.
  771 + -->
  772 + <!--
  773 + <lst name="appends">
  774 + <str name="fq">inStock:true</str>
  775 + </lst>
  776 + -->
  777 + <!-- "invariants" are a way of letting the Solr maintainer lock down
  778 + the options available to Solr clients. Any params values
  779 + specified here are used regardless of what values may be specified
  780 + in either the query, the "defaults", or the "appends" params.
  781 +
  782 + In this example, the facet.field and facet.query params would
  783 + be fixed, limiting the facets clients can use. Faceting is
  784 + not turned on by default - but if the client does specify
  785 + facet=true in the request, these are the only facets they
  786 + will be able to see counts for; regardless of what other
  787 + facet.field or facet.query params they may specify.
  788 +
  789 + NOTE: there is *absolutely* nothing a client can do to prevent these
  790 + "invariants" values from being used, so don't use this mechanism
  791 + unless you are sure you always want it.
  792 + -->
  793 + <!--
  794 + <lst name="invariants">
  795 + <str name="facet.field">cat</str>
  796 + <str name="facet.field">manu_exact</str>
  797 + <str name="facet.query">price:[* TO 500]</str>
  798 + <str name="facet.query">price:[500 TO *]</str>
  799 + </lst>
  800 + -->
  801 + <!-- If the default list of SearchComponents is not desired, that
  802 + list can either be overridden completely, or components can be
  803 + prepended or appended to the default list. (see below)
  804 + -->
  805 + <!--
  806 + <arr name="components">
  807 + <str>nameOfCustomComponent1</str>
  808 + <str>nameOfCustomComponent2</str>
  809 + </arr>
  810 + -->
  811 +
  812 + </requestHandler>
  813 +
  814 + <!-- A request handler that returns indented JSON by default -->
  815 + <requestHandler name="/query" class="solr.SearchHandler">
  816 + <lst name="defaults">
  817 + <str name="echoParams">explicit</str>
  818 + <str name="wt">json</str>
  819 + <str name="indent">true</str>
  820 + <str name="df">text</str>
  821 + </lst>
  822 + </requestHandler>
  823 +
  824 +
  825 + <!-- realtime get handler, guaranteed to return the latest stored fields of
  826 + any document, without the need to commit or open a new searcher. The
  827 + current implementation relies on the updateLog feature being enabled. -->
  828 + <requestHandler name="/get" class="solr.RealTimeGetHandler">
  829 + <lst name="defaults">
  830 + <str name="omitHeader">true</str>
  831 + <str name="wt">json</str>
  832 + <str name="indent">true</str>
  833 + </lst>
  834 + </requestHandler>
  835 +
  836 +
  837 + <!-- A Robust Example
  838 +
  839 + This example SearchHandler declaration shows off usage of the
  840 + SearchHandler with many defaults declared
  841 +
  842 + Note that multiple instances of the same Request Handler
  843 + (SearchHandler) can be registered multiple times with different
  844 + names (and different init parameters)
  845 + -->
  846 + <requestHandler name="/browse" class="solr.SearchHandler">
  847 + <lst name="defaults">
  848 + <str name="echoParams">explicit</str>
  849 +
  850 + <!-- VelocityResponseWriter settings -->
  851 + <str name="wt">velocity</str>
  852 + <str name="v.template">browse</str>
  853 + <str name="v.layout">layout</str>
  854 + <str name="title">Solritas</str>
  855 +
  856 + <!-- Query settings -->
  857 + <str name="defType">edismax</str>
  858 + <str name="qf">
  859 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  860 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  861 + </str>
  862 + <str name="df">text</str>
  863 + <str name="mm">100%</str>
  864 + <str name="q.alt">*:*</str>
  865 + <str name="rows">10</str>
  866 + <str name="fl">*,score</str>
  867 +
  868 + <str name="mlt.qf">
  869 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  870 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  871 + </str>
  872 + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
  873 + <int name="mlt.count">3</int>
  874 +
  875 + <!-- Faceting defaults -->
  876 + <str name="facet">on</str>
  877 + <str name="facet.field">cat</str>
  878 + <str name="facet.field">manu_exact</str>
  879 + <str name="facet.field">content_type</str>
  880 + <str name="facet.field">author_s</str>
  881 + <str name="facet.query">ipod</str>
  882 + <str name="facet.query">GB</str>
  883 + <str name="facet.mincount">1</str>
  884 + <str name="facet.pivot">cat,inStock</str>
  885 + <str name="facet.range.other">after</str>
  886 + <str name="facet.range">price</str>
  887 + <int name="f.price.facet.range.start">0</int>
  888 + <int name="f.price.facet.range.end">600</int>
  889 + <int name="f.price.facet.range.gap">50</int>
  890 + <str name="facet.range">popularity</str>
  891 + <int name="f.popularity.facet.range.start">0</int>
  892 + <int name="f.popularity.facet.range.end">10</int>
  893 + <int name="f.popularity.facet.range.gap">3</int>
  894 + <str name="facet.range">manufacturedate_dt</str>
  895 + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
  896 + <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
  897 + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
  898 + <str name="f.manufacturedate_dt.facet.range.other">before</str>
  899 + <str name="f.manufacturedate_dt.facet.range.other">after</str>
  900 +
  901 + <!-- Highlighting defaults -->
  902 + <str name="hl">on</str>
  903 + <str name="hl.fl">content</str>
  904 + <str name="hl.encoder">html</str>
  905 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  906 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  907 + <str name="f.title.hl.fragsize">0</str>
  908 + <str name="f.title.hl.alternateField">title</str>
  909 + <str name="f.name.hl.fragsize">0</str>
  910 + <str name="f.name.hl.alternateField">name</str>
  911 + <str name="f.content.hl.snippets">3</str>
  912 + <str name="f.content.hl.fragsize">1000</str>
  913 + <str name="f.content.hl.alternateField">content</str>
  914 + <str name="f.content.hl.maxAlternateFieldLength">250</str>
  915 +
  916 +
  917 + <!-- Spell checking defaults -->
  918 + <str name="spellcheck">on</str>
  919 + <str name="spellcheck.extendedResults">false</str>
  920 + <str name="spellcheck.count">5</str>
  921 + <str name="spellcheck.alternativeTermCount">2</str>
  922 + <str name="spellcheck.maxResultsForSuggest">5</str>
  923 + <str name="spellcheck.collate">true</str>
  924 + <str name="spellcheck.collateExtendedResults">true</str>
  925 + <str name="spellcheck.maxCollationTries">5</str>
  926 + <str name="spellcheck.maxCollations">3</str>
  927 + </lst>
  928 +
  929 + <!-- append spellchecking to our list of components -->
  930 + <arr name="last-components">
  931 + <str>spellcheck</str>
  932 + </arr>
  933 + </requestHandler>
  934 +
  935 +
  936 + <!-- Update Request Handler.
  937 +
  938 + http://wiki.apache.org/solr/UpdateXmlMessages
  939 +
  940 + The canonical Request Handler for Modifying the Index through
  941 + commands specified using XML, JSON, CSV, or JAVABIN
  942 +
  943 + Note: Since solr1.1 requestHandlers require a valid content
  944 + type header if posted in the body. For example, curl now
  945 + requires: -H 'Content-type:text/xml; charset=utf-8'
  946 +
  947 + To override the request content type and force a specific
  948 + Content-type, use the request parameter:
  949 + ?update.contentType=text/csv
  950 +
  951 + This handler will pick a response format to match the input
  952 + if the 'wt' parameter is not explicit
  953 + -->
  954 + <requestHandler name="/update" class="solr.UpdateRequestHandler">
  955 + <!-- See below for information on defining
  956 + updateRequestProcessorChains that can be used by name
  957 + on each Update Request
  958 + -->
  959 + <!--
  960 + <lst name="defaults">
  961 + <str name="update.chain">dedupe</str>
  962 + </lst>
  963 + -->
  964 + </requestHandler>
  965 +
  966 + <!-- for back compat with clients using /update/json and /update/csv -->
  967 + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
  968 + <lst name="defaults">
  969 + <str name="stream.contentType">application/json</str>
  970 + </lst>
  971 + </requestHandler>
  972 + <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
  973 + <lst name="defaults">
  974 + <str name="stream.contentType">application/csv</str>
  975 + </lst>
  976 + </requestHandler>
  977 +
  978 + <!-- Solr Cell Update Request Handler
  979 +
  980 + http://wiki.apache.org/solr/ExtractingRequestHandler
  981 +
  982 + -->
  983 + <requestHandler name="/update/extract"
  984 + startup="lazy"
  985 + class="solr.extraction.ExtractingRequestHandler" >
  986 + <lst name="defaults">
  987 + <str name="lowernames">true</str>
  988 + <str name="uprefix">ignored_</str>
  989 +
  990 + <!-- capture link hrefs but ignore div attributes -->
  991 + <str name="captureAttr">true</str>
  992 + <str name="fmap.a">links</str>
  993 + <str name="fmap.div">ignored_</str>
  994 + </lst>
  995 + </requestHandler>
  996 +
  997 +
  998 + <!-- Field Analysis Request Handler
  999 +
  1000 + RequestHandler that provides much the same functionality as
  1001 + analysis.jsp. Provides the ability to specify multiple field
  1002 + types and field names in the same request and outputs
  1003 + index-time and query-time analysis for each of them.
  1004 +
  1005 + Request parameters are:
  1006 + analysis.fieldname - field name whose analyzers are to be used
  1007 +
  1008 + analysis.fieldtype - field type whose analyzers are to be used
  1009 + analysis.fieldvalue - text for index-time analysis
  1010 + q (or analysis.q) - text for query time analysis
  1011 + analysis.showmatch (true|false) - When set to true and when
  1012 + query analysis is performed, the produced tokens of the
  1013 + field value analysis will be marked as "matched" for every
  1014 + token that is produced by the query analysis
  1015 + -->
  1016 + <requestHandler name="/analysis/field"
  1017 + startup="lazy"
  1018 + class="solr.FieldAnalysisRequestHandler" />
  1019 +
  1020 +
  1021 + <!-- Document Analysis Handler
  1022 +
  1023 + http://wiki.apache.org/solr/AnalysisRequestHandler
  1024 +
  1025 + An analysis handler that provides a breakdown of the analysis
  1026 + process of provided documents. This handler expects a (single)
  1027 + content stream with the following format:
  1028 +
  1029 + <docs>
  1030 + <doc>
  1031 + <field name="id">1</field>
  1032 + <field name="name">The Name</field>
  1033 + <field name="text">The Text Value</field>
  1034 + </doc>
  1035 + <doc>...</doc>
  1036 + <doc>...</doc>
  1037 + ...
  1038 + </docs>
  1039 +
  1040 + Note: Each document must contain a field which serves as the
  1041 + unique key. This key is used in the returned response to associate
  1042 + an analysis breakdown to the analyzed document.
  1043 +
  1044 + Like the FieldAnalysisRequestHandler, this handler also supports
  1045 + query analysis by sending either an "analysis.query" or "q"
  1046 + request parameter that holds the query text to be analyzed. It
  1047 + also supports the "analysis.showmatch" parameter which when set to
  1048 + true, all field tokens that match the query tokens will be marked
  1049 + as a "match".
  1050 + -->
  1051 + <requestHandler name="/analysis/document"
  1052 + class="solr.DocumentAnalysisRequestHandler"
  1053 + startup="lazy" />
  1054 +
  1055 + <!-- Admin Handlers
  1056 +
  1057 + Admin Handlers - This will register all the standard admin
  1058 + RequestHandlers.
  1059 + -->
  1060 + <requestHandler name="/admin/"
  1061 + class="solr.admin.AdminHandlers" />
  1062 + <!-- This single handler is equivalent to the following... -->
  1063 + <!--
  1064 + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
  1065 + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
  1066 + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
  1067 + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
  1068 + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
  1069 + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
  1070 + -->
  1071 + <!-- If you wish to hide files under ${solr.home}/conf, explicitly
  1072 + register the ShowFileRequestHandler using:
  1073 + -->
  1074 + <!--
  1075 + <requestHandler name="/admin/file"
  1076 + class="solr.admin.ShowFileRequestHandler" >
  1077 + <lst name="invariants">
  1078 + <str name="hidden">synonyms.txt</str>
  1079 + <str name="hidden">anotherfile.txt</str>
  1080 + </lst>
  1081 + </requestHandler>
  1082 + -->
  1083 +
  1084 + <!-- ping/healthcheck -->
  1085 + <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
  1086 + <lst name="invariants">
  1087 + <str name="q">solrpingquery</str>
  1088 + </lst>
  1089 + <lst name="defaults">
  1090 + <str name="echoParams">all</str>
  1091 + </lst>
  1092 + <!-- An optional feature of the PingRequestHandler is to configure the
  1093 + handler with a "healthcheckFile" which can be used to enable/disable
  1094 + the PingRequestHandler.
  1095 + relative paths are resolved against the data dir
  1096 + -->
  1097 + <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
  1098 + </requestHandler>
  1099 +
  1100 + <!-- Echo the request contents back to the client -->
  1101 + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
  1102 + <lst name="defaults">
  1103 + <str name="echoParams">explicit</str>
  1104 + <str name="echoHandler">true</str>
  1105 + </lst>
  1106 + </requestHandler>
  1107 +
  1108 + <!-- Solr Replication
  1109 +
  1110 + The SolrReplicationHandler supports replicating indexes from a
  1111 + "master" used for indexing and "slaves" used for queries.
  1112 +
  1113 + http://wiki.apache.org/solr/SolrReplication
  1114 +
  1115 + It is also necessary for SolrCloud to function (in Cloud mode, the
  1116 + replication handler is used to bulk transfer segments when nodes
  1117 + are added or need to recover).
  1118 +
  1119 + https://wiki.apache.org/solr/SolrCloud/
  1120 + -->
  1121 + <requestHandler name="/replication" class="solr.ReplicationHandler" >
  1122 + <!--
  1123 + To enable simple master/slave replication, uncomment one of the
  1124 + sections below, depending on whether this solr instance should be
  1125 + the "master" or a "slave". If this instance is a "slave" you will
  1126 + also need to fill in the masterUrl to point to a real machine.
  1127 + -->
  1128 + <!--
  1129 + <lst name="master">
  1130 + <str name="replicateAfter">commit</str>
  1131 + <str name="replicateAfter">startup</str>
  1132 + <str name="confFiles">schema.xml,stopwords.txt</str>
  1133 + </lst>
  1134 + -->
  1135 + <!--
  1136 + <lst name="slave">
  1137 + <str name="masterUrl">http://your-master-hostname:8983/solr</str>
  1138 + <str name="pollInterval">00:00:60</str>
  1139 + </lst>
  1140 + -->
  1141 + </requestHandler>
  1142 +
  1143 + <!-- Search Components
  1144 +
  1145 + Search components are registered to SolrCore and used by
  1146 + instances of SearchHandler (which can access them by name)
  1147 +
  1148 + By default, the following components are available:
  1149 +
  1150 + <searchComponent name="query" class="solr.QueryComponent" />
  1151 + <searchComponent name="facet" class="solr.FacetComponent" />
  1152 + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
  1153 + <searchComponent name="highlight" class="solr.HighlightComponent" />
  1154 + <searchComponent name="stats" class="solr.StatsComponent" />
  1155 + <searchComponent name="debug" class="solr.DebugComponent" />
  1156 +
  1157 + Default configuration in a requestHandler would look like:
  1158 +
  1159 + <arr name="components">
  1160 + <str>query</str>
  1161 + <str>facet</str>
  1162 + <str>mlt</str>
  1163 + <str>highlight</str>
  1164 + <str>stats</str>
  1165 + <str>debug</str>
  1166 + </arr>
  1167 +
  1168 + If you register a searchComponent to one of the standard names,
  1169 + that will be used instead of the default.
  1170 +
  1171 + To insert components before or after the 'standard' components, use:
  1172 +
  1173 + <arr name="first-components">
  1174 + <str>myFirstComponentName</str>
  1175 + </arr>
  1176 +
  1177 + <arr name="last-components">
  1178 + <str>myLastComponentName</str>
  1179 + </arr>
  1180 +
  1181 + NOTE: The component registered with the name "debug" will
  1182 + always be executed after the "last-components"
  1183 +
  1184 + -->
  1185 +
  1186 + <!-- Spell Check
  1187 +
  1188 + The spell check component can return a list of alternative spelling
  1189 + suggestions.
  1190 +
  1191 + http://wiki.apache.org/solr/SpellCheckComponent
  1192 + -->
  1193 + <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  1194 +
  1195 + <str name="queryAnalyzerFieldType">textSpell</str>
  1196 +
  1197 + <!-- Multiple "Spell Checkers" can be declared and used by this
  1198 + component
  1199 + -->
  1200 +
  1201 + <!-- a spellchecker built from a field of the main index -->
  1202 + <lst name="spellchecker">
  1203 + <str name="name">default</str>
  1204 + <str name="field">name</str>
  1205 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1206 + <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
  1207 + <str name="distanceMeasure">internal</str>
  1208 + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
  1209 + <float name="accuracy">0.5</float>
  1210 + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
  1211 + <int name="maxEdits">2</int>
  1212 + <!-- the minimum shared prefix when enumerating terms -->
  1213 + <int name="minPrefix">1</int>
  1214 + <!-- maximum number of inspections per result. -->
  1215 + <int name="maxInspections">5</int>
  1216 + <!-- minimum length of a query term to be considered for correction -->
  1217 + <int name="minQueryLength">4</int>
  1218 + <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
  1219 + <float name="maxQueryFrequency">0.01</float>
  1220 + <!-- uncomment this to require suggestions to occur in 1% of the documents
  1221 + <float name="thresholdTokenFrequency">.01</float>
  1222 + -->
  1223 + </lst>
  1224 +
  1225 + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
  1226 + <lst name="spellchecker">
  1227 + <str name="name">wordbreak</str>
  1228 + <str name="classname">solr.WordBreakSolrSpellChecker</str>
  1229 + <str name="field">name</str>
  1230 + <str name="combineWords">true</str>
  1231 + <str name="breakWords">true</str>
  1232 + <int name="maxChanges">10</int>
  1233 + </lst>
  1234 +
  1235 + <!-- a spellchecker that uses a different distance measure -->
  1236 + <!--
  1237 + <lst name="spellchecker">
  1238 + <str name="name">jarowinkler</str>
  1239 + <str name="field">spell</str>
  1240 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1241 + <str name="distanceMeasure">
  1242 + org.apache.lucene.search.spell.JaroWinklerDistance
  1243 + </str>
  1244 + </lst>
  1245 + -->
  1246 +
  1247 + <!-- a spellchecker that uses an alternate comparator
  1248 +
  1249 + comparatorClass can be one of:
  1250 + 1. score (default)
  1251 + 2. freq (Frequency first, then score)
  1252 + 3. A fully qualified class name
  1253 + -->
  1254 + <!--
  1255 + <lst name="spellchecker">
  1256 + <str name="name">freq</str>
  1257 + <str name="field">lowerfilt</str>
  1258 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1259 + <str name="comparatorClass">freq</str>
  1260 + </lst> -->
  1261 +
  1262 + <!-- A spellchecker that reads the list of words from a file -->
  1263 + <!--
  1264 + <lst name="spellchecker">
  1265 + <str name="classname">solr.FileBasedSpellChecker</str>
  1266 + <str name="name">file</str>
  1267 + <str name="sourceLocation">spellings.txt</str>
  1268 + <str name="characterEncoding">UTF-8</str>
  1269 + <str name="spellcheckIndexDir">spellcheckerFile</str>
  1270 + </lst>
  1271 + -->
  1272 + </searchComponent>
  1273 +
  1274 + <!-- A request handler for demonstrating the spellcheck component.
  1275 +
  1276 + NOTE: This is purely as an example. The whole purpose of the
  1277 + SpellCheckComponent is to hook it into the request handler that
  1278 + handles your normal user queries so that a separate request is
  1279 + not needed to get suggestions.
  1280 +
  1281 + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
  1282 + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
  1283 +
  1284 + See http://wiki.apache.org/solr/SpellCheckComponent for details
  1285 + on the request parameters.
  1286 + -->
  1287 + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  1288 + <lst name="defaults">
  1289 + <str name="df">text</str>
  1290 + <!-- Solr will use suggestions from both the 'default' spellchecker
  1291 + and from the 'wordbreak' spellchecker and combine them.
  1292 + collations (re-written queries) can include a combination of
  1293 + corrections from both spellcheckers -->
  1294 + <str name="spellcheck.dictionary">default</str>
  1295 + <str name="spellcheck.dictionary">wordbreak</str>
  1296 + <str name="spellcheck">on</str>
  1297 + <str name="spellcheck.extendedResults">true</str>
  1298 + <str name="spellcheck.count">10</str>
  1299 + <str name="spellcheck.alternativeTermCount">5</str>
  1300 + <str name="spellcheck.maxResultsForSuggest">5</str>
  1301 + <str name="spellcheck.collate">true</str>
  1302 + <str name="spellcheck.collateExtendedResults">true</str>
  1303 + <str name="spellcheck.maxCollationTries">10</str>
  1304 + <str name="spellcheck.maxCollations">5</str>
  1305 + </lst>
  1306 + <arr name="last-components">
  1307 + <str>spellcheck</str>
  1308 + </arr>
  1309 + </requestHandler>
  1310 +
  1311 + <!-- Term Vector Component
  1312 +
  1313 + http://wiki.apache.org/solr/TermVectorComponent
  1314 + -->
  1315 + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
  1316 +
  1317 + <!-- A request handler for demonstrating the term vector component
  1318 +
  1319 + This is purely as an example.
  1320 +
  1321 + In reality you will likely want to add the component to your
  1322 + already specified request handlers.
  1323 + -->
  1324 + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
  1325 + <lst name="defaults">
  1326 + <str name="df">text</str>
  1327 + <bool name="tv">true</bool>
  1328 + </lst>
  1329 + <arr name="last-components">
  1330 + <str>tvComponent</str>
  1331 + </arr>
  1332 + </requestHandler>
  1333 +
  1334 + <!-- Clustering Component
  1335 +
  1336 + http://wiki.apache.org/solr/ClusteringComponent
  1337 +
  1338 + You'll need to set the solr.clustering.enabled system property
  1339 + when running solr to run with clustering enabled:
  1340 +
  1341 + java -Dsolr.clustering.enabled=true -jar start.jar
  1342 +
  1343 + -->
  1344 + <searchComponent name="clustering"
  1345 + enable="${solr.clustering.enabled:false}"
  1346 + class="solr.clustering.ClusteringComponent" >
  1347 + <!-- Declare an engine -->
  1348 + <lst name="engine">
  1349 + <!-- The name, only one can be named "default" -->
  1350 + <str name="name">default</str>
  1351 +
  1352 + <!-- Class name of Carrot2 clustering algorithm.
  1353 +
  1354 + Currently available algorithms are:
  1355 +
  1356 + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
  1357 + * org.carrot2.clustering.stc.STCClusteringAlgorithm
  1358 + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
  1359 +
  1360 + See http://project.carrot2.org/algorithms.html for the
  1361 + algorithm's characteristics.
  1362 + -->
  1363 + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
  1364 +
  1365 + <!-- Overriding values for Carrot2 default algorithm attributes.
  1366 +
  1367 + For a description of all available attributes, see:
  1368 + http://download.carrot2.org/stable/manual/#chapter.components.
  1369 + Use attribute key as name attribute of str elements
  1370 + below. These can be further overridden for individual
  1371 + requests by specifying attribute key as request parameter
  1372 + name and attribute value as parameter value.
  1373 + -->
  1374 + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
  1375 +
  1376 + <!-- Location of Carrot2 lexical resources.
  1377 +
  1378 + A directory from which to load Carrot2-specific stop words
  1379 + and stop labels. Absolute or relative to Solr config directory.
  1380 + If a specific resource (e.g. stopwords.en) is present in the
  1381 + specified dir, it will completely override the corresponding
  1382 + default one that ships with Carrot2.
  1383 +
  1384 + For an overview of Carrot2 lexical resources, see:
  1385 + http://download.carrot2.org/head/manual/#chapter.lexical-resources
  1386 + -->
  1387 + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
  1388 +
  1389 + <!-- The language to assume for the documents.
  1390 +
  1391 + For a list of allowed values, see:
  1392 + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
  1393 + -->
  1394 + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str>
  1395 + </lst>
  1396 + <lst name="engine">
  1397 + <str name="name">stc</str>
  1398 + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
  1399 + </lst>
  1400 + </searchComponent>
  1401 +
  1402 + <!-- A request handler for demonstrating the clustering component
  1403 +
  1404 + This is purely as an example.
  1405 +
  1406 + In reality you will likely want to add the component to your
  1407 + already specified request handlers.
  1408 + -->
  1409 + <requestHandler name="/clustering"
  1410 + startup="lazy"
  1411 + enable="${solr.clustering.enabled:false}"
  1412 + class="solr.SearchHandler">
  1413 + <lst name="defaults">
  1414 + <bool name="clustering">true</bool>
  1415 + <str name="clustering.engine">default</str>
  1416 + <bool name="clustering.results">true</bool>
  1417 + <!-- The title field -->
  1418 + <str name="carrot.title">name</str>
  1419 + <str name="carrot.url">id</str>
  1420 + <!-- The field to cluster on -->
  1421 + <str name="carrot.snippet">features</str>
  1422 + <!-- produce summaries -->
  1423 + <bool name="carrot.produceSummary">true</bool>
  1424 + <!-- the maximum number of labels per cluster -->
  1425 + <!--<int name="carrot.numDescriptions">5</int>-->
  1426 + <!-- produce sub clusters -->
  1427 + <bool name="carrot.outputSubClusters">false</bool>
  1428 +
  1429 + <str name="defType">edismax</str>
  1430 + <str name="qf">
  1431 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  1432 + </str>
  1433 + <str name="q.alt">*:*</str>
  1434 + <str name="rows">10</str>
  1435 + <str name="fl">*,score</str>
  1436 + </lst>
  1437 + <arr name="last-components">
  1438 + <str>clustering</str>
  1439 + </arr>
  1440 + </requestHandler>
  1441 +
  1442 + <!-- Terms Component
  1443 +
  1444 + http://wiki.apache.org/solr/TermsComponent
  1445 +
  1446 + A component to return terms and document frequency of those
  1447 + terms
  1448 + -->
  1449 + <searchComponent name="terms" class="solr.TermsComponent"/>
  1450 +
  1451 + <!-- A request handler for demonstrating the terms component -->
  1452 + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
  1453 + <lst name="defaults">
  1454 + <bool name="terms">true</bool>
  1455 + <bool name="distrib">false</bool>
  1456 + </lst>
  1457 + <arr name="components">
  1458 + <str>terms</str>
  1459 + </arr>
  1460 + </requestHandler>
  1461 +
  1462 +
  1463 + <!-- Query Elevation Component
  1464 +
  1465 + http://wiki.apache.org/solr/QueryElevationComponent
  1466 +
  1467 + a search component that enables you to configure the top
  1468 + results for a given query regardless of the normal lucene
  1469 + scoring.
  1470 + -->
  1471 + <searchComponent name="elevator" class="solr.QueryElevationComponent" >
  1472 + <!-- pick a fieldType to analyze queries -->
  1473 + <str name="queryFieldType">string</str>
  1474 + <str name="config-file">elevate.xml</str>
  1475 + </searchComponent>
  1476 +
  1477 + <!-- A request handler for demonstrating the elevator component -->
  1478 + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
  1479 + <lst name="defaults">
  1480 + <str name="echoParams">explicit</str>
  1481 + <str name="df">text</str>
  1482 + </lst>
  1483 + <arr name="last-components">
  1484 + <str>elevator</str>
  1485 + </arr>
  1486 + </requestHandler>
  1487 +
  1488 + <!-- Highlighting Component
  1489 +
  1490 + http://wiki.apache.org/solr/HighlightingParameters
  1491 + -->
  1492 + <searchComponent class="solr.HighlightComponent" name="highlight">
  1493 + <highlighting>
  1494 + <!-- Configure the standard fragmenter -->
  1495 + <!-- This could most likely be commented out in the "default" case -->
  1496 + <fragmenter name="gap"
  1497 + default="true"
  1498 + class="solr.highlight.GapFragmenter">
  1499 + <lst name="defaults">
  1500 + <int name="hl.fragsize">250</int>
  1501 + </lst>
  1502 + </fragmenter>
  1503 +
  1504 + <!-- A regular-expression-based fragmenter
  1505 + (for sentence extraction)
  1506 + -->
  1507 + <fragmenter name="regex"
  1508 + class="solr.highlight.RegexFragmenter">
  1509 + <lst name="defaults">
  1510 + <!-- slightly smaller fragsizes work better because of slop -->
  1511 + <int name="hl.fragsize">250</int>
  1512 + <!-- allow 50% slop on fragment sizes -->
  1513 + <float name="hl.regex.slop">0.5</float>
  1514 + <!-- a basic sentence pattern -->
  1515 + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
  1516 + </lst>
  1517 + </fragmenter>
  1518 +
  1519 + <!-- Configure the standard formatter -->
  1520 + <formatter name="html"
  1521 + default="true"
  1522 + class="solr.highlight.HtmlFormatter">
  1523 + <lst name="defaults">
  1524 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  1525 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  1526 + </lst>
  1527 + </formatter>
  1528 +
  1529 + <!-- Configure the standard encoder -->
  1530 + <encoder name="html"
  1531 + class="solr.highlight.HtmlEncoder" />
  1532 +
  1533 + <!-- Configure the standard fragListBuilder -->
  1534 + <fragListBuilder name="simple"
  1535 + class="solr.highlight.SimpleFragListBuilder"/>
  1536 +
  1537 + <!-- Configure the single fragListBuilder -->
  1538 + <fragListBuilder name="single"
  1539 + class="solr.highlight.SingleFragListBuilder"/>
  1540 +
  1541 + <!-- Configure the weighted fragListBuilder -->
  1542 + <fragListBuilder name="weighted"
  1543 + default="true"
  1544 + class="solr.highlight.WeightedFragListBuilder"/>
  1545 +
  1546 + <!-- default tag FragmentsBuilder -->
  1547 + <fragmentsBuilder name="default"
  1548 + default="true"
  1549 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1550 + <!--
  1551 + <lst name="defaults">
  1552 + <str name="hl.multiValuedSeparatorChar">/</str>
  1553 + </lst>
  1554 + -->
  1555 + </fragmentsBuilder>
  1556 +
  1557 + <!-- multi-colored tag FragmentsBuilder -->
  1558 + <fragmentsBuilder name="colored"
  1559 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1560 + <lst name="defaults">
  1561 + <str name="hl.tag.pre"><![CDATA[
  1562 + <b style="background:yellow">,<b style="background:lawgreen">,
  1563 + <b style="background:aquamarine">,<b style="background:magenta">,
  1564 + <b style="background:palegreen">,<b style="background:coral">,
  1565 + <b style="background:wheat">,<b style="background:khaki">,
  1566 + <b style="background:lime">,<b style="background:deepskyblue">]]></str>
  1567 + <str name="hl.tag.post"><![CDATA[</b>]]></str>
  1568 + </lst>
  1569 + </fragmentsBuilder>
  1570 +
  1571 + <boundaryScanner name="default"
  1572 + default="true"
  1573 + class="solr.highlight.SimpleBoundaryScanner">
  1574 + <lst name="defaults">
  1575 + <str name="hl.bs.maxScan">10</str>
  1576 + <str name="hl.bs.chars">.,!?
  1577 +
  1578 +</str>
  1579 + </lst>
  1580 + </boundaryScanner>
  1581 +
  1582 + <boundaryScanner name="breakIterator"
  1583 + class="solr.highlight.BreakIteratorBoundaryScanner">
  1584 + <lst name="defaults">
  1585 + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
  1586 + <str name="hl.bs.type">WORD</str>
  1587 + <!-- language and country are used when constructing Locale object. -->
  1588 + <!-- And the Locale object will be used when getting instance of BreakIterator -->
  1589 + <str name="hl.bs.language">pt</str>
  1590 + <str name="hl.bs.country">BR</str>
  1591 + </lst>
  1592 + </boundaryScanner>
  1593 + </highlighting>
  1594 + </searchComponent>
  1595 +
  1596 + <!-- Update Processors
  1597 +
  1598 + Chains of Update Processor Factories for dealing with Update
  1599 + Requests can be declared, and then used by name in Update
  1600 + Request Processors
  1601 +
  1602 + http://wiki.apache.org/solr/UpdateRequestProcessor
  1603 +
  1604 + -->
  1605 + <!-- Deduplication
  1606 +
  1607 + An example dedup update processor that creates the "id" field
  1608 + on the fly based on the hash code of some other fields. This
  1609 + example has overwriteDupes set to false since we are using the
  1610 + id field as the signatureField and Solr will maintain
  1611 + uniqueness based on that anyway.
  1612 +
  1613 + -->
  1614 + <!--
  1615 + <updateRequestProcessorChain name="dedupe">
  1616 + <processor class="solr.processor.SignatureUpdateProcessorFactory">
  1617 + <bool name="enabled">true</bool>
  1618 + <str name="signatureField">id</str>
  1619 + <bool name="overwriteDupes">false</bool>
  1620 + <str name="fields">name,features,cat</str>
  1621 + <str name="signatureClass">solr.processor.Lookup3Signature</str>
  1622 + </processor>
  1623 + <processor class="solr.LogUpdateProcessorFactory" />
  1624 + <processor class="solr.RunUpdateProcessorFactory" />
  1625 + </updateRequestProcessorChain>
  1626 + -->
  1627 +
  1628 + <!-- Language identification
  1629 +
  1630 + This example update chain identifies the language of the incoming
  1631 + documents using the langid contrib. The detected language is
  1632 + written to field language_s. No field name mapping is done.
  1633 + The fields used for detection are text, title, subject and description,
  1634 + making this example suitable for detecting languages from full-text
  1635 + rich documents injected via ExtractingRequestHandler.
  1636 + See more about langId at http://wiki.apache.org/solr/LanguageDetection
  1637 + -->
  1638 + <!--
  1639 + <updateRequestProcessorChain name="langid">
  1640 + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
  1641 + <str name="langid.fl">text,title,subject,description</str>
  1642 + <str name="langid.langField">language_s</str>
  1643 + <str name="langid.fallback">en</str>
  1644 + </processor>
  1645 + <processor class="solr.LogUpdateProcessorFactory" />
  1646 + <processor class="solr.RunUpdateProcessorFactory" />
  1647 + </updateRequestProcessorChain>
  1648 + -->
  1649 +
  1650 + <!-- Script update processor
  1651 +
  1652 + This example hooks in an update processor implemented using JavaScript.
  1653 +
  1654 + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
  1655 + -->
  1656 + <!--
  1657 + <updateRequestProcessorChain name="script">
  1658 + <processor class="solr.StatelessScriptUpdateProcessorFactory">
  1659 + <str name="script">update-script.js</str>
  1660 + <lst name="params">
  1661 + <str name="config_param">example config parameter</str>
  1662 + </lst>
  1663 + </processor>
  1664 + <processor class="solr.RunUpdateProcessorFactory" />
  1665 + </updateRequestProcessorChain>
  1666 + -->
  1667 +
  1668 + <!-- Response Writers
  1669 +
  1670 + http://wiki.apache.org/solr/QueryResponseWriter
  1671 +
  1672 + Request responses will be written using the writer specified by
  1673 + the 'wt' request parameter matching the name of a registered
  1674 + writer.
  1675 +
  1676 + The "default" writer is the default and will be used if 'wt' is
  1677 + not specified in the request.
  1678 + -->
  1679 + <!-- The following response writers are implicitly configured unless
  1680 + overridden...
  1681 + -->
  1682 + <!--
  1683 + <queryResponseWriter name="xml"
  1684 + default="true"
  1685 + class="solr.XMLResponseWriter" />
  1686 + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
  1687 + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
  1688 + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
  1689 + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
  1690 + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
  1691 + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
  1692 + -->
  1693 +
  1694 + <queryResponseWriter name="json" class="solr.JSONResponseWriter">
  1695 + <!-- For the purposes of the tutorial, JSON responses are written as
  1696 + plain text so that they are easy to read in *any* browser.
  1697 + If you expect a MIME type of "application/json" just remove this override.
  1698 + -->
  1699 + <str name="content-type">text/plain; charset=UTF-8</str>
  1700 + </queryResponseWriter>
  1701 +
  1702 + <!--
  1703 + Custom response writers can be declared as needed...
  1704 + -->
  1705 + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
  1706 +
  1707 +
  1708 + <!-- XSLT response writer transforms the XML output by any xslt file found
  1709 + in Solr's conf/xslt directory. Changes to xslt files are checked for
  1710 + every xsltCacheLifetimeSeconds.
  1711 + -->
  1712 + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
  1713 + <int name="xsltCacheLifetimeSeconds">5</int>
  1714 + </queryResponseWriter>
  1715 +
  1716 + <!-- Query Parsers
  1717 +
  1718 + http://wiki.apache.org/solr/SolrQuerySyntax
  1719 +
  1720 + Multiple QParserPlugins can be registered by name, and then
  1721 + used in either the "defType" param for the QueryComponent (used
  1722 + by SearchHandler) or in LocalParams
  1723 + -->
  1724 + <!-- example of registering a query parser -->
  1725 + <!--
  1726 + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
  1727 + -->
  1728 +
  1729 + <!-- Function Parsers
  1730 +
  1731 + http://wiki.apache.org/solr/FunctionQuery
  1732 +
  1733 + Multiple ValueSourceParsers can be registered by name, and then
  1734 + used as function names when using the "func" QParser.
  1735 + -->
  1736 + <!-- example of registering a custom function parser -->
  1737 + <!--
  1738 + <valueSourceParser name="myfunc"
  1739 + class="com.mycompany.MyValueSourceParser" />
  1740 + -->
  1741 +
  1742 +
  1743 + <!-- Document Transformers
  1744 + http://wiki.apache.org/solr/DocTransformers
  1745 + -->
  1746 + <!--
  1747 + Could be something like:
  1748 + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
  1749 + <int name="connection">jdbc://....</int>
  1750 + </transformer>
  1751 +
  1752 + To add a constant value to all docs, use:
  1753 + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1754 + <int name="value">5</int>
  1755 + </transformer>
  1756 +
  1757 + If you want the user to still be able to change it with _value:something_ use this:
  1758 + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1759 + <double name="defaultValue">5</double>
  1760 + </transformer>
  1761 +
  1762 + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
  1763 + EditorialMarkerFactory will do exactly that:
  1764 + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
  1765 + -->
  1766 +
  1767 +
  1768 + <!-- Legacy config for the admin interface -->
  1769 + <admin>
  1770 + <defaultQuery>*:*</defaultQuery>
  1771 + </admin>
  1772 +
  1773 +</config>
... ...
index/sei-protocolos-schema.xml 0 → 100755
  1 +++ a/index/sei-protocolos-schema.xml
... ... @@ -0,0 +1,1163 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + This is the Solr schema file. This file should be named "schema.xml" and
  21 + should be in the conf directory under the solr home
  22 + (i.e. ./solr/conf/schema.xml by default)
  23 + or located where the classloader for the Solr webapp can find it.
  24 +
  25 + This example schema is the recommended starting point for users.
  26 + It should be kept correct and concise, usable out-of-the-box.
  27 +
  28 + For more information, on how to customize this file, please see
  29 + http://wiki.apache.org/solr/SchemaXml
  30 +
  31 + PERFORMANCE NOTE: this schema includes many optional features and should not
  32 + be used for benchmarking. To improve performance one could
  33 + - set stored="false" for all fields possible (esp large fields) when you
  34 + only need to search on the field but don't need to return the original
  35 + value.
  36 + - set indexed="false" if you don't need to search on the field, but only
  37 + return the field as a result of searching on other indexed fields.
  38 + - remove all unneeded copyField statements
  39 + - for best index size and searching performance, set "indexed" to false
  40 + for all general text fields, use copyField to copy them to the
  41 + catchall "text" field, and use that for searching.
  42 + - For maximum indexing performance, use the StreamingUpdateSolrServer
  43 + java client.
  44 + - Remember to run the JVM in server mode, and use a higher logging level
  45 + that avoids logging every request
  46 +-->
  47 +
  48 +<schema name="sei-protocolos" version="1.5">
  49 + <!-- attribute "name" is the name of this schema and is only used for display purposes.
  50 + version="x.y" is Solr's version number for the schema syntax and
  51 + semantics. It should not normally be changed by applications.
  52 +
  53 + 1.0: multiValued attribute did not exist, all fields are multiValued
  54 + by nature
  55 + 1.1: multiValued attribute introduced, false by default
  56 + 1.2: omitTermFreqAndPositions attribute introduced, true by default
  57 + except for text fields.
  58 + 1.3: removed optional field compress feature
  59 + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
  60 + behavior when a single string produces multiple tokens. Defaults
  61 + to off for version >= 1.4
  62 + 1.5: omitNorms defaults to true for primitive field types
  63 + (int, float, boolean, string...)
  64 + -->
  65 +
  66 + <fields>
  67 +
  68 +
  69 + <field name="idx_descricao" type="text_general" indexed="true" stored="false" />
  70 + <field name="numero" type="string" indexed="true" stored="true" />
  71 + <field name="id_assinante" type="string" indexed="true" stored="false" />
  72 + <field name="id_protocolo" type="string" indexed="false" stored="true" />
  73 + <field name="id_unidade_geradora" type="string" indexed="true" stored="false" />
  74 + <field name="id_serie" type="string" indexed="true" stored="false" />
  75 + <field name="dta_geracao" type="date" indexed="true" stored="true" />
  76 + <field name="sigla_usuario_gerador" type="string" indexed="true" stored="true" />
  77 + <field name="nome_usuario_gerador" type="string" indexed="false" stored="true" />
  78 + <field name="sta_protocolo" type="string" indexed="true" stored="false" />
  79 + <field name="id_assunto" type="string" indexed="true" stored="false" />
  80 + <field name="id_unidade_aberto" type="string" indexed="true" stored="false" />
  81 + <field name="id_unidade_acesso" type="string" indexed="true" stored="false" />
  82 + <field name="id_tipo_processo" type="string" indexed="true" stored="false" />
  83 + <field name="nome_tipo_processo" type="string" indexed="false" stored="true" />
  84 + <field name="sigla_unidade_geradora" type="string" indexed="false" stored="true" />
  85 + <field name="descricao_unidade_geradora" type="string" indexed="false" stored="true" />
  86 + <field name="id_participante" type="string" indexed="true" stored="false" />
  87 + <field name="tipo_acesso" type="string" indexed="true" stored="false" />
  88 + <field name="identificacao_protocolo" type="string" indexed="false" stored="true" />
  89 + <field name="protocolo_formatado_pesquisa" type="string" indexed="true" stored="false" />
  90 + <field name="protocolo_processo_formatado" type="string" indexed="false" stored="true" />
  91 + <field name="protocolo_documento_formatado" type="string" indexed="true" stored="true" />
  92 + <field name="link_arvore" type="string" indexed="false" stored="true" />
  93 + <dynamicField name="idx_observacao_*" type="text_general" indexed="true" stored="false" />
  94 +
  95 + <!-- Valid attributes for fields:
  96 + name: mandatory - the name for the field
  97 + type: mandatory - the name of a field type from the
  98 + <types> fieldType section
  99 + indexed: true if this field should be indexed (searchable or sortable)
  100 + stored: true if this field should be retrievable
  101 + multiValued: true if this field may contain multiple values per document
  102 + omitNorms: (expert) set to true to omit the norms associated with
  103 + this field (this disables length normalization and index-time
  104 + boosting for the field, and saves some memory). Only full-text
  105 + fields or fields that need an index-time boost need norms.
  106 + Norms are omitted for primitive (non-analyzed) types by default.
  107 + termVectors: [false] set to true to store the term vector for a
  108 + given field.
  109 + When using MoreLikeThis, fields used for similarity should be
  110 + stored for best performance.
  111 + termPositions: Store position information with the term vector.
  112 + This will increase storage costs.
  113 + termOffsets: Store offset information with the term vector. This
  114 + will increase storage costs.
  115 + required: The field is required. It will throw an error if the
  116 + value does not exist
  117 + default: a value that should be used if no value is specified
  118 + when adding a document.
  119 + -->
  120 +
  121 + <!-- field names should consist of alphanumeric or underscore characters only and
  122 + not start with a digit. This is not currently strictly enforced,
  123 + but other field names will not have first class support from all components
  124 + and back compatibility is not guaranteed. Names with both leading and
  125 + trailing underscores (e.g. _version_) are reserved.
  126 + -->
  127 + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
  128 +
  129 + <!-- mairon
  130 + <field name="documento" type="string" indexed="true" stored="true" />
  131 + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
  132 + <field name="name" type="text_general" indexed="true" stored="true"/>
  133 + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
  134 + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
  135 + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
  136 + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
  137 +
  138 + <field name="weight" type="float" indexed="true" stored="true"/>
  139 + <field name="price" type="float" indexed="true" stored="true"/>
  140 + <field name="popularity" type="int" indexed="true" stored="true" />
  141 + <field name="inStock" type="boolean" indexed="true" stored="true" />
  142 +
  143 + <field name="store" type="location" indexed="true" stored="true"/>
  144 + -->
  145 +
  146 + <!-- Common metadata fields, named specifically to match up with
  147 + SolrCell metadata when parsing rich documents such as Word, PDF.
  148 + Some fields are multiValued only because Tika currently may return
  149 + multiple values for them. Some metadata is parsed from the documents,
  150 + but there are some which come from the client context:
  151 + "content_type": From the HTTP headers of incoming stream
  152 + "resourcename": From SolrCell request param resource.name
  153 + -->
  154 + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
  155 + <field name="subject" type="text_general" indexed="true" stored="true"/>
  156 + <field name="description" type="text_general" indexed="true" stored="true"/>
  157 + <field name="comments" type="text_general" indexed="true" stored="true"/>
  158 + <field name="author" type="text_general" indexed="true" stored="true"/>
  159 + <field name="keywords" type="text_general" indexed="true" stored="true"/>
  160 + <field name="category" type="text_general" indexed="true" stored="true"/>
  161 + <field name="resourcename" type="text_general" indexed="true" stored="true"/>
  162 + <field name="url" type="text_general" indexed="true" stored="true"/>
  163 + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
  164 + <field name="last_modified" type="date" indexed="true" stored="true"/>
  165 + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
  166 +
  167 + <!-- Main body of document extracted by SolrCell.
  168 + NOTE: This field is not indexed by default, since it is also copied to "text"
  169 + using copyField below. This is to save space. Use this field for returning and
  170 + highlighting document content. Use the "text" field to search the content. -->
  171 + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
  172 +
  173 +
  174 + <!-- catchall field, containing all other searchable text fields (implemented
  175 + via copyField further on in this schema) -->
  176 + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
  177 +
  178 + <!-- catchall text field that indexes tokens both normally and in reverse for efficient
  179 + leading wildcard queries. -->
  180 + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
  181 +
  182 + <!-- non-tokenized version of manufacturer to make it easier to sort or group
  183 + results by manufacturer. copied from "manu" via copyField -->
  184 + <field name="manu_exact" type="string" indexed="true" stored="false"/>
  185 +
  186 + <field name="payloads" type="payloads" indexed="true" stored="true"/>
  187 +
  188 + <field name="_version_" type="long" indexed="true" stored="true"/>
  189 +
  190 + <!-- Uncommenting the following will create a "timestamp" field using
  191 + a default value of "NOW" to indicate when each document was indexed.
  192 + -->
  193 + <!--
  194 + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
  195 + -->
  196 +
  197 + <!-- Dynamic field definitions allow using convention over configuration
  198 + for fields via the specification of patterns to match field names.
  199 + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
  200 + RESTRICTION: the glob-like pattern in the name attribute must have
  201 + a "*" only at the start or the end. -->
  202 +
  203 + <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
  204 + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
  205 + <dynamicField name="*_s" type="string" indexed="true" stored="true" />
  206 + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
  207 + <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
  208 + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
  209 + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
  210 + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
  211 + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
  212 + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
  213 + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
  214 + <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
  215 + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
  216 + <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
  217 + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
  218 +
  219 + <!-- Type used to index the lat and lon components for the "location" FieldType -->
  220 + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
  221 +
  222 + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
  223 + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
  224 + <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
  225 +
  226 + <!-- some trie-coded dynamic fields for faster range queries -->
  227 + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
  228 + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
  229 + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
  230 + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
  231 + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
  232 +
  233 + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
  234 + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
  235 +
  236 + <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
  237 + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
  238 +
  239 + <dynamicField name="random_*" type="random" />
  240 +
  241 + <!-- uncomment the following to ignore any fields that don't already match an existing
  242 + field name or dynamic field, rather than reporting them as an error.
  243 + alternately, change the type="ignored" to some other type e.g. "text" if you want
  244 + unknown fields indexed and/or stored by default -->
  245 + <!--dynamicField name="*" type="ignored" multiValued="true" /-->
  246 +
  247 + </fields>
  248 +
  249 +
  250 + <!-- Field to use to determine and enforce document uniqueness.
  251 + Unless this field is marked with required="false", it will be a required field
  252 + -->
  253 + <uniqueKey>id</uniqueKey>
  254 +
  255 + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
  256 + parsing a query string that isn't explicit about the field. Machine (non-user)
  257 + generated queries are best made explicit, or they can use the "df" request parameter
  258 + which takes precedence over this.
  259 + Note: Un-commenting defaultSearchField will be insufficient if your request handler
  260 + in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
  261 + <defaultSearchField>text</defaultSearchField> -->
  262 +
  263 + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
  264 + when parsing a query string to determine if a clause of the query should be marked as
  265 + required or optional, assuming the clause isn't already marked by some operator.
  266 + The default is OR, which is generally assumed so it is not a good idea to change it
  267 + globally here. The "q.op" request parameter takes precedence over this.
  268 + <solrQueryParser defaultOperator="OR"/> -->
  269 +
  270 + <!-- copyField commands copy one field to another at the time a document
  271 + is added to the index. It's used either to index the same field differently,
  272 + or to add multiple fields to the same field for easier/faster searching. -->
  273 +
  274 + <!-- mairon
  275 + <copyField source="cat" dest="text"/>
  276 + <copyField source="name" dest="text"/>
  277 + <copyField source="manu" dest="text"/>
  278 + <copyField source="features" dest="text"/>
  279 + <copyField source="includes" dest="text"/>
  280 + <copyField source="manu" dest="manu_exact"/>
  281 + <copyField source="price" dest="price_c"/>
  282 + -->
  283 +
  284 + <!-- Text fields from SolrCell to search by default in our catch-all field -->
  285 + <copyField source="title" dest="text"/>
  286 + <copyField source="author" dest="text"/>
  287 + <copyField source="description" dest="text"/>
  288 + <copyField source="keywords" dest="text"/>
  289 + <copyField source="content" dest="text"/>
  290 + <copyField source="content_type" dest="text"/>
  291 + <copyField source="resourcename" dest="text"/>
  292 + <copyField source="url" dest="text"/>
  293 +
  294 + <!-- Create a string version of author for faceting -->
  295 + <copyField source="author" dest="author_s"/>
  296 +
  297 + <!-- Above, multiple source fields are copied to the [text] field.
  298 + Another way to map multiple source fields to the same
  299 + destination field is to use the dynamic field syntax.
  300 + copyField also supports a maxChars to copy setting. -->
  301 +
  302 + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
  303 +
  304 + <!-- copy name to alphaNameSort, a field designed for sorting by name -->
  305 + <!-- <copyField source="name" dest="alphaNameSort"/> -->
  306 +
  307 + <types>
  308 + <!-- field type definitions. The "name" attribute is
  309 + just a label to be used by field definitions. The "class"
  310 + attribute and any other attributes determine the real
  311 + behavior of the fieldType.
  312 + Class names starting with "solr" refer to java classes in a
  313 + standard package such as org.apache.solr.analysis
  314 + -->
  315 +
  316 + <fieldType name="date" class="solr.DateField" omitNorms="true"/>
  317 +
  318 + <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
  319 + <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
  320 +
  321 + <!-- boolean type: "true" or "false" -->
  322 + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
  323 +
  324 + <!-- The optional sortMissingLast and sortMissingFirst attributes are
  325 + currently supported on types that are sorted internally as strings
  326 + and on numeric types.
  327 + This includes "string","boolean", and, as of 3.5 (and 4.x),
  328 + int, float, long, date, double, including the "Trie" variants.
  329 + - If sortMissingLast="true", then a sort on this field will cause documents
  330 + without the field to come after documents with the field,
  331 + regardless of the requested sort order (asc or desc).
  332 + - If sortMissingFirst="true", then a sort on this field will cause documents
  333 + without the field to come before documents with the field,
  334 + regardless of the requested sort order.
  335 + - If sortMissingLast="false" and sortMissingFirst="false" (the default),
  336 + then default lucene sorting will be used which places docs without the
  337 + field first in an ascending sort and last in a descending sort.
  338 + -->
  339 +
  340 + <!--
  341 + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
  342 + -->
  343 + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
  344 + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
  345 + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  346 + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
  347 +
  348 + <!--
  349 + Numeric field types that index each value at various levels of precision
  350 + to accelerate range queries when the number of values between the range
  351 + endpoints is large. See the javadoc for NumericRangeQuery for internal
  352 + implementation details.
  353 +
  354 + Smaller precisionStep values (specified in bits) will lead to more tokens
  355 + indexed per value, slightly larger index size, and faster range queries.
  356 + A precisionStep of 0 disables indexing at different precision levels.
  357 + -->
  358 + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
  359 + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
  360 + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
  361 + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
  362 +
  363 + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
  364 + is a more restricted form of the canonical representation of dateTime
  365 + http://www.w3.org/TR/xmlschema-2/#dateTime
  366 + The trailing "Z" designates UTC time and is mandatory.
  367 + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
  368 + All other components are mandatory.
  369 +
  370 + Expressions can also be used to denote calculations that should be
  371 + performed relative to "NOW" to determine the value, ie...
  372 +
  373 + NOW/HOUR
  374 + ... Round to the start of the current hour
  375 + NOW-1DAY
  376 + ... Exactly 1 day prior to now
  377 + NOW/DAY+6MONTHS+3DAYS
  378 + ... 6 months and 3 days in the future from the start of
  379 + the current day
  380 +
  381 + Consult the DateField javadocs for more information.
  382 +
  383 + Note: For faster range queries, consider the tdate type
  384 + -->
  385 +<!-- <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> -->
  386 +
  387 + <!-- A Trie based date field for faster date range queries and date faceting. -->
  388 + <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
  389 +
  390 +
  391 + <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
  392 + <fieldtype name="binary" class="solr.BinaryField"/>
  393 +
  394 + <!--
  395 + Note:
  396 + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
  397 + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
  398 +
  399 + Plain numeric field types that store and index the text
  400 + value verbatim (and hence don't correctly support range queries, since the
  401 + lexicographic ordering isn't equal to the numeric ordering)
  402 + -->
  403 + <fieldType name="pint" class="solr.IntField"/>
  404 + <fieldType name="plong" class="solr.LongField"/>
  405 + <fieldType name="pfloat" class="solr.FloatField"/>
  406 + <fieldType name="pdouble" class="solr.DoubleField"/>
  407 + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
  408 +
  409 + <!-- The "RandomSortField" is not used to store or search any
  410 + data. You can declare fields of this type in your schema
  411 + to generate pseudo-random orderings of your docs for sorting
  412 + or function purposes. The ordering is generated based on the field
  413 + name and the version of the index. As long as the index version
  414 + remains unchanged, and the same field name is reused,
  415 + the ordering of the docs will be consistent.
  416 + If you want different pseudo-random orderings of documents,
  417 + for the same version of the index, use a dynamicField and
  418 + change the field name in the request.
  419 + -->
  420 + <fieldType name="random" class="solr.RandomSortField" indexed="true" />
  421 +
  422 + <!-- solr.TextField allows the specification of custom text analyzers
  423 + specified as a tokenizer and a list of token filters. Different
  424 + analyzers may be specified for indexing and querying.
  425 +
  426 + The optional positionIncrementGap puts space between multiple fields of
  427 + this type on the same document, with the purpose of preventing false phrase
  428 + matching across fields.
  429 +
  430 + For more info on customizing your analyzer chain, please see
  431 + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
  432 + -->
  433 +
  434 + <!-- One can also specify an existing Analyzer class that has a
  435 + default constructor via the class attribute on the analyzer element.
  436 + Example:
  437 + <fieldType name="text_greek" class="solr.TextField">
  438 + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
  439 + </fieldType>
  440 + -->
  441 +
  442 + <!-- A text field that only splits on whitespace for exact matching of words -->
  443 + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  444 + <analyzer>
  445 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  446 + </analyzer>
  447 + </fieldType>
  448 +
  449 + <!-- A general text field tuned for Portuguese content: it
  450 + tokenizes with StandardTokenizer, removes stop words listed in
  451 + the case-insensitive "lang/stopwords_pt.txt", and down cases.
  452 + The index analyzer also folds accented characters to ASCII.
  453 + The synonym filters are currently commented out. -->
  454 + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  455 + <analyzer type="index">
  456 +
  457 + <tokenizer class="solr.StandardTokenizerFactory"/>
  458 +
  459 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  460 + <!-- in this example, we will only use synonyms at query time
  461 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  462 + -->
  463 + <filter class="solr.LowerCaseFilterFactory"/>
  464 +
  465 + <!-- mairon -->
  466 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  467 + <!-- mairon -->
  468 +
  469 +
  470 + </analyzer>
  471 +
  472 + <analyzer type="query">
  473 + <tokenizer class="solr.StandardTokenizerFactory"/>
  474 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  475 + <filter class="solr.LowerCaseFilterFactory"/>
  476 +
  477 + <!-- mairon -->
  478 + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
  479 + <!-- <filter class="solr.ASCIIFoldingFilterFactory"/> -->
  480 + <!-- <filter class="solr.BrazilianStemFilterFactory"/> -->
  481 + <!-- mairon -->
  482 +
  483 + </analyzer>
  484 +
  485 + </fieldType>
  486 +
  487 + <!-- A text field with defaults appropriate for English: it
  488 + tokenizes with StandardTokenizer, removes English stop words
  489 + (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
  490 + finally applies Porter's stemming. The query time analyzer
  491 + also applies synonyms from synonyms.txt. -->
  492 + <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  493 + <analyzer type="index">
  494 + <tokenizer class="solr.StandardTokenizerFactory"/>
  495 + <!-- in this example, we will only use synonyms at query time
  496 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  497 + -->
  498 + <!-- Case insensitive stop word removal.
  499 + add enablePositionIncrements=true in both the index and query
  500 + analyzers to leave a 'gap' for more accurate phrase queries.
  501 + -->
  502 + <filter class="solr.StopFilterFactory"
  503 + ignoreCase="true"
  504 + words="lang/stopwords_en.txt"
  505 + enablePositionIncrements="true"
  506 + />
  507 + <filter class="solr.LowerCaseFilterFactory"/>
  508 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  509 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  510 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  511 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  512 + -->
  513 + <filter class="solr.PorterStemFilterFactory"/>
  514 + </analyzer>
  515 + <analyzer type="query">
  516 + <tokenizer class="solr.StandardTokenizerFactory"/>
  517 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  518 + <filter class="solr.StopFilterFactory"
  519 + ignoreCase="true"
  520 + words="lang/stopwords_en.txt"
  521 + enablePositionIncrements="true"
  522 + />
  523 + <filter class="solr.LowerCaseFilterFactory"/>
  524 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  525 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  526 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  527 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  528 + -->
  529 + <filter class="solr.PorterStemFilterFactory"/>
  530 + </analyzer>
  531 + </fieldType>
  532 +
  533 + <!-- A text field with defaults appropriate for English, plus
  534 + aggressive word-splitting and autophrase features enabled.
  535 + This field is just like text_en, except it adds
  536 + WordDelimiterFilter to enable splitting and matching of
  537 + words on case-change, alpha numeric boundaries, and
  538 + non-alphanumeric chars. This means certain compound word
  539 + cases will work, for example query "wi fi" will match
  540 + document "WiFi" or "wi-fi".
  541 + -->
  542 + <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  543 + <analyzer type="index">
  544 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  545 + <!-- in this example, we will only use synonyms at query time
  546 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  547 + -->
  548 + <!-- Case insensitive stop word removal.
  549 + add enablePositionIncrements=true in both the index and query
  550 + analyzers to leave a 'gap' for more accurate phrase queries.
  551 + -->
  552 + <filter class="solr.StopFilterFactory"
  553 + ignoreCase="true"
  554 + words="lang/stopwords_en.txt"
  555 + enablePositionIncrements="true"
  556 + />
  557 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
  558 + <filter class="solr.LowerCaseFilterFactory"/>
  559 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  560 + <filter class="solr.PorterStemFilterFactory"/>
  561 +
  562 +
  563 + </analyzer>
  564 + <analyzer type="query">
  565 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  566 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  567 + <filter class="solr.StopFilterFactory"
  568 + ignoreCase="true"
  569 + words="lang/stopwords_en.txt"
  570 + enablePositionIncrements="true"
  571 + />
  572 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
  573 + <filter class="solr.LowerCaseFilterFactory"/>
  574 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  575 + <filter class="solr.PorterStemFilterFactory"/>
  576 + </analyzer>
  577 + </fieldType>
  578 +
  579 + <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
  580 + but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
  581 + <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  582 + <analyzer>
  583 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  584 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
  585 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
  586 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
  587 + <filter class="solr.LowerCaseFilterFactory"/>
  588 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  589 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  590 + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
  591 + possible with WordDelimiterFilter in conjunction with stemming. -->
  592 + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  593 + </analyzer>
  594 + </fieldType>
  595 +
  596 + <!-- Similar to text_general (but it uses "stopwords.txt", applies synonyms at query time and does no accent folding);
  597 + it also reverses the characters of each token, to enable more efficient leading wildcard queries. -->
  598 + <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  599 + <analyzer type="index">
  600 + <tokenizer class="solr.StandardTokenizerFactory"/>
  601 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  602 + <filter class="solr.LowerCaseFilterFactory"/>
  603 + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
  604 + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  605 + </analyzer>
  606 + <analyzer type="query">
  607 + <tokenizer class="solr.StandardTokenizerFactory"/>
  608 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  609 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  610 + <filter class="solr.LowerCaseFilterFactory"/>
  611 + </analyzer>
  612 + </fieldType>
  613 +
  614 + <!-- charFilter + WhitespaceTokenizer -->
  615 + <!--
  616 + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
  617 + <analyzer>
  618 + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
  619 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  620 + </analyzer>
  621 + </fieldType>
  622 + -->
  623 +
  624 + <!-- This is an example of using the KeywordTokenizer along
  625 + With various TokenFilterFactories to produce a sortable field
  626 + that does not include some properties of the source text
  627 + -->
  628 + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  629 + <analyzer>
  630 + <!-- KeywordTokenizer does no actual tokenizing, so the entire
  631 + input string is preserved as a single token
  632 + -->
  633 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  634 + <!-- The LowerCase TokenFilter does what you expect, which can be
  635 + useful when you want your sorting to be case insensitive
  636 + -->
  637 + <filter class="solr.LowerCaseFilterFactory" />
  638 + <!-- The TrimFilter removes any leading or trailing whitespace -->
  639 + <filter class="solr.TrimFilterFactory" />
  640 + <!-- The PatternReplaceFilter gives you the flexibility to use
  641 + Java Regular expression to replace any sequence of characters
  642 + matching a pattern with an arbitrary replacement string,
  643 + which may include back references to portions of the original
  644 + string matched by the pattern.
  645 +
  646 + See the Java Regular Expression documentation for more
  647 + information on pattern and replacement string syntax.
  648 +
  649 + http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
  650 + -->
  651 + <filter class="solr.PatternReplaceFilterFactory"
  652 + pattern="([^a-z])" replacement="" replace="all"
  653 + />
  654 + </analyzer>
  655 + </fieldType>
  656 +
  657 + <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
  658 + <analyzer>
  659 + <tokenizer class="solr.StandardTokenizerFactory"/>
  660 + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  661 + </analyzer>
  662 + </fieldtype>
  663 +
  664 + <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
  665 + <analyzer>
  666 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  667 + <!--
  668 + The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
  669 + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
  670 + Attributes of the DelimitedPayloadTokenFilterFactory :
  671 + "delimiter" - a one character delimiter. Default is | (pipe)
  672 + "encoder" - how to encode the following value into a payload
  673 + float -> org.apache.lucene.analysis.payloads.FloatEncoder,
  674 + integer -> o.a.l.a.p.IntegerEncoder
  675 + identity -> o.a.l.a.p.IdentityEncoder
  676 + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
  677 + -->
  678 + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  679 + </analyzer>
  680 + </fieldtype>
  681 +
  682 + <!-- lowercases the entire field value, keeping it as a single token. -->
  683 + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  684 + <analyzer>
  685 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  686 + <filter class="solr.LowerCaseFilterFactory" />
  687 + </analyzer>
  688 + </fieldType>
  689 +
  690 + <!--
  691 + Example of using PathHierarchyTokenizerFactory at index time, so
  692 + queries for paths match documents at that path, or in descendent paths
  693 + -->
  694 + <fieldType name="descendent_path" class="solr.TextField">
  695 + <analyzer type="index">
  696 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  697 + </analyzer>
  698 + <analyzer type="query">
  699 + <tokenizer class="solr.KeywordTokenizerFactory" />
  700 + </analyzer>
  701 + </fieldType>
  702 + <!--
  703 + Example of using PathHierarchyTokenizerFactory at query time, so
  704 + queries for paths match documents at that path, or in ancestor paths
  705 + -->
  706 + <fieldType name="ancestor_path" class="solr.TextField">
  707 + <analyzer type="index">
  708 + <tokenizer class="solr.KeywordTokenizerFactory" />
  709 + </analyzer>
  710 + <analyzer type="query">
  711 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  712 + </analyzer>
  713 + </fieldType>
  714 +
  715 + <!-- since fields of this type are by default not stored or indexed,
  716 + any data added to them will be ignored outright. -->
  717 + <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
  718 +
  719 + <!-- This point type indexes the coordinates as separate fields (subFields)
  720 + If subFieldType is defined, it references a type, and a dynamic field
  721 + definition is created matching *___<typename>. Alternately, if
  722 + subFieldSuffix is defined, that is used to create the subFields.
  723 + Example: if subFieldType="double", then the coordinates would be
  724 + indexed in fields myloc_0___double,myloc_1___double.
  725 + Example: if subFieldSuffix="_d" then the coordinates would be indexed
  726 + in fields myloc_0_d,myloc_1_d
  727 + The subFields are an implementation detail of the fieldType, and end
  728 + users normally should not need to know about them.
  729 + -->
  730 + <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
  731 +
  732 + <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
  733 + <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
  734 +
  735 + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
  736 + For more information about this and other Spatial fields new to Solr 4, see:
  737 + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
  738 + -->
  739 + <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
  740 + geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
  741 +
  742 + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
  743 + Parameters:
  744 + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
  745 + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
  746 + providerClass: Lets you plug in other exchange provider backend:
  747 + solr.FileExchangeRateProvider is the default and takes one parameter:
  748 + currencyConfig: name of an xml file holding exchange rates
  749 + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
  750 + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
  751 + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
  752 + -->
  753 + <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
  754 +
  755 +
  756 +
  757 + <!-- some examples for different languages (generally ordered by ISO code) -->
  758 +
  759 + <!-- Arabic -->
  760 + <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
  761 + <analyzer>
  762 + <tokenizer class="solr.StandardTokenizerFactory"/>
  763 + <!-- for any non-arabic -->
  764 + <filter class="solr.LowerCaseFilterFactory"/>
  765 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
  766 + <!-- normalizes ﻯ to ﻱ, etc -->
  767 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  768 + <filter class="solr.ArabicStemFilterFactory"/>
  769 + </analyzer>
  770 + </fieldType>
  771 +
  772 + <!-- Bulgarian -->
  773 + <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
  774 + <analyzer>
  775 + <tokenizer class="solr.StandardTokenizerFactory"/>
  776 + <filter class="solr.LowerCaseFilterFactory"/>
  777 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
  778 + <filter class="solr.BulgarianStemFilterFactory"/>
  779 + </analyzer>
  780 + </fieldType>
  781 +
  782 + <!-- Catalan -->
  783 + <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
  784 + <analyzer>
  785 + <tokenizer class="solr.StandardTokenizerFactory"/>
  786 + <!-- removes l', etc -->
  787 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
  788 + <filter class="solr.LowerCaseFilterFactory"/>
  789 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
  790 + <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
  791 + </analyzer>
  792 + </fieldType>
  793 +
  794 + <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
  795 + <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  796 + <analyzer>
  797 + <tokenizer class="solr.StandardTokenizerFactory"/>
  798 + <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
  799 + <filter class="solr.CJKWidthFilterFactory"/>
  800 + <!-- for any non-CJK -->
  801 + <filter class="solr.LowerCaseFilterFactory"/>
  802 + <filter class="solr.CJKBigramFilterFactory"/>
  803 + </analyzer>
  804 + </fieldType>
  805 +
  806 + <!-- Czech -->
  807 + <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
  808 + <analyzer>
  809 + <tokenizer class="solr.StandardTokenizerFactory"/>
  810 + <filter class="solr.LowerCaseFilterFactory"/>
  811 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
  812 + <filter class="solr.CzechStemFilterFactory"/>
  813 + </analyzer>
  814 + </fieldType>
  815 +
  816 + <!-- Danish -->
  817 + <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
  818 + <analyzer>
  819 + <tokenizer class="solr.StandardTokenizerFactory"/>
  820 + <filter class="solr.LowerCaseFilterFactory"/>
  821 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
  822 + <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
  823 + </analyzer>
  824 + </fieldType>
  825 +
  826 + <!-- German -->
  827 + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  828 + <analyzer>
  829 + <tokenizer class="solr.StandardTokenizerFactory"/>
  830 + <filter class="solr.LowerCaseFilterFactory"/>
  831 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
  832 + <filter class="solr.GermanNormalizationFilterFactory"/>
  833 + <filter class="solr.GermanLightStemFilterFactory"/>
  834 + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
  835 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
  836 + </analyzer>
  837 + </fieldType>
  838 +
  839 + <!-- Greek -->
  840 + <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
  841 + <analyzer>
  842 + <tokenizer class="solr.StandardTokenizerFactory"/>
  843 + <!-- greek specific lowercase for sigma -->
  844 + <filter class="solr.GreekLowerCaseFilterFactory"/>
  845 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
  846 + <filter class="solr.GreekStemFilterFactory"/>
  847 + </analyzer>
  848 + </fieldType>
  849 +
  850 + <!-- Spanish -->
  851 + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
  852 + <analyzer>
  853 + <tokenizer class="solr.StandardTokenizerFactory"/>
  854 + <filter class="solr.LowerCaseFilterFactory"/>
  855 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
  856 + <filter class="solr.SpanishLightStemFilterFactory"/>
  857 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
  858 + </analyzer>
  859 + </fieldType>
  860 +
  861 + <!-- Basque -->
  862 + <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
  863 + <analyzer>
  864 + <tokenizer class="solr.StandardTokenizerFactory"/>
  865 + <filter class="solr.LowerCaseFilterFactory"/>
  866 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
  867 + <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
  868 + </analyzer>
  869 + </fieldType>
  870 +
  871 + <!-- Persian -->
  872 + <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
  873 + <analyzer>
  874 + <!-- for ZWNJ -->
  875 + <charFilter class="solr.PersianCharFilterFactory"/>
  876 + <tokenizer class="solr.StandardTokenizerFactory"/>
  877 + <filter class="solr.LowerCaseFilterFactory"/>
  878 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  879 + <filter class="solr.PersianNormalizationFilterFactory"/>
  880 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
  881 + </analyzer>
  882 + </fieldType>
  883 +
  884 + <!-- Finnish -->
  885 + <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
  886 + <analyzer>
  887 + <tokenizer class="solr.StandardTokenizerFactory"/>
  888 + <filter class="solr.LowerCaseFilterFactory"/>
  889 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
  890 + <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
  891 + <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
  892 + </analyzer>
  893 + </fieldType>
  894 +
  895 + <!-- French -->
  896 + <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  897 + <analyzer>
  898 + <tokenizer class="solr.StandardTokenizerFactory"/>
  899 + <!-- removes l', etc -->
  900 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
  901 + <filter class="solr.LowerCaseFilterFactory"/>
  902 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
  903 + <filter class="solr.FrenchLightStemFilterFactory"/>
  904 + <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
  905 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
  906 + </analyzer>
  907 + </fieldType>
  908 +
  909 + <!-- Irish -->
  910 + <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  911 + <analyzer>
  912 + <tokenizer class="solr.StandardTokenizerFactory"/>
  913 + <!-- removes d', etc -->
  914 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
  915 + <!-- removes n-, etc. position increments is intentionally false! -->
  916 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
  917 + <filter class="solr.IrishLowerCaseFilterFactory"/>
  918 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
  919 + <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  920 + </analyzer>
  921 + </fieldType>
  922 +
  923 + <!-- Galician -->
  924 + <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  925 + <analyzer>
  926 + <tokenizer class="solr.StandardTokenizerFactory"/>
  927 + <filter class="solr.LowerCaseFilterFactory"/>
  928 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
  929 + <filter class="solr.GalicianStemFilterFactory"/>
  930 + <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
  931 + </analyzer>
  932 + </fieldType>
  933 +
  934 + <!-- Hindi -->
  935 + <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  936 + <analyzer>
  937 + <tokenizer class="solr.StandardTokenizerFactory"/>
  938 + <filter class="solr.LowerCaseFilterFactory"/>
  939 + <!-- normalizes unicode representation -->
  940 + <filter class="solr.IndicNormalizationFilterFactory"/>
  941 + <!-- normalizes variation in spelling -->
  942 + <filter class="solr.HindiNormalizationFilterFactory"/>
  943 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
  944 + <filter class="solr.HindiStemFilterFactory"/>
  945 + </analyzer>
  946 + </fieldType>
  947 +
  948 + <!-- Hungarian -->
  949 + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  950 + <analyzer>
  951 + <tokenizer class="solr.StandardTokenizerFactory"/>
  952 + <filter class="solr.LowerCaseFilterFactory"/>
  953 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
  954 + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
  955 + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
  956 + </analyzer>
  957 + </fieldType>
  958 +
  959 + <!-- Armenian -->
  960 + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  961 + <analyzer>
  962 + <tokenizer class="solr.StandardTokenizerFactory"/>
  963 + <filter class="solr.LowerCaseFilterFactory"/>
  964 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
  965 + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  966 + </analyzer>
  967 + </fieldType>
  968 +
  969 + <!-- Indonesian -->
  970 + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  971 + <analyzer>
  972 + <tokenizer class="solr.StandardTokenizerFactory"/>
  973 + <filter class="solr.LowerCaseFilterFactory"/>
  974 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
  975 + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
  976 + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  977 + </analyzer>
  978 + </fieldType>
  979 +
  980 + <!-- Italian -->
  981 + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  982 + <analyzer>
  983 + <tokenizer class="solr.StandardTokenizerFactory"/>
  984 + <!-- removes l', etc -->
  985 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
  986 + <filter class="solr.LowerCaseFilterFactory"/>
  987 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
  988 + <filter class="solr.ItalianLightStemFilterFactory"/>
  989 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
  990 + </analyzer>
  991 + </fieldType>
  992 +
  993 + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
  994 +
  995 + NOTE: If you want to optimize search for precision, use default operator AND in your query
  996 + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
  997 + OR if you would like to optimize for recall (default).
  998 + -->
  999 + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
  1000 + <analyzer>
  1001 + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
  1002 +
  1003 + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
  1004 + is used to segment compounds into their parts, and the compound itself is kept as a synonym.
  1005 +
  1006 + Valid values for attribute mode are:
  1007 + normal: regular segmentation
  1008 + search: segmentation useful for search, with compounds kept as synonyms (default)
  1009 + extended: same as search mode, but also splits unknown words into unigrams (experimental)
  1010 +
  1011 + For some applications it might be good to use search mode for indexing and normal mode for
  1012 + queries to reduce recall and prevent parts of compounds from being matched and highlighted.
  1013 + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
  1014 +
  1015 + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
  1016 + model with your own entries for segmentation, part-of-speech tags and readings without a need
  1017 + to specify weights. Notice that user dictionaries have not been subject to extensive testing.
  1018 +
  1019 + User dictionary attributes are:
  1020 + userDictionary: user dictionary filename
  1021 + userDictionaryEncoding: user dictionary encoding (default is UTF-8)
  1022 +
  1023 + See lang/userdict_ja.txt for a sample user dictionary file.
  1024 +
  1025 + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
  1026 +
  1027 + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
  1028 + -->
  1029 + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
  1030 + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
  1031 + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
  1032 + <filter class="solr.JapaneseBaseFormFilterFactory"/>
  1033 + <!-- Removes tokens with certain part-of-speech tags -->
  1034 + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
  1035 + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
  1036 + <filter class="solr.CJKWidthFilterFactory"/>
  1037 + <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
  1038 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
  1039 + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
  1040 + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
  1041 + <!-- Lower-cases romaji characters -->
  1042 + <filter class="solr.LowerCaseFilterFactory"/>
  1043 + </analyzer>
  1044 + </fieldType>
  1045 +
  1046 + <!-- Latvian -->
  1047 + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  1048 + <analyzer>
  1049 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1050 + <filter class="solr.LowerCaseFilterFactory"/>
  1051 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
  1052 + <filter class="solr.LatvianStemFilterFactory"/>
  1053 + </analyzer>
  1054 + </fieldType>
  1055 +
  1056 + <!-- Dutch -->
  1057 + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  1058 + <analyzer>
  1059 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1060 + <filter class="solr.LowerCaseFilterFactory"/>
  1061 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
  1062 + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
  1063 + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  1064 + </analyzer>
  1065 + </fieldType>
  1066 +
  1067 + <!-- Norwegian -->
  1068 + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  1069 + <analyzer>
  1070 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1071 + <filter class="solr.LowerCaseFilterFactory"/>
  1072 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
  1073 + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
  1074 + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
  1075 + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
  1076 + </analyzer>
  1077 + </fieldType>
  1078 +
  1079 + <!-- Portuguese: standard tokenizer, lowercasing, snowball-format stopword list, Brazilian stemming, then ASCII folding -->
  1080 + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  1081 + <analyzer>
  1082 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1083 + <filter class="solr.LowerCaseFilterFactory"/>
  1084 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
  1085 +
  1086 + <!-- begin local customization (tagged "mairon"): the light Portuguese stemmer is disabled in favour of the Brazilian stemmer, followed by ASCII folding of the resulting tokens -->
  1087 + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> -->
  1088 + <filter class="solr.BrazilianStemFilterFactory"/>
  1089 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  1090 + <!-- end local customization (tagged "mairon") -->
  1091 +
  1092 + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
  1093 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
  1094 + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
  1095 + </analyzer>
  1096 + </fieldType>
  1097 +
  1098 + <!-- Romanian -->
  1099 + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  1100 + <analyzer>
  1101 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1102 + <filter class="solr.LowerCaseFilterFactory"/>
  1103 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
  1104 + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  1105 + </analyzer>
  1106 + </fieldType>
  1107 +
  1108 + <!-- Russian -->
  1109 + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  1110 + <analyzer>
  1111 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1112 + <filter class="solr.LowerCaseFilterFactory"/>
  1113 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
  1114 + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
  1115 + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
  1116 + </analyzer>
  1117 + </fieldType>
  1118 +
  1119 + <!-- Swedish -->
  1120 + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  1121 + <analyzer>
  1122 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1123 + <filter class="solr.LowerCaseFilterFactory"/>
  1124 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
  1125 + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
  1126 + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  1127 + </analyzer>
  1128 + </fieldType>
  1129 +
  1130 + <!-- Thai -->
  1131 + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
  1132 + <analyzer>
  1133 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1134 + <filter class="solr.LowerCaseFilterFactory"/>
  1135 + <filter class="solr.ThaiWordFilterFactory"/>
  1136 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
  1137 + </analyzer>
  1138 + </fieldType>
  1139 +
  1140 + <!-- Turkish -->
  1141 + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  1142 + <analyzer>
  1143 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1144 + <filter class="solr.TurkishLowerCaseFilterFactory"/>
  1145 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
  1146 + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  1147 + </analyzer>
  1148 + </fieldType>
  1149 +
  1150 + </types>
  1151 +
  1152 + <!-- Similarity is the scoring routine for each document vs. a query.
  1153 + A custom Similarity or SimilarityFactory may be specified here, but
  1154 + the default is fine for most applications.
  1155 + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
  1156 + -->
  1157 + <!--
  1158 + <similarity class="com.example.solr.CustomSimilarityFactory">
  1159 + <str name="paramkey">param value</str>
  1160 + </similarity>
  1161 + -->
  1162 +
  1163 +</schema>
0 1164 \ No newline at end of file
... ...
index/sei-publicacoes-config.xml 0 → 100755
  1 +++ a/index/sei-publicacoes-config.xml
... ... @@ -0,0 +1,1773 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + For more details about configurations options that may appear in
  21 + this file, see http://wiki.apache.org/solr/SolrConfigXml.
  22 +-->
  23 +<config>
  24 + <!-- In all configuration below, a prefix of "solr." for class names
  25 + is an alias that causes solr to search appropriate packages,
  26 + including org.apache.solr.(search|update|request|core|analysis)
  27 +
  28 + You may also specify a fully qualified Java classname if you
  29 + have your own custom plugins.
  30 + -->
  31 +
  32 + <!-- Controls what version of Lucene various components of Solr
  33 + adhere to. Generally, you want to use the latest version to
  34 + get all bug fixes and improvements. It is highly recommended
  35 + that you fully re-index after changing this setting as it can
  36 + affect both how text is indexed and queried.
  37 + -->
  38 + <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
  39 +
  40 + <!-- <lib/> directives can be used to instruct Solr to load any Jars
  41 + identified and use them to resolve any "plugins" specified in
  42 + your solrconfig.xml or schema.xml (ie: Analyzers, Request
  43 + Handlers, etc...).
  44 +
  45 + All directories and paths are resolved relative to the
  46 + instanceDir.
  47 +
  48 + Please note that <lib/> directives are processed in the order
  49 + that they appear in your solrconfig.xml file, and are "stacked"
  50 + on top of each other when building a ClassLoader - so if you have
  51 + plugin jars with dependencies on other jars, the "lower level"
  52 + dependency jars should be loaded first.
  53 +
  54 + If a "./lib" directory exists in your instanceDir, all files
  55 + found in it are included as if you had used the following
  56 + syntax...
  57 +
  58 +-->
  59 + <lib dir="./lib" />
  60 +
  61 +
  62 + <!-- A 'dir' option by itself adds any files found in the directory
  63 + to the classpath, this is useful for including all jars in a
  64 + directory.
  65 +
  66 + When a 'regex' is specified in addition to a 'dir', only the
  67 + files in that directory which completely match the regex
  68 + (anchored on both ends) will be included.
  69 +
  70 + The examples below can be used to load some solr-contribs along
  71 + with their external dependencies.
  72 + -->
  73 + <lib dir="./contrib/extraction/lib" regex=".*\.jar" />
  74 + <lib dir="./dist/" regex="apache-solr-cell-\d.*\.jar" />
  75 +
  76 + <lib dir="./contrib/clustering/lib/" regex=".*\.jar" />
  77 + <lib dir="./dist/" regex="apache-solr-clustering-\d.*\.jar" />
  78 +
  79 + <lib dir="./contrib/langid/lib/" regex=".*\.jar" />
  80 + <lib dir="./dist/" regex="apache-solr-langid-\d.*\.jar" />
  81 +
  82 + <lib dir="./contrib/velocity/lib" regex=".*\.jar" />
  83 + <lib dir="./dist/" regex="apache-solr-velocity-\d.*\.jar" />
  84 +
  85 + <!-- If a 'dir' option (with or without a regex) is used and nothing
  86 + is found that matches, it will be ignored
  87 + -->
  88 + <lib dir="/total/crap/dir/ignored" />
  89 +
  90 + <!-- an exact 'path' can be used instead of a 'dir' to specify a
  91 + specific jar file. This will cause a serious error to be logged
  92 + if it can't be loaded.
  93 + -->
  94 + <!--
  95 + <lib path="../a-jar-that-does-not-exist.jar" />
  96 + -->
  97 +
  98 + <!-- Data Directory
  99 +
  100 + Used to specify an alternate directory to hold all index data
  101 + other than the default ./data under the Solr home. If
  102 + replication is in use, this should match the replication
  103 + configuration.
  104 + -->
  105 + <dataDir>${solr.data.dir:}</dataDir>
  106 +
  107 +
  108 + <!-- The DirectoryFactory to use for indexes.
  109 +
  110 + solr.StandardDirectoryFactory is filesystem
  111 + based and tries to pick the best implementation for the current
  112 + JVM and platform. solr.NRTCachingDirectoryFactory, the default,
  113 + wraps solr.StandardDirectoryFactory and caches small files in memory
  114 + for better NRT performance.
  115 +
  116 + One can force a particular implementation via solr.MMapDirectoryFactory,
  117 + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
  118 +
  119 + solr.RAMDirectoryFactory is memory based, not
  120 + persistent, and doesn't work with replication.
  121 + -->
  122 + <directoryFactory name="DirectoryFactory"
  123 + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
  124 +
  125 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  126 + Index Config - These settings control low-level behavior of indexing
  127 + Most example settings here show the default value, but are commented
  128 + out, to more easily see where customizations have been made.
  129 +
  130 + Note: This replaces <indexDefaults> and <mainIndex> from older versions
  131 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  132 + <indexConfig>
  133 + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
  134 + LimitTokenCountFilterFactory in your fieldType definition. E.g.
  135 + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
  136 + -->
  137 + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
  138 + <!-- <writeLockTimeout>1000</writeLockTimeout> -->
  139 +
  140 + <!-- Expert: Enabling compound file will use fewer files for the index,
  141 + and thus fewer file descriptors, at the expense of decreased performance.
  142 + Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
  143 + <!-- <useCompoundFile>false</useCompoundFile> -->
  144 +
  145 + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
  146 + indexing for buffering added documents and deletions before they are
  147 + flushed to the Directory.
  148 + maxBufferedDocs sets a limit on the number of documents buffered
  149 + before flushing.
  150 + If both ramBufferSizeMB and maxBufferedDocs are set, then
  151 + Lucene will flush based on whichever limit is hit first. -->
  152 + <!-- <ramBufferSizeMB>32</ramBufferSizeMB> -->
  153 + <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
  154 +
  155 + <!-- Expert: Merge Policy
  156 + The Merge Policy in Lucene controls how merging of segments is done.
  157 + The default since Solr/Lucene 3.3 is TieredMergePolicy.
  158 + The default since Lucene 2.3 was the LogByteSizeMergePolicy,
  159 + Even older versions of Lucene used LogDocMergePolicy.
  160 + -->
  161 + <!--
  162 + <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
  163 + <int name="maxMergeAtOnce">10</int>
  164 + <int name="segmentsPerTier">10</int>
  165 + </mergePolicy>
  166 + -->
  167 +
  168 + <!-- Merge Factor
  169 + The merge factor controls how many segments will get merged at a time.
  170 + For TieredMergePolicy, mergeFactor is a convenience parameter which
  171 + will set both MaxMergeAtOnce and SegmentsPerTier at once.
  172 + For LogByteSizeMergePolicy, mergeFactor decides how many new segments
  173 + will be allowed before they are merged into one.
  174 + Default is 10 for both merge policies.
  175 + -->
  176 + <!--
  177 + <mergeFactor>10</mergeFactor>
  178 + -->
  179 +
  180 + <!-- Expert: Merge Scheduler
  181 + The Merge Scheduler in Lucene controls how merges are
  182 + performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
  183 + can perform merges in the background using separate threads.
  184 + The SerialMergeScheduler (Lucene 2.2 default) does not.
  185 + -->
  186 + <!--
  187 + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
  188 + -->
  189 +
  190 + <!-- LockFactory
  191 +
  192 + This option specifies which Lucene LockFactory implementation
  193 + to use.
  194 +
  195 + single = SingleInstanceLockFactory - suggested for a
  196 + read-only index or when there is no possibility of
  197 + another process trying to modify the index.
  198 + native = NativeFSLockFactory - uses OS native file locking.
  199 + Do not use when multiple solr webapps in the same
  200 + JVM are attempting to share a single index.
  201 + simple = SimpleFSLockFactory - uses a plain file for locking
  202 +
  203 + Defaults: 'native' is default for Solr3.6 and later, otherwise
  204 + 'simple' is the default
  205 +
  206 + More details on the nuances of each LockFactory...
  207 + http://wiki.apache.org/lucene-java/AvailableLockFactories
  208 + -->
  209 + <!-- <lockType>native</lockType> -->
  210 +
  211 + <!-- Unlock On Startup
  212 +
  213 + If true, unlock any held write or commit locks on startup.
  214 + This defeats the locking mechanism that allows multiple
  215 + processes to safely access a lucene index, and should be used
  216 + with care. Default is "false".
  217 +
  218 + This is not needed if lock type is 'none' or 'single'
  219 + -->
  220 + <!--
  221 + <unlockOnStartup>false</unlockOnStartup>
  222 + -->
  223 +
  224 + <!-- Expert: Controls how often Lucene loads terms into memory
  225 + Default is 128 and is likely good for most everyone.
  226 + -->
  227 + <!-- <termIndexInterval>128</termIndexInterval> -->
  228 +
  229 + <!-- If true, IndexReaders will be reopened (often more efficient)
  230 + instead of closed and then opened. Default: true
  231 + -->
  232 + <!--
  233 + <reopenReaders>true</reopenReaders>
  234 + -->
  235 +
  236 + <!-- Commit Deletion Policy
  237 +
  238 + Custom deletion policies can be specified here. The class must
  239 + implement org.apache.lucene.index.IndexDeletionPolicy.
  240 +
  241 + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html
  242 +
  243 + The default Solr IndexDeletionPolicy implementation supports
  244 + deleting index commit points on number of commits, age of
  245 + commit point and optimized status.
  246 +
  247 + The latest commit point should always be preserved regardless
  248 + of the criteria.
  249 + -->
  250 + <!--
  251 + <deletionPolicy class="solr.SolrDeletionPolicy">
  252 + -->
  253 + <!-- The number of commit points to be kept -->
  254 + <!-- <str name="maxCommitsToKeep">1</str> -->
  255 + <!-- The number of optimized commit points to be kept -->
  256 + <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
  257 + <!--
  258 + Delete all commit points once they have reached the given age.
  259 + Supports DateMathParser syntax e.g.
  260 + -->
  261 + <!--
  262 + <str name="maxCommitAge">30MINUTES</str>
  263 + <str name="maxCommitAge">1DAY</str>
  264 + -->
  265 + <!--
  266 + </deletionPolicy>
  267 + -->
  268 +
  269 + <!-- Lucene Infostream
  270 +
  271 + To aid in advanced debugging, Lucene provides an "InfoStream"
  272 + of detailed information when indexing.
  273 +
  274 + Setting the value to true will instruct the underlying Lucene
  275 + IndexWriter to write its debugging info to the specified file
  276 + -->
  277 + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
  278 + </indexConfig>
  279 +
  280 +
  281 + <!-- JMX
  282 +
  283 + This example enables JMX if and only if an existing MBeanServer
  284 + is found, use this if you want to configure JMX through JVM
  285 + parameters. Remove this to disable exposing Solr configuration
  286 + and statistics to JMX.
  287 +
  288 + For more details see http://wiki.apache.org/solr/SolrJmx
  289 + -->
  290 + <jmx />
  291 + <!-- If you want to connect to a particular server, specify the
  292 + agentId
  293 + -->
  294 + <!-- <jmx agentId="myAgent" /> -->
  295 + <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
  296 + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  297 + -->
  298 +
  299 + <!-- The default high-performance update handler -->
  300 + <updateHandler class="solr.DirectUpdateHandler2">
  301 +
  302 + <!-- AutoCommit
  303 +
  304 + Perform a hard commit automatically under certain conditions.
  305 + Instead of enabling autoCommit, consider using "commitWithin"
  306 + when adding documents.
  307 +
  308 + http://wiki.apache.org/solr/UpdateXmlMessages
  309 +
  310 + maxDocs - Maximum number of documents to add since the last
  311 + commit before automatically triggering a new commit.
  312 +
  313 + maxTime - Maximum amount of time in ms that is allowed to pass
  314 + since a document was added before automatically
  315 + triggering a new commit.
  316 + openSearcher - if false, the commit causes recent index changes
  317 + to be flushed to stable storage, but does not cause a new
  318 + searcher to be opened to make those changes visible.
  319 + -->
  320 + <autoCommit>
  321 + <maxTime>300000</maxTime>
  322 + <openSearcher>false</openSearcher>
  323 + </autoCommit>
  324 +
  325 + <!-- softAutoCommit is like autoCommit except it causes a
  326 + 'soft' commit which only ensures that changes are visible
  327 + but does not ensure that data is synced to disk. This is
  328 + faster and more near-realtime friendly than a hard commit.
  329 + -->
  330 + <!--
  331 + <autoSoftCommit>
  332 + <maxTime>1000</maxTime>
  333 + </autoSoftCommit>
  334 + -->
  335 +
  336 + <!-- Update Related Event Listeners
  337 +
  338 + Various IndexWriter related events can trigger Listeners to
  339 + take actions.
  340 +
  341 + postCommit - fired after every commit or optimize command
  342 + postOptimize - fired after every optimize command
  343 + -->
  344 + <!-- The RunExecutableListener executes an external command from a
  345 + hook such as postCommit or postOptimize.
  346 +
  347 + exe - the name of the executable to run
  348 + dir - dir to use as the current working directory. (default=".")
  349 + wait - the calling thread waits until the executable returns.
  350 + (default="true")
  351 + args - the arguments to pass to the program. (default is none)
  352 + env - environment variables to set. (default is none)
  353 + -->
  354 + <!-- This example shows how RunExecutableListener could be used
  355 + with the script based replication...
  356 + http://wiki.apache.org/solr/CollectionDistribution
  357 + -->
  358 + <!--
  359 + <listener event="postCommit" class="solr.RunExecutableListener">
  360 + <str name="exe">solr/bin/snapshooter</str>
  361 + <str name="dir">.</str>
  362 + <bool name="wait">true</bool>
  363 + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
  364 + <arr name="env"> <str>MYVAR=val1</str> </arr>
  365 + </listener>
  366 + -->
  367 +
  368 + <!-- Enables a transaction log, currently used for real-time get.
  369 + "dir" - the target directory for transaction logs, defaults to the
  370 + solr data directory. -->
  371 + <updateLog>
  372 + <str name="dir">${solr.data.dir:}</str>
  373 + </updateLog>
  374 +
  375 +
  376 + </updateHandler>
  377 +
  378 + <!-- IndexReaderFactory
  379 +
  380 + Use the following format to specify a custom IndexReaderFactory,
  381 + which allows for alternate IndexReader implementations.
  382 +
  383 + ** Experimental Feature **
  384 +
  385 + Please note - Using a custom IndexReaderFactory may prevent
  386 + certain other features from working. The API to
  387 + IndexReaderFactory may change without warning or may even be
  388 + removed from future releases if the problems cannot be
  389 + resolved.
  390 +
  391 +
  392 + ** Features that may not work with custom IndexReaderFactory **
  393 +
  394 + The ReplicationHandler assumes a disk-resident index. Using a
  395 + custom IndexReader implementation may cause incompatibility
  396 + with ReplicationHandler and may cause replication to not work
  397 + correctly. See SOLR-1366 for details.
  398 +
  399 + -->
  400 + <!--
  401 + <indexReaderFactory name="IndexReaderFactory" class="package.class">
  402 + <str name="someArg">Some Value</str>
  403 + </indexReaderFactory >
  404 + -->
  405 + <!-- By explicitly declaring the Factory, the termIndexDivisor can
  406 + be specified.
  407 + -->
  408 + <!--
  409 + <indexReaderFactory name="IndexReaderFactory"
  410 + class="solr.StandardIndexReaderFactory">
  411 + <int name="setTermIndexDivisor">12</int>
  412 + </indexReaderFactory >
  413 + -->
  414 +
  415 + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  416 + Query section - these settings control query time things like caches
  417 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
  418 + <query>
  419 + <!-- Max Boolean Clauses
  420 +
  421 + Maximum number of clauses in each BooleanQuery, an exception
  422 + is thrown if exceeded.
  423 +
  424 + ** WARNING **
  425 +
  426 + This option actually modifies a global Lucene property that
  427 + will affect all SolrCores. If multiple solrconfig.xml files
  428 + disagree on this property, the value at any given moment will
  429 + be based on the last SolrCore to be initialized.
  430 +
  431 + -->
  432 + <maxBooleanClauses>1024</maxBooleanClauses>
  433 +
  434 +
  435 + <!-- Solr Internal Query Caches
  436 +
  437 + There are two implementations of cache available for Solr,
  438 + LRUCache, based on a synchronized LinkedHashMap, and
  439 + FastLRUCache, based on a ConcurrentHashMap.
  440 +
  441 + FastLRUCache has faster gets and slower puts in single
  442 + threaded operation and thus is generally faster than LRUCache
  443 + when the hit ratio of the cache is high (> 75%), and may be
  444 + faster under other scenarios on multi-cpu systems.
  445 + -->
  446 +
  447 + <!-- Filter Cache
  448 +
  449 + Cache used by SolrIndexSearcher for filters (DocSets),
  450 + unordered sets of *all* documents that match a query. When a
  451 + new searcher is opened, its caches may be prepopulated or
  452 + "autowarmed" using data from caches in the old searcher.
  453 + autowarmCount is the number of items to prepopulate. For
  454 + LRUCache, the autowarmed items will be the most recently
  455 + accessed items.
  456 +
  457 + Parameters:
  458 + class - the SolrCache implementation to use
  459 + (LRUCache or FastLRUCache)
  460 + size - the maximum number of entries in the cache
  461 + initialSize - the initial capacity (number of entries) of
  462 + the cache. (see java.util.HashMap)
  463 + autowarmCount - the number of entries to prepopulate from
  464 + an old cache.
  465 + -->
  466 + <filterCache class="solr.FastLRUCache"
  467 + size="512"
  468 + initialSize="512"
  469 + autowarmCount="0"/>
  470 +
  471 + <!-- Query Result Cache
  472 +
  473 + Caches results of searches - ordered lists of document ids
  474 + (DocList) based on a query, a sort, and the range of documents requested.
  475 + -->
  476 + <queryResultCache class="solr.LRUCache"
  477 + size="512"
  478 + initialSize="512"
  479 + autowarmCount="0"/>
  480 +
  481 + <!-- Document Cache
  482 +
  483 + Caches Lucene Document objects (the stored fields for each
  484 + document). Since Lucene internal document ids are transient,
  485 + this cache will not be autowarmed.
  486 + -->
  487 + <documentCache class="solr.LRUCache"
  488 + size="512"
  489 + initialSize="512"
  490 + autowarmCount="0"/>
  491 +
  492 + <!-- Field Value Cache
  493 +
  494 + Cache used to hold field values that are quickly accessible
  495 + by document id. The fieldValueCache is created by default
  496 + even if not configured here.
  497 + -->
  498 + <!--
  499 + <fieldValueCache class="solr.FastLRUCache"
  500 + size="512"
  501 + autowarmCount="128"
  502 + showItems="32" />
  503 + -->
  504 +
  505 + <!-- Custom Cache
  506 +
  507 + Example of a generic cache. These caches may be accessed by
  508 + name through SolrIndexSearcher.getCache(),cacheLookup(), and
  509 + cacheInsert(). The purpose is to enable easy caching of
  510 + user/application level data. The regenerator argument should
  511 + be specified as an implementation of solr.CacheRegenerator
  512 + if autowarming is desired.
  513 + -->
  514 + <!--
  515 + <cache name="myUserCache"
  516 + class="solr.LRUCache"
  517 + size="4096"
  518 + initialSize="1024"
  519 + autowarmCount="1024"
  520 + regenerator="com.mycompany.MyRegenerator"
  521 + />
  522 + -->
  523 +
  524 +
  525 + <!-- Lazy Field Loading
  526 +
  527 + If true, stored fields that are not requested will be loaded
  528 + lazily. This can result in a significant speed improvement
  529 + if the usual case is to not load all stored fields,
  530 + especially if the skipped fields are large compressed text
  531 + fields.
  532 + -->
  533 + <enableLazyFieldLoading>true</enableLazyFieldLoading>
  534 +
  535 + <!-- Use Filter For Sorted Query
  536 +
  537 + A possible optimization that attempts to use a filter to
  538 + satisfy a search. If the requested sort does not include
  539 + score, then the filterCache will be checked for a filter
  540 + matching the query. If found, the filter will be used as the
  541 + source of document ids, and then the sort will be applied to
  542 + that.
  543 +
  544 + For most situations, this will not be useful unless you
  545 + frequently get the same search repeatedly with different sort
  546 + options, and none of them ever use "score"
  547 + -->
  548 + <!--
  549 + <useFilterForSortedQuery>true</useFilterForSortedQuery>
  550 + -->
  551 +
  552 + <!-- Result Window Size
  553 +
  554 + An optimization for use with the queryResultCache. When a search
  555 + is requested, a superset of the requested number of document ids
  556 + are collected. For example, if a search for a particular query
  557 + requests matching documents 10 through 19, and queryResultWindowSize is 50,
  558 + then documents 0 through 49 will be collected and cached. Any further
  559 + requests in that range can be satisfied via the cache.
  560 + -->
  561 + <queryResultWindowSize>20</queryResultWindowSize>
  562 +
  563 + <!-- Maximum number of documents to cache for any entry in the
  564 + queryResultCache.
  565 + -->
  566 + <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
  567 +
  568 + <!-- Query Related Event Listeners
  569 +
  570 + Various IndexSearcher related events can trigger Listeners to
  571 + take actions.
  572 +
  573 + newSearcher - fired whenever a new searcher is being prepared
  574 + and there is a current searcher handling requests (aka
  575 + registered). It can be used to prime certain caches to
  576 + prevent long request times for certain requests.
  577 +
  578 + firstSearcher - fired whenever a new searcher is being
  579 + prepared but there is no current registered searcher to handle
  580 + requests or to gain autowarming data from.
  581 +
  582 +
  583 + -->
  584 + <!-- QuerySenderListener takes an array of NamedList and executes a
  585 + local query request for each NamedList in sequence.
  586 + -->
  587 + <listener event="newSearcher" class="solr.QuerySenderListener">
  588 + <arr name="queries">
  589 + <!--
  590 + <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
  591 + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
  592 + -->
  593 + </arr>
  594 + </listener>
  595 + <listener event="firstSearcher" class="solr.QuerySenderListener">
  596 + <arr name="queries">
  597 + <lst>
  598 + <str name="q">static firstSearcher warming in solrconfig.xml</str>
  599 + </lst>
  600 + </arr>
  601 + </listener>
  602 +
  603 + <!-- Use Cold Searcher
  604 +
  605 + If a search request comes in and there is no current
  606 + registered searcher, then immediately register the still
  607 + warming searcher and use it. If "false" then all requests
  608 + will block until the first searcher is done warming.
  609 + -->
  610 + <useColdSearcher>false</useColdSearcher>
  611 +
  612 + <!-- Max Warming Searchers
  613 +
  614 + Maximum number of searchers that may be warming in the
  615 + background concurrently. An error is returned if this limit
  616 + is exceeded.
  617 +
  618 + Recommend values of 1-2 for read-only slaves, higher for
  619 + masters w/o cache warming.
  620 + -->
  621 + <maxWarmingSearchers>2</maxWarmingSearchers>
  622 +
  623 + </query>
  624 +
  625 +
  626 + <!-- Request Dispatcher
  627 +
  628 + This section contains instructions for how the SolrDispatchFilter
  629 + should behave when processing requests for this SolrCore.
  630 +
  631 + handleSelect is a legacy option that affects the behavior of requests
  632 + such as /select?qt=XXX
  633 +
  634 + handleSelect="true" will cause the SolrDispatchFilter to process
  635 + the request and dispatch the query to a handler specified by the
  636 + "qt" param, assuming "/select" isn't already registered.
  637 +
  638 + handleSelect="false" will cause the SolrDispatchFilter to
  639 + ignore "/select" requests, resulting in a 404 unless a handler
  640 + is explicitly registered with the name "/select"
  641 +
  642 + handleSelect="true" is not recommended for new users, but is the default
  643 + for backwards compatibility
  644 + -->
  645 + <requestDispatcher handleSelect="false" >
  646 + <!-- Request Parsing
  647 +
  648 + These settings indicate how Solr Requests may be parsed, and
  649 + what restrictions may be placed on the ContentStreams from
  650 + those requests
  651 +
  652 + enableRemoteStreaming - enables use of the stream.file
  653 + and stream.url parameters for specifying remote streams.
  654 +
  655 + multipartUploadLimitInKB - specifies the max size of
  656 + Multipart File Uploads that Solr will allow in a Request.
  657 +
  658 + *** WARNING ***
  659 + The settings below authorize Solr to fetch remote files. You
  660 + should make sure your system has some authentication before
  661 + using enableRemoteStreaming="true"
  662 +
  663 + -->
  664 + <requestParsers enableRemoteStreaming="true"
  665 + multipartUploadLimitInKB="2048000" />
  666 +
  667 + <!-- HTTP Caching
  668 +
  669 + Set HTTP caching related parameters (for proxy caches and clients).
  670 +
  671 + The options below instruct Solr not to output any HTTP Caching
  672 + related headers
  673 + -->
  674 + <httpCaching never304="true" />
  675 + <!-- If you include a <cacheControl> directive, it will be used to
  676 + generate a Cache-Control header (as well as an Expires header
  677 + if the value contains "max-age=")
  678 +
  679 + By default, no Cache-Control header is generated.
  680 +
  681 + You can use the <cacheControl> option even if you have set
  682 + never304="true"
  683 + -->
  684 + <!--
  685 + <httpCaching never304="true" >
  686 + <cacheControl>max-age=30, public</cacheControl>
  687 + </httpCaching>
  688 + -->
  689 + <!-- To enable Solr to respond with automatically generated HTTP
  690 + Caching headers, and to respond to Cache Validation requests
  691 + correctly, set the value of never304="false"
  692 +
  693 + This will cause Solr to generate Last-Modified and ETag
  694 + headers based on the properties of the Index.
  695 +
  696 + The following options can also be specified to affect the
  697 + values of these headers...
  698 +
  699 + lastModFrom - the default value is "openTime" which means the
  700 + Last-Modified value (and validation against If-Modified-Since
  701 + requests) will all be relative to when the current Searcher
  702 + was opened. You can change it to lastModFrom="dirLastMod" if
  703 + you want the value to exactly correspond to when the physical
  704 + index was last modified.
  705 +
  706 + etagSeed="..." is an option you can change to force the ETag
  707 + header (and validation against If-None-Match requests) to be
  708 + different even if the index has not changed (ie: when making
  709 + significant changes to your config file)
  710 +
  711 + (lastModFrom and etagSeed are both ignored if you use
  712 + the never304="true" option)
  713 + -->
  714 + <!--
  715 + <httpCaching lastModFrom="openTime"
  716 + etagSeed="Solr">
  717 + <cacheControl>max-age=30, public</cacheControl>
  718 + </httpCaching>
  719 + -->
  720 + </requestDispatcher>
  721 +
  722 + <!-- Request Handlers
  723 +
  724 + http://wiki.apache.org/solr/SolrRequestHandler
  725 +
  726 + Incoming queries will be dispatched to a specific handler by name
  727 + based on the path specified in the request.
  728 +
  729 + Legacy behavior: If the request path uses "/select" but no Request
  730 + Handler has that name, and if handleSelect="true" has been specified in
  731 + the requestDispatcher, then the Request Handler is dispatched based on
  732 + the qt parameter. Handlers without a leading '/' are accessed this way
  733 + like so: http://host/app/[core/]select?qt=name If no qt is
  734 + given, then the requestHandler that declares default="true" will be
  735 + used or the one named "standard".
  736 +
  737 + If a Request Handler is declared with startup="lazy", then it will
  738 + not be initialized until the first request that uses it.
  739 +
  740 + -->
  741 + <!-- SearchHandler
  742 +
  743 + http://wiki.apache.org/solr/SearchHandler
  744 +
  745 + For processing Search Queries, the primary Request Handler
  746 + provided with Solr is "SearchHandler". It delegates to a sequence
  747 + of SearchComponents (see below) and supports distributed
  748 + queries across multiple shards
  749 + -->
  750 + <requestHandler name="/select" class="solr.SearchHandler">
  751 + <!-- default values for query parameters can be specified, these
  752 + will be overridden by parameters in the request
  753 + -->
  754 + <lst name="defaults">
  755 + <str name="echoParams">explicit</str>
  756 + <int name="rows">10</int>
  757 + <str name="df">text</str>
  758 + </lst>
  759 + <!-- In addition to defaults, "appends" params can be specified
  760 + to identify values which should be appended to the list of
  761 + multi-val params from the query (or the existing "defaults").
  762 + -->
  763 + <!-- In this example, the param "fq=instock:true" would be appended to
  764 + any query time fq params the user may specify, as a mechanism for
  765 + partitioning the index, independent of any user selected filtering
  766 + that may also be desired (perhaps as a result of faceted searching).
  767 +
  768 + NOTE: there is *absolutely* nothing a client can do to prevent these
  769 + "appends" values from being used, so don't use this mechanism
  770 + unless you are sure you always want it.
  771 + -->
  772 + <!--
  773 + <lst name="appends">
  774 + <str name="fq">inStock:true</str>
  775 + </lst>
  776 + -->
  777 + <!-- "invariants" are a way of letting the Solr maintainer lock down
  778 + the options available to Solr clients. Any params values
  779 + specified here are used regardless of what values may be specified
  780 + in either the query, the "defaults", or the "appends" params.
  781 +
  782 + In this example, the facet.field and facet.query params would
  783 + be fixed, limiting the facets clients can use. Faceting is
  784 + not turned on by default - but if the client does specify
  785 + facet=true in the request, these are the only facets they
  786 + will be able to see counts for; regardless of what other
  787 + facet.field or facet.query params they may specify.
  788 +
  789 + NOTE: there is *absolutely* nothing a client can do to prevent these
  790 + "invariants" values from being used, so don't use this mechanism
  791 + unless you are sure you always want it.
  792 + -->
  793 + <!--
  794 + <lst name="invariants">
  795 + <str name="facet.field">cat</str>
  796 + <str name="facet.field">manu_exact</str>
  797 + <str name="facet.query">price:[* TO 500]</str>
  798 + <str name="facet.query">price:[500 TO *]</str>
  799 + </lst>
  800 + -->
  801 + <!-- If the default list of SearchComponents is not desired, that
  802 + list can either be overridden completely, or components can be
  803 + prepended or appended to the default list. (see below)
  804 + -->
  805 + <!--
  806 + <arr name="components">
  807 + <str>nameOfCustomComponent1</str>
  808 + <str>nameOfCustomComponent2</str>
  809 + </arr>
  810 + -->
  811 +
  812 + </requestHandler>
  813 +
  814 + <!-- A request handler that returns indented JSON by default -->
  815 + <requestHandler name="/query" class="solr.SearchHandler">
  816 + <lst name="defaults">
  817 + <str name="echoParams">explicit</str>
  818 + <str name="wt">json</str>
  819 + <str name="indent">true</str>
  820 + <str name="df">text</str>
  821 + </lst>
  822 + </requestHandler>
  823 +
  824 +
  825 + <!-- realtime get handler, guaranteed to return the latest stored fields of
  826 + any document, without the need to commit or open a new searcher. The
  827 + current implementation relies on the updateLog feature being enabled. -->
  828 + <requestHandler name="/get" class="solr.RealTimeGetHandler">
  829 + <lst name="defaults">
  830 + <str name="omitHeader">true</str>
  831 + <str name="wt">json</str>
  832 + <str name="indent">true</str>
  833 + </lst>
  834 + </requestHandler>
  835 +
  836 +
  837 + <!-- A Robust Example
  838 +
  839 + This example SearchHandler declaration shows off usage of the
  840 + SearchHandler with many defaults declared
  841 +
  842 + Note that the same Request Handler class (SearchHandler) can
  843 + be registered multiple times with different names (and
  844 + different init parameters)
  845 + -->
  846 + <requestHandler name="/browse" class="solr.SearchHandler">
  847 + <lst name="defaults">
  848 + <str name="echoParams">explicit</str>
  849 +
  850 + <!-- VelocityResponseWriter settings -->
  851 + <str name="wt">velocity</str>
  852 + <str name="v.template">browse</str>
  853 + <str name="v.layout">layout</str>
  854 + <str name="title">Solritas</str>
  855 +
  856 + <!-- Query settings -->
  857 + <str name="defType">edismax</str>
  858 + <str name="qf">
  859 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  860 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  861 + </str>
  862 + <str name="df">text</str>
  863 + <str name="mm">100%</str>
  864 + <str name="q.alt">*:*</str>
  865 + <str name="rows">10</str>
  866 + <str name="fl">*,score</str>
  867 +
  868 + <str name="mlt.qf">
  869 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  870 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
  871 + </str>
  872 + <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
  873 + <int name="mlt.count">3</int>
  874 +
  875 + <!-- Faceting defaults -->
  876 + <str name="facet">on</str>
  877 + <str name="facet.field">cat</str>
  878 + <str name="facet.field">manu_exact</str>
  879 + <str name="facet.field">content_type</str>
  880 + <str name="facet.field">author_s</str>
  881 + <str name="facet.query">ipod</str>
  882 + <str name="facet.query">GB</str>
  883 + <str name="facet.mincount">1</str>
  884 + <str name="facet.pivot">cat,inStock</str>
  885 + <str name="facet.range.other">after</str>
  886 + <str name="facet.range">price</str>
  887 + <int name="f.price.facet.range.start">0</int>
  888 + <int name="f.price.facet.range.end">600</int>
  889 + <int name="f.price.facet.range.gap">50</int>
  890 + <str name="facet.range">popularity</str>
  891 + <int name="f.popularity.facet.range.start">0</int>
  892 + <int name="f.popularity.facet.range.end">10</int>
  893 + <int name="f.popularity.facet.range.gap">3</int>
  894 + <str name="facet.range">manufacturedate_dt</str>
  895 + <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
  896 + <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
  897 + <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
  898 + <str name="f.manufacturedate_dt.facet.range.other">before</str>
  899 + <str name="f.manufacturedate_dt.facet.range.other">after</str>
  900 +
  901 + <!-- Highlighting defaults -->
  902 + <str name="hl">on</str>
  903 + <str name="hl.fl">content</str>
  904 + <str name="hl.encoder">html</str>
  905 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  906 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  907 + <str name="f.title.hl.fragsize">0</str>
  908 + <str name="f.title.hl.alternateField">title</str>
  909 + <str name="f.name.hl.fragsize">0</str>
  910 + <str name="f.name.hl.alternateField">name</str>
  911 + <str name="f.content.hl.snippets">3</str>
  912 + <str name="f.content.hl.fragsize">1000</str>
  913 + <str name="f.content.hl.alternateField">content</str>
  914 + <str name="f.content.hl.maxAlternateFieldLength">250</str>
  915 +
  916 +
  917 + <!-- Spell checking defaults -->
  918 + <str name="spellcheck">on</str>
  919 + <str name="spellcheck.extendedResults">false</str>
  920 + <str name="spellcheck.count">5</str>
  921 + <str name="spellcheck.alternativeTermCount">2</str>
  922 + <str name="spellcheck.maxResultsForSuggest">5</str>
  923 + <str name="spellcheck.collate">true</str>
  924 + <str name="spellcheck.collateExtendedResults">true</str>
  925 + <str name="spellcheck.maxCollationTries">5</str>
  926 + <str name="spellcheck.maxCollations">3</str>
  927 + </lst>
  928 +
  929 + <!-- append spellchecking to our list of components -->
  930 + <arr name="last-components">
  931 + <str>spellcheck</str>
  932 + </arr>
  933 + </requestHandler>
  934 +
  935 +
  936 + <!-- Update Request Handler.
  937 +
  938 + http://wiki.apache.org/solr/UpdateXmlMessages
  939 +
  940 + The canonical Request Handler for Modifying the Index through
  941 + commands specified using XML, JSON, CSV, or JAVABIN
  942 +
  943 + Note: Since solr1.1 requestHandlers require a valid content
  944 + type header if posted in the body. For example, curl now
  945 + requires: -H 'Content-type:text/xml; charset=utf-8'
  946 +
  947 + To override the request content type and force a specific
  948 + Content-type, use the request parameter:
  949 + ?update.contentType=text/csv
  950 +
  951 + This handler will pick a response format to match the input
  952 + if the 'wt' parameter is not explicit
  953 + -->
  954 + <requestHandler name="/update" class="solr.UpdateRequestHandler">
  955 + <!-- See below for information on defining
  956 + updateRequestProcessorChains that can be used by name
  957 + on each Update Request
  958 + -->
  959 + <!--
  960 + <lst name="defaults">
  961 + <str name="update.chain">dedupe</str>
  962 + </lst>
  963 + -->
  964 + </requestHandler>
  965 +
  966 + <!-- for back compat with clients using /update/json and /update/csv -->
  967 + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
  968 + <lst name="defaults">
  969 + <str name="stream.contentType">application/json</str>
  970 + </lst>
  971 + </requestHandler>
  972 + <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
  973 + <lst name="defaults">
  974 + <str name="stream.contentType">application/csv</str>
  975 + </lst>
  976 + </requestHandler>
  977 +
  978 + <!-- Solr Cell Update Request Handler
  979 +
  980 + http://wiki.apache.org/solr/ExtractingRequestHandler
  981 +
  982 + -->
  983 + <requestHandler name="/update/extract"
  984 + startup="lazy"
  985 + class="solr.extraction.ExtractingRequestHandler" >
  986 + <lst name="defaults">
  987 + <str name="lowernames">true</str>
  988 + <str name="uprefix">ignored_</str>
  989 +
  990 + <!-- capture link hrefs but ignore div attributes -->
  991 + <str name="captureAttr">true</str>
  992 + <str name="fmap.a">links</str>
  993 + <str name="fmap.div">ignored_</str>
  994 + </lst>
  995 + </requestHandler>
  996 +
  997 +
  998 + <!-- Field Analysis Request Handler
  999 +
  1000 + RequestHandler that provides much the same functionality as
  1001 + analysis.jsp. Provides the ability to specify multiple field
  1002 + types and field names in the same request and outputs
  1003 + index-time and query-time analysis for each of them.
  1004 +
  1005 + Request parameters are:
  1006 + analysis.fieldname - field name whose analyzers are to be used
  1007 +
  1008 + analysis.fieldtype - field type whose analyzers are to be used
  1009 + analysis.fieldvalue - text for index-time analysis
  1010 + q (or analysis.q) - text for query time analysis
  1011 + analysis.showmatch (true|false) - When set to true and when
  1012 + query analysis is performed, the produced tokens of the
  1013 + field value analysis will be marked as "matched" for every
  1014 + token that is produced by the query analysis
  1015 + -->
  1016 + <requestHandler name="/analysis/field"
  1017 + startup="lazy"
  1018 + class="solr.FieldAnalysisRequestHandler" />
  1019 +
  1020 +
  1021 + <!-- Document Analysis Handler
  1022 +
  1023 + http://wiki.apache.org/solr/AnalysisRequestHandler
  1024 +
  1025 + An analysis handler that provides a breakdown of the analysis
  1026 + process of provided documents. This handler expects a (single)
  1027 + content stream with the following format:
  1028 +
  1029 + <docs>
  1030 + <doc>
  1031 + <field name="id">1</field>
  1032 + <field name="name">The Name</field>
  1033 + <field name="text">The Text Value</field>
  1034 + </doc>
  1035 + <doc>...</doc>
  1036 + <doc>...</doc>
  1037 + ...
  1038 + </docs>
  1039 +
  1040 + Note: Each document must contain a field which serves as the
  1041 + unique key. This key is used in the returned response to associate
  1042 + an analysis breakdown to the analyzed document.
  1043 +
  1044 + Like the FieldAnalysisRequestHandler, this handler also supports
  1045 + query analysis by sending either an "analysis.query" or "q"
  1046 + request parameter that holds the query text to be analyzed. It
  1047 + also supports the "analysis.showmatch" parameter which when set to
  1048 + true, all field tokens that match the query tokens will be marked
  1049 + as a "match".
  1050 + -->
  1051 + <requestHandler name="/analysis/document"
  1052 + class="solr.DocumentAnalysisRequestHandler"
  1053 + startup="lazy" />
  1054 +
  1055 + <!-- Admin Handlers
  1056 +
  1057 + Admin Handlers - This will register all the standard admin
  1058 + RequestHandlers.
  1059 + -->
  1060 + <requestHandler name="/admin/"
  1061 + class="solr.admin.AdminHandlers" />
  1062 + <!-- This single handler is equivalent to the following... -->
  1063 + <!--
  1064 + <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
  1065 + <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
  1066 + <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
  1067 + <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
  1068 + <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
  1069 + <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
  1070 + -->
  1071 + <!-- If you wish to hide files under ${solr.home}/conf, explicitly
  1072 + register the ShowFileRequestHandler using:
  1073 + -->
  1074 + <!--
  1075 + <requestHandler name="/admin/file"
  1076 + class="solr.admin.ShowFileRequestHandler" >
  1077 + <lst name="invariants">
  1078 + <str name="hidden">synonyms.txt</str>
  1079 + <str name="hidden">anotherfile.txt</str>
  1080 + </lst>
  1081 + </requestHandler>
  1082 + -->
  1083 +
  1084 + <!-- ping/healthcheck -->
  1085 + <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
  1086 + <lst name="invariants">
  1087 + <str name="q">solrpingquery</str>
  1088 + </lst>
  1089 + <lst name="defaults">
  1090 + <str name="echoParams">all</str>
  1091 + </lst>
  1092 + <!-- An optional feature of the PingRequestHandler is to configure the
  1093 + handler with a "healthcheckFile" which can be used to enable/disable
  1094 + the PingRequestHandler.
  1095 + relative paths are resolved against the data dir
  1096 + -->
  1097 + <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
  1098 + </requestHandler>
  1099 +
  1100 + <!-- Echo the request contents back to the client -->
  1101 + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
  1102 + <lst name="defaults">
  1103 + <str name="echoParams">explicit</str>
  1104 + <str name="echoHandler">true</str>
  1105 + </lst>
  1106 + </requestHandler>
  1107 +
  1108 + <!-- Solr Replication
  1109 +
  1110 + The SolrReplicationHandler supports replicating indexes from a
  1111 + "master" used for indexing and "slaves" used for queries.
  1112 +
  1113 + http://wiki.apache.org/solr/SolrReplication
  1114 +
  1115 + It is also necessary for SolrCloud to function (in Cloud mode, the
  1116 + replication handler is used to bulk transfer segments when nodes
  1117 + are added or need to recover).
  1118 +
  1119 + https://wiki.apache.org/solr/SolrCloud/
  1120 + -->
  1121 + <requestHandler name="/replication" class="solr.ReplicationHandler" >
  1122 + <!--
  1123 + To enable simple master/slave replication, uncomment one of the
  1124 + sections below, depending on whether this solr instance should be
  1125 + the "master" or a "slave". If this instance is a "slave" you will
  1126 + also need to fill in the masterUrl to point to a real machine.
  1127 + -->
  1128 + <!--
  1129 + <lst name="master">
  1130 + <str name="replicateAfter">commit</str>
  1131 + <str name="replicateAfter">startup</str>
  1132 + <str name="confFiles">schema.xml,stopwords.txt</str>
  1133 + </lst>
  1134 + -->
  1135 + <!--
  1136 + <lst name="slave">
  1137 + <str name="masterUrl">http://your-master-hostname:8983/solr</str>
  1138 + <str name="pollInterval">00:00:60</str>
  1139 + </lst>
  1140 + -->
  1141 + </requestHandler>
  1142 +
  1143 + <!-- Search Components
  1144 +
  1145 + Search components are registered to SolrCore and used by
  1146 + instances of SearchHandler (which can access them by name)
  1147 +
  1148 + By default, the following components are available:
  1149 +
  1150 + <searchComponent name="query" class="solr.QueryComponent" />
  1151 + <searchComponent name="facet" class="solr.FacetComponent" />
  1152 + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
  1153 + <searchComponent name="highlight" class="solr.HighlightComponent" />
  1154 + <searchComponent name="stats" class="solr.StatsComponent" />
  1155 + <searchComponent name="debug" class="solr.DebugComponent" />
  1156 +
  1157 + Default configuration in a requestHandler would look like:
  1158 +
  1159 + <arr name="components">
  1160 + <str>query</str>
  1161 + <str>facet</str>
  1162 + <str>mlt</str>
  1163 + <str>highlight</str>
  1164 + <str>stats</str>
  1165 + <str>debug</str>
  1166 + </arr>
  1167 +
  1168 + If you register a searchComponent to one of the standard names,
  1169 + that will be used instead of the default.
  1170 +
  1171 + To insert components before or after the 'standard' components, use:
  1172 +
  1173 + <arr name="first-components">
  1174 + <str>myFirstComponentName</str>
  1175 + </arr>
  1176 +
  1177 + <arr name="last-components">
  1178 + <str>myLastComponentName</str>
  1179 + </arr>
  1180 +
  1181 + NOTE: The component registered with the name "debug" will
  1182 + always be executed after the "last-components"
  1183 +
  1184 + -->
  1185 +
  1186 + <!-- Spell Check
  1187 +
  1188 + The spell check component can return a list of alternative spelling
  1189 + suggestions.
  1190 +
  1191 + http://wiki.apache.org/solr/SpellCheckComponent
  1192 + -->
  1193 + <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  1194 +
  1195 + <str name="queryAnalyzerFieldType">textSpell</str>
  1196 +
  1197 + <!-- Multiple "Spell Checkers" can be declared and used by this
  1198 + component
  1199 + -->
  1200 +
  1201 + <!-- a spellchecker built from a field of the main index -->
  1202 + <lst name="spellchecker">
  1203 + <str name="name">default</str>
  1204 + <str name="field">name</str>
  1205 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1206 + <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
  1207 + <str name="distanceMeasure">internal</str>
  1208 + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
  1209 + <float name="accuracy">0.5</float>
  1210 + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
  1211 + <int name="maxEdits">2</int>
  1212 + <!-- the minimum shared prefix when enumerating terms -->
  1213 + <int name="minPrefix">1</int>
  1214 + <!-- maximum number of inspections per result. -->
  1215 + <int name="maxInspections">5</int>
  1216 + <!-- minimum length of a query term to be considered for correction -->
  1217 + <int name="minQueryLength">4</int>
  1218 + <!-- maximum threshold of documents a query term can appear to be considered for correction -->
  1219 + <float name="maxQueryFrequency">0.01</float>
  1220 + <!-- uncomment this to require suggestions to occur in 1% of the documents
  1221 + <float name="thresholdTokenFrequency">.01</float>
  1222 + -->
  1223 + </lst>
  1224 +
  1225 + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
  1226 + <lst name="spellchecker">
  1227 + <str name="name">wordbreak</str>
  1228 + <str name="classname">solr.WordBreakSolrSpellChecker</str>
  1229 + <str name="field">name</str>
  1230 + <str name="combineWords">true</str>
  1231 + <str name="breakWords">true</str>
  1232 + <int name="maxChanges">10</int>
  1233 + </lst>
  1234 +
  1235 + <!-- a spellchecker that uses a different distance measure -->
  1236 + <!--
  1237 + <lst name="spellchecker">
  1238 + <str name="name">jarowinkler</str>
  1239 + <str name="field">spell</str>
  1240 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1241 + <str name="distanceMeasure">
  1242 + org.apache.lucene.search.spell.JaroWinklerDistance
  1243 + </str>
  1244 + </lst>
  1245 + -->
  1246 +
  1247 + <!-- a spellchecker that uses an alternate comparator
  1248 +
  1249 + comparatorClass can be one of:
  1250 + 1. score (default)
  1251 + 2. freq (Frequency first, then score)
  1252 + 3. A fully qualified class name
  1253 + -->
  1254 + <!--
  1255 + <lst name="spellchecker">
  1256 + <str name="name">freq</str>
  1257 + <str name="field">lowerfilt</str>
  1258 + <str name="classname">solr.DirectSolrSpellChecker</str>
  1259 + <str name="comparatorClass">freq</str>
  1260 + -->
  1261 +
  1262 + <!-- A spellchecker that reads the list of words from a file -->
  1263 + <!--
  1264 + <lst name="spellchecker">
  1265 + <str name="classname">solr.FileBasedSpellChecker</str>
  1266 + <str name="name">file</str>
  1267 + <str name="sourceLocation">spellings.txt</str>
  1268 + <str name="characterEncoding">UTF-8</str>
  1269 + <str name="spellcheckIndexDir">spellcheckerFile</str>
  1270 + </lst>
  1271 + -->
  1272 + </searchComponent>
  1273 +
  1274 + <!-- A request handler for demonstrating the spellcheck component.
  1275 +
  1276 + NOTE: This is purely as an example. The whole purpose of the
  1277 + SpellCheckComponent is to hook it into the request handler that
  1278 + handles your normal user queries so that a separate request is
  1279 + not needed to get suggestions.
  1280 +
  1281 + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
  1282 + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
  1283 +
  1284 + See http://wiki.apache.org/solr/SpellCheckComponent for details
  1285 + on the request parameters.
  1286 + -->
  1287 + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  1288 + <lst name="defaults">
  1289 + <str name="df">text</str>
  1290 + <!-- Solr will use suggestions from both the 'default' spellchecker
  1291 + and from the 'wordbreak' spellchecker and combine them.
  1292 + collations (re-written queries) can include a combination of
  1293 + corrections from both spellcheckers -->
  1294 + <str name="spellcheck.dictionary">default</str>
  1295 + <str name="spellcheck.dictionary">wordbreak</str>
  1296 + <str name="spellcheck">on</str>
  1297 + <str name="spellcheck.extendedResults">true</str>
  1298 + <str name="spellcheck.count">10</str>
  1299 + <str name="spellcheck.alternativeTermCount">5</str>
  1300 + <str name="spellcheck.maxResultsForSuggest">5</str>
  1301 + <str name="spellcheck.collate">true</str>
  1302 + <str name="spellcheck.collateExtendedResults">true</str>
  1303 + <str name="spellcheck.maxCollationTries">10</str>
  1304 + <str name="spellcheck.maxCollations">5</str>
  1305 + </lst>
  1306 + <arr name="last-components">
  1307 + <str>spellcheck</str>
  1308 + </arr>
  1309 + </requestHandler>
  1310 +
  1311 + <!-- Term Vector Component
  1312 +
  1313 + http://wiki.apache.org/solr/TermVectorComponent
  1314 + -->
  1315 + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
  1316 +
  1317 + <!-- A request handler for demonstrating the term vector component
  1318 +
  1319 + This is purely as an example.
  1320 +
  1321 + In reality you will likely want to add the component to your
  1322 + already specified request handlers.
  1323 + -->
  1324 + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
  1325 + <lst name="defaults">
  1326 + <str name="df">text</str>
  1327 + <bool name="tv">true</bool>
  1328 + </lst>
  1329 + <arr name="last-components">
  1330 + <str>tvComponent</str>
  1331 + </arr>
  1332 + </requestHandler>
  1333 +
  1334 + <!-- Clustering Component
  1335 +
  1336 + http://wiki.apache.org/solr/ClusteringComponent
  1337 +
  1338 + You'll need to set the solr.clustering.enabled system property
  1339 + when running solr to run with clustering enabled:
  1340 +
  1341 + java -Dsolr.clustering.enabled=true -jar start.jar
  1342 +
  1343 + -->
  1344 + <searchComponent name="clustering"
  1345 + enable="${solr.clustering.enabled:false}"
  1346 + class="solr.clustering.ClusteringComponent" >
  1347 + <!-- Declare an engine -->
  1348 + <lst name="engine">
  1349 + <!-- The name, only one can be named "default" -->
  1350 + <str name="name">default</str>
  1351 +
  1352 + <!-- Class name of Carrot2 clustering algorithm.
  1353 +
  1354 + Currently available algorithms are:
  1355 +
  1356 + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
  1357 + * org.carrot2.clustering.stc.STCClusteringAlgorithm
  1358 + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
  1359 +
  1360 + See http://project.carrot2.org/algorithms.html for the
  1361 + algorithm's characteristics.
  1362 + -->
  1363 + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
  1364 +
  1365 + <!-- Overriding values for Carrot2 default algorithm attributes.
  1366 +
  1367 + For a description of all available attributes, see:
  1368 + http://download.carrot2.org/stable/manual/#chapter.components.
  1369 + Use attribute key as name attribute of str elements
  1370 + below. These can be further overridden for individual
  1371 + requests by specifying attribute key as request parameter
  1372 + name and attribute value as parameter value.
  1373 + -->
  1374 + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
  1375 +
  1376 + <!-- Location of Carrot2 lexical resources.
  1377 +
  1378 + A directory from which to load Carrot2-specific stop words
  1379 + and stop labels. Absolute or relative to Solr config directory.
  1380 + If a specific resource (e.g. stopwords.en) is present in the
  1381 + specified dir, it will completely override the corresponding
  1382 + default one that ships with Carrot2.
  1383 +
  1384 + For an overview of Carrot2 lexical resources, see:
  1385 + http://download.carrot2.org/head/manual/#chapter.lexical-resources
  1386 + -->
  1387 + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
  1388 +
  1389 + <!-- The language to assume for the documents.
  1390 +
  1391 + For a list of allowed values, see:
  1392 + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
  1393 + -->
  1394 + <str name="MultilingualClustering.defaultLanguage">PORTUGUESE</str>
  1395 + </lst>
  1396 + <lst name="engine">
  1397 + <str name="name">stc</str>
  1398 + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
  1399 + </lst>
  1400 + </searchComponent>
  1401 +
  1402 + <!-- A request handler for demonstrating the clustering component
  1403 +
  1404 + This is purely as an example.
  1405 +
  1406 + In reality you will likely want to add the component to your
  1407 + already specified request handlers.
  1408 + -->
  1409 + <requestHandler name="/clustering"
  1410 + startup="lazy"
  1411 + enable="${solr.clustering.enabled:false}"
  1412 + class="solr.SearchHandler">
  1413 + <lst name="defaults">
  1414 + <bool name="clustering">true</bool>
  1415 + <str name="clustering.engine">default</str>
  1416 + <bool name="clustering.results">true</bool>
  1417 + <!-- The title field -->
  1418 + <str name="carrot.title">name</str>
  1419 + <str name="carrot.url">id</str>
  1420 + <!-- The field to cluster on -->
  1421 + <str name="carrot.snippet">features</str>
  1422 + <!-- produce summaries -->
  1423 + <bool name="carrot.produceSummary">true</bool>
  1424 + <!-- the maximum number of labels per cluster -->
  1425 + <!--<int name="carrot.numDescriptions">5</int>-->
  1426 + <!-- produce sub clusters -->
  1427 + <bool name="carrot.outputSubClusters">false</bool>
  1428 +
  1429 + <str name="defType">edismax</str>
  1430 + <str name="qf">
  1431 + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
  1432 + </str>
  1433 + <str name="q.alt">*:*</str>
  1434 + <str name="rows">10</str>
  1435 + <str name="fl">*,score</str>
  1436 + </lst>
  1437 + <arr name="last-components">
  1438 + <str>clustering</str>
  1439 + </arr>
  1440 + </requestHandler>
  1441 +
  1442 + <!-- Terms Component
  1443 +
  1444 + http://wiki.apache.org/solr/TermsComponent
  1445 +
  1446 + A component to return terms and document frequency of those
  1447 + terms
  1448 + -->
  1449 + <searchComponent name="terms" class="solr.TermsComponent"/>
  1450 +
  1451 + <!-- A request handler for demonstrating the terms component -->
  1452 + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
  1453 + <lst name="defaults">
  1454 + <bool name="terms">true</bool>
  1455 + <bool name="distrib">false</bool>
  1456 + </lst>
  1457 + <arr name="components">
  1458 + <str>terms</str>
  1459 + </arr>
  1460 + </requestHandler>
  1461 +
  1462 +
  1463 + <!-- Query Elevation Component
  1464 +
  1465 + http://wiki.apache.org/solr/QueryElevationComponent
  1466 +
  1467 + a search component that enables you to configure the top
  1468 + results for a given query regardless of the normal lucene
  1469 + scoring.
  1470 + -->
  1471 + <searchComponent name="elevator" class="solr.QueryElevationComponent" >
  1472 + <!-- pick a fieldType to analyze queries -->
  1473 + <str name="queryFieldType">string</str>
  1474 + <str name="config-file">elevate.xml</str>
  1475 + </searchComponent>
  1476 +
  1477 + <!-- A request handler for demonstrating the elevator component -->
  1478 + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
  1479 + <lst name="defaults">
  1480 + <str name="echoParams">explicit</str>
  1481 + <str name="df">text</str>
  1482 + </lst>
  1483 + <arr name="last-components">
  1484 + <str>elevator</str>
  1485 + </arr>
  1486 + </requestHandler>
  1487 +
  1488 + <!-- Highlighting Component
  1489 +
  1490 + http://wiki.apache.org/solr/HighlightingParameters
  1491 + -->
  1492 + <searchComponent class="solr.HighlightComponent" name="highlight">
  1493 + <highlighting>
  1494 + <!-- Configure the standard fragmenter -->
  1495 + <!-- This could most likely be commented out in the "default" case -->
  1496 + <fragmenter name="gap"
  1497 + default="true"
  1498 + class="solr.highlight.GapFragmenter">
  1499 + <lst name="defaults">
  1500 + <int name="hl.fragsize">250</int>
  1501 + </lst>
  1502 + </fragmenter>
  1503 +
  1504 + <!-- A regular-expression-based fragmenter
  1505 + (for sentence extraction)
  1506 + -->
  1507 + <fragmenter name="regex"
  1508 + class="solr.highlight.RegexFragmenter">
  1509 + <lst name="defaults">
  1510 + <!-- slightly smaller fragsizes work better because of slop -->
  1511 + <int name="hl.fragsize">250</int>
  1512 + <!-- allow 50% slop on fragment sizes -->
  1513 + <float name="hl.regex.slop">0.5</float>
  1514 + <!-- a basic sentence pattern -->
  1515 + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
  1516 + </lst>
  1517 + </fragmenter>
  1518 +
  1519 + <!-- Configure the standard formatter -->
  1520 + <formatter name="html"
  1521 + default="true"
  1522 + class="solr.highlight.HtmlFormatter">
  1523 + <lst name="defaults">
  1524 + <str name="hl.simple.pre"><![CDATA[<b>]]></str>
  1525 + <str name="hl.simple.post"><![CDATA[</b>]]></str>
  1526 + </lst>
  1527 + </formatter>
  1528 +
  1529 + <!-- Configure the standard encoder -->
  1530 + <encoder name="html"
  1531 + class="solr.highlight.HtmlEncoder" />
  1532 +
  1533 + <!-- Configure the standard fragListBuilder -->
  1534 + <fragListBuilder name="simple"
  1535 + class="solr.highlight.SimpleFragListBuilder"/>
  1536 +
  1537 + <!-- Configure the single fragListBuilder -->
  1538 + <fragListBuilder name="single"
  1539 + class="solr.highlight.SingleFragListBuilder"/>
  1540 +
  1541 + <!-- Configure the weighted fragListBuilder -->
  1542 + <fragListBuilder name="weighted"
  1543 + default="true"
  1544 + class="solr.highlight.WeightedFragListBuilder"/>
  1545 +
  1546 + <!-- default tag FragmentsBuilder -->
  1547 + <fragmentsBuilder name="default"
  1548 + default="true"
  1549 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1550 + <!--
  1551 + <lst name="defaults">
  1552 + <str name="hl.multiValuedSeparatorChar">/</str>
  1553 + </lst>
  1554 + -->
  1555 + </fragmentsBuilder>
  1556 +
  1557 + <!-- multi-colored tag FragmentsBuilder -->
  1558 + <fragmentsBuilder name="colored"
  1559 + class="solr.highlight.ScoreOrderFragmentsBuilder">
  1560 + <lst name="defaults">
  1561 + <str name="hl.tag.pre"><![CDATA[
  1562 + <b style="background:yellow">,<b style="background:lawgreen">,
  1563 + <b style="background:aquamarine">,<b style="background:magenta">,
  1564 + <b style="background:palegreen">,<b style="background:coral">,
  1565 + <b style="background:wheat">,<b style="background:khaki">,
  1566 + <b style="background:lime">,<b style="background:deepskyblue">]]></str>
  1567 + <str name="hl.tag.post"><![CDATA[</b>]]></str>
  1568 + </lst>
  1569 + </fragmentsBuilder>
  1570 +
  1571 + <boundaryScanner name="default"
  1572 + default="true"
  1573 + class="solr.highlight.SimpleBoundaryScanner">
  1574 + <lst name="defaults">
  1575 + <str name="hl.bs.maxScan">10</str>
  1576 + <str name="hl.bs.chars">.,!?
  1577 +
  1578 +</str>
  1579 + </lst>
  1580 + </boundaryScanner>
  1581 +
  1582 + <boundaryScanner name="breakIterator"
  1583 + class="solr.highlight.BreakIteratorBoundaryScanner">
  1584 + <lst name="defaults">
  1585 + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
  1586 + <str name="hl.bs.type">WORD</str>
  1587 + <!-- language and country are used when constructing Locale object. -->
  1588 + <!-- And the Locale object will be used when getting instance of BreakIterator -->
  1589 + <str name="hl.bs.language">pt</str>
  1590 + <str name="hl.bs.country">BR</str>
  1591 + </lst>
  1592 + </boundaryScanner>
  1593 + </highlighting>
  1594 + </searchComponent>
  1595 +
  1596 + <!-- Update Processors
  1597 +
  1598 + Chains of Update Processor Factories for dealing with Update
  1599 + Requests can be declared, and then used by name in Update
  1600 + Request Processors
  1601 +
  1602 + http://wiki.apache.org/solr/UpdateRequestProcessor
  1603 +
  1604 + -->
  1605 + <!-- Deduplication
  1606 +
  1607 + An example dedup update processor that creates the "id" field
  1608 + on the fly based on the hash code of some other fields. This
  1609 + example has overwriteDupes set to false since we are using the
  1610 + id field as the signatureField and Solr will maintain
  1611 + uniqueness based on that anyway.
  1612 +
  1613 + -->
  1614 + <!--
  1615 + <updateRequestProcessorChain name="dedupe">
  1616 + <processor class="solr.processor.SignatureUpdateProcessorFactory">
  1617 + <bool name="enabled">true</bool>
  1618 + <str name="signatureField">id</str>
  1619 + <bool name="overwriteDupes">false</bool>
  1620 + <str name="fields">name,features,cat</str>
  1621 + <str name="signatureClass">solr.processor.Lookup3Signature</str>
  1622 + </processor>
  1623 + <processor class="solr.LogUpdateProcessorFactory" />
  1624 + <processor class="solr.RunUpdateProcessorFactory" />
  1625 + </updateRequestProcessorChain>
  1626 + -->
  1627 +
  1628 + <!-- Language identification
  1629 +
  1630 + This example update chain identifies the language of the incoming
  1631 + documents using the langid contrib. The detected language is
  1632 + written to field language_s. No field name mapping is done.
  1633 + The fields used for detection are text, title, subject and description,
  1634 + making this example suitable for detecting languages from full-text
  1635 + rich documents injected via ExtractingRequestHandler.
  1636 + See more about langId at http://wiki.apache.org/solr/LanguageDetection
  1637 + -->
  1638 + <!--
  1639 + <updateRequestProcessorChain name="langid">
  1640 + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
  1641 + <str name="langid.fl">text,title,subject,description</str>
  1642 + <str name="langid.langField">language_s</str>
  1643 + <str name="langid.fallback">en</str>
  1644 + </processor>
  1645 + <processor class="solr.LogUpdateProcessorFactory" />
  1646 + <processor class="solr.RunUpdateProcessorFactory" />
  1647 + </updateRequestProcessorChain>
  1648 + -->
  1649 +
  1650 + <!-- Script update processor
  1651 +
  1652 + This example hooks in an update processor implemented using JavaScript.
  1653 +
  1654 + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
  1655 + -->
  1656 + <!--
  1657 + <updateRequestProcessorChain name="script">
  1658 + <processor class="solr.StatelessScriptUpdateProcessorFactory">
  1659 + <str name="script">update-script.js</str>
  1660 + <lst name="params">
  1661 + <str name="config_param">example config parameter</str>
  1662 + </lst>
  1663 + </processor>
  1664 + <processor class="solr.RunUpdateProcessorFactory" />
  1665 + </updateRequestProcessorChain>
  1666 + -->
  1667 +
  1668 + <!-- Response Writers
  1669 +
  1670 + http://wiki.apache.org/solr/QueryResponseWriter
  1671 +
  1672 + Request responses will be written using the writer specified by
  1673 + the 'wt' request parameter matching the name of a registered
  1674 + writer.
  1675 +
  1676 + The "default" writer is the default and will be used if 'wt' is
  1677 + not specified in the request.
  1678 + -->
  1679 + <!-- The following response writers are implicitly configured unless
  1680 + overridden...
  1681 + -->
  1682 + <!--
  1683 + <queryResponseWriter name="xml"
  1684 + default="true"
  1685 + class="solr.XMLResponseWriter" />
  1686 + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
  1687 + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
  1688 + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
  1689 + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
  1690 + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
  1691 + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
  1692 + -->
  1693 +
  1694 + <queryResponseWriter name="json" class="solr.JSONResponseWriter">
  1695 + <!-- For the purposes of the tutorial, JSON responses are written as
  1696 + plain text so that they are easy to read in *any* browser.
  1697 + If you expect a MIME type of "application/json" just remove this override.
  1698 + -->
  1699 + <str name="content-type">text/plain; charset=UTF-8</str>
  1700 + </queryResponseWriter>
  1701 +
  1702 + <!--
  1703 + Custom response writers can be declared as needed...
  1704 + -->
  1705 + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
  1706 +
  1707 +
  1708 + <!-- XSLT response writer transforms the XML output by any xslt file found
  1709 + in Solr's conf/xslt directory. Changes to xslt files are checked for
  1710 + every xsltCacheLifetimeSeconds.
  1711 + -->
  1712 + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
  1713 + <int name="xsltCacheLifetimeSeconds">5</int>
  1714 + </queryResponseWriter>
  1715 +
  1716 + <!-- Query Parsers
  1717 +
  1718 + http://wiki.apache.org/solr/SolrQuerySyntax
  1719 +
  1720 + Multiple QParserPlugins can be registered by name, and then
  1721 + used in either the "defType" param for the QueryComponent (used
  1722 + by SearchHandler) or in LocalParams
  1723 + -->
  1724 + <!-- example of registering a query parser -->
  1725 + <!--
  1726 + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
  1727 + -->
  1728 +
  1729 + <!-- Function Parsers
  1730 +
  1731 + http://wiki.apache.org/solr/FunctionQuery
  1732 +
  1733 + Multiple ValueSourceParsers can be registered by name, and then
  1734 + used as function names when using the "func" QParser.
  1735 + -->
  1736 + <!-- example of registering a custom function parser -->
  1737 + <!--
  1738 + <valueSourceParser name="myfunc"
  1739 + class="com.mycompany.MyValueSourceParser" />
  1740 + -->
  1741 +
  1742 +
  1743 + <!-- Document Transformers
  1744 + http://wiki.apache.org/solr/DocTransformers
  1745 + -->
  1746 + <!--
  1747 + Could be something like:
  1748 + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
  1749 + <int name="connection">jdbc://....</int>
  1750 + </transformer>
  1751 +
  1752 + To add a constant value to all docs, use:
  1753 + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1754 + <int name="value">5</int>
  1755 + </transformer>
  1756 +
  1757 + If you want the user to still be able to change it with _value:something_ use this:
  1758 + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
  1759 + <double name="defaultValue">5</double>
  1760 + </transformer>
  1761 +
  1762 + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
  1763 + EditorialMarkerFactory will do exactly that:
  1764 + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
  1765 + -->
  1766 +
  1767 +
  1768 + <!-- Legacy config for the admin interface -->
  1769 + <admin>
  1770 + <defaultQuery>*:*</defaultQuery>
  1771 + </admin>
  1772 +
  1773 +</config>
0 1774 \ No newline at end of file
... ...
index/sei-publicacoes-schema.xml 0 → 100755
  1 +++ a/index/sei-publicacoes-schema.xml
... ... @@ -0,0 +1,1177 @@
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<!--
  3 + Licensed to the Apache Software Foundation (ASF) under one or more
  4 + contributor license agreements. See the NOTICE file distributed with
  5 + this work for additional information regarding copyright ownership.
  6 + The ASF licenses this file to You under the Apache License, Version 2.0
  7 + (the "License"); you may not use this file except in compliance with
  8 + the License. You may obtain a copy of the License at
  9 +
  10 + http://www.apache.org/licenses/LICENSE-2.0
  11 +
  12 + Unless required by applicable law or agreed to in writing, software
  13 + distributed under the License is distributed on an "AS IS" BASIS,
  14 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 + See the License for the specific language governing permissions and
  16 + limitations under the License.
  17 +-->
  18 +
  19 +<!--
  20 + This is the Solr schema file. This file should be named "schema.xml" and
  21 + should be in the conf directory under the solr home
  22 + (i.e. ./solr/conf/schema.xml by default)
  23 + or located where the classloader for the Solr webapp can find it.
  24 +
  25 + This example schema is the recommended starting point for users.
  26 + It should be kept correct and concise, usable out-of-the-box.
  27 +
  28 + For more information, on how to customize this file, please see
  29 + http://wiki.apache.org/solr/SchemaXml
  30 +
  31 + PERFORMANCE NOTE: this schema includes many optional features and should not
  32 + be used for benchmarking. To improve performance one could
  33 + - set stored="false" for all fields possible (esp large fields) when you
  34 + only need to search on the field but don't need to return the original
  35 + value.
  36 + - set indexed="false" if you don't need to search on the field, but only
  37 + return the field as a result of searching on other indexed fields.
  38 + - remove all unneeded copyField statements
  39 + - for best index size and searching performance, set "indexed" to false
  40 + for all general text fields, use copyField to copy them to the
  41 + catchall "text" field, and use that for searching.
  42 + - For maximum indexing performance, use the StreamingUpdateSolrServer
  43 + java client.
  44 + - Remember to run the JVM in server mode, and use a higher logging level
  45 + that avoids logging every request
  46 +-->
  47 +
  48 +<schema name="sei-publicacoes" version="1.5">
  49 + <!-- attribute "name" is the name of this schema and is only used for display purposes.
  50 + version="x.y" is Solr's version number for the schema syntax and
  51 + semantics. It should not normally be changed by applications.
  52 +
  53 + 1.0: multiValued attribute did not exist, all fields are multiValued
  54 + by nature
  55 + 1.1: multiValued attribute introduced, false by default
  56 + 1.2: omitTermFreqAndPositions attribute introduced, true by default
  57 + except for text fields.
  58 + 1.3: removed optional field compress feature
  59 + 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
  60 + behavior when a single string produces multiple tokens. Defaults
  61 + to off for version >= 1.4
  62 + 1.5: omitNorms defaults to true for primitive field types
  63 + (int, float, boolean, string...)
  64 + -->
  65 +
  66 + <fields>
  67 + <!-- Fields specific to the "sei-publicacoes" index. Those declared with indexed="false" are only stored for retrieval and cannot be searched on. -->
  68 + <field name="id_publicacao" type="string" indexed="true" stored="true" />
  69 + <field name="id_publicacao_legado" type="string" indexed="true" stored="true" />
  70 + <field name="id_documento" type="string" indexed="true" stored="true" />
  71 + <field name="id_protocolo_agrupador" type="string" indexed="true" stored="true" />
  72 + <field name="id_orgao_responsavel" type="string" indexed="true" stored="true" />
  73 + <field name="sigla_orgao_responsavel" type="string" indexed="false" stored="true" />
  74 + <field name="descricao_orgao_responsavel" type="string" indexed="false" stored="true" />
  75 + <field name="id_unidade_responsavel" type="string" indexed="true" stored="true" />
  76 + <field name="sigla_unidade_responsavel" type="string" indexed="false" stored="true" />
  77 + <field name="descricao_unidade_responsavel" type="string" indexed="false" stored="true" />
  78 + <field name="id_serie" type="string" indexed="true" stored="true" />
  79 + <field name="nome_serie" type="string" indexed="false" stored="true" />
  80 + <field name="numero" type="string" indexed="true" stored="true" />
  81 + <field name="protocolo_formatado_pesquisa" type="string" indexed="true" stored="false" />
  82 + <field name="protocolo_documento_formatado" type="string" indexed="false" stored="true" />
  83 + <field name="dta_documento" type="date" indexed="true" stored="true" />
  84 + <field name="dta_publicacao" type="date" indexed="true" stored="true" />
  85 + <field name="numero_publicacao" type="string" indexed="true" stored="true" />
  86 + <field name="id_veiculo_publicacao" type="string" indexed="true" stored="true" />
  87 + <field name="nome_veiculo_publicacao" type="string" indexed="false" stored="true" />
  88 + <field name="resumo" type="text_general" indexed="true" stored="true" />
  89 + <field name="id_veiculo_io" type="string" indexed="true" stored="true" />
  90 + <field name="sigla_veiculo_io" type="string" indexed="false" stored="true" />
  91 + <field name="descricao_veiculo_io" type="string" indexed="false" stored="true" />
  92 + <field name="dta_publicacao_io" type="date" indexed="true" stored="true" />
  93 + <field name="id_secao_io" type="string" indexed="true" stored="true" />
  94 + <field name="nome_secao_io" type="string" indexed="false" stored="true" />
  95 + <field name="pagina_io" type="string" indexed="false" stored="true" />
  96 +
  97 + <!-- Valid attributes for fields:
  98 + name: mandatory - the name for the field
  99 + type: mandatory - the name of a field type from the
  100 + <types> fieldType section
  101 + indexed: true if this field should be indexed (searchable or sortable)
  102 + stored: true if this field should be retrievable
  103 + multiValued: true if this field may contain multiple values per document
  104 + omitNorms: (expert) set to true to omit the norms associated with
  105 + this field (this disables length normalization and index-time
  106 + boosting for the field, and saves some memory). Only full-text
  107 + fields or fields that need an index-time boost need norms.
  108 + Norms are omitted for primitive (non-analyzed) types by default.
  109 + termVectors: [false] set to true to store the term vector for a
  110 + given field.
  111 + When using MoreLikeThis, fields used for similarity should be
  112 + stored for best performance.
  113 + termPositions: Store position information with the term vector.
  114 + This will increase storage costs.
  115 + termOffsets: Store offset information with the term vector. This
  116 + will increase storage costs.
  117 + required: The field is required. It will throw an error if the
  118 + value does not exist
  119 + default: a value that should be used if no value is specified
  120 + when adding a document.
  121 + -->
  122 +
  123 + <!-- field names should consist of alphanumeric or underscore characters only and
  124 + not start with a digit. This is not currently strictly enforced,
  125 + but other field names will not have first class support from all components
  126 + and back compatibility is not guaranteed. Names with both leading and
  127 + trailing underscores (e.g. _version_) are reserved.
  128 + -->
  129 + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
  130 + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
  131 + <field name="name" type="text_general" indexed="true" stored="true"/>
  132 + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
  133 + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
  134 + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
  135 + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
  136 +
  137 + <field name="weight" type="float" indexed="true" stored="true"/>
  138 + <field name="price" type="float" indexed="true" stored="true"/>
  139 + <field name="popularity" type="int" indexed="true" stored="true" />
  140 + <field name="inStock" type="boolean" indexed="true" stored="true" />
  141 +
  142 + <field name="store" type="location" indexed="true" stored="true"/>
  143 + <!-- mairon
  144 + <field name="documento" type="string" indexed="true" stored="true" />
  145 + <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
  146 + <field name="name" type="text_general" indexed="true" stored="true"/>
  147 + <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
  148 + <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
  149 + <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
  150 + <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
  151 +
  152 + <field name="weight" type="float" indexed="true" stored="true"/>
  153 + <field name="price" type="float" indexed="true" stored="true"/>
  154 + <field name="popularity" type="int" indexed="true" stored="true" />
  155 + <field name="inStock" type="boolean" indexed="true" stored="true" />
  156 +
  157 + <field name="store" type="location" indexed="true" stored="true"/>
  158 + -->
  159 +
  160 + <!-- Common metadata fields, named specifically to match up with
  161 + SolrCell metadata when parsing rich documents such as Word, PDF.
  162 + Some fields are multiValued only because Tika currently may return
  163 + multiple values for them. Some metadata is parsed from the documents,
  164 + but there are some which come from the client context:
  165 + "content_type": From the HTTP headers of incoming stream
  166 + "resourcename": From SolrCell request param resource.name
  167 + -->
  168 + <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
  169 + <field name="subject" type="text_general" indexed="true" stored="true"/>
  170 + <field name="description" type="text_general" indexed="true" stored="true"/>
  171 + <field name="comments" type="text_general" indexed="true" stored="true"/>
  172 + <field name="author" type="text_general" indexed="true" stored="true"/>
  173 + <field name="keywords" type="text_general" indexed="true" stored="true"/>
  174 + <field name="category" type="text_general" indexed="true" stored="true"/>
  175 + <field name="resourcename" type="text_general" indexed="true" stored="true"/>
  176 + <field name="url" type="text_general" indexed="true" stored="true"/>
  177 + <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
  178 + <field name="last_modified" type="date" indexed="true" stored="true"/>
  179 + <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
  180 +
  181 + <!-- Main body of document extracted by SolrCell.
  182 + NOTE: This field is not indexed by default, since it is also copied to "text"
  183 + using copyField below. This is to save space. Use this field for returning and
  184 + highlighting document content. Use the "text" field to search the content. -->
  185 + <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
  186 +
  187 +
  188 + <!-- catchall field, containing all other searchable text fields (implemented
  189 + via copyField further on in this schema) -->
  190 + <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
  191 +
  192 + <!-- catchall text field that indexes tokens both normally and in reverse for efficient
  193 + leading wildcard queries. -->
  194 + <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
  195 +
  196 + <!-- non-tokenized version of manufacturer to make it easier to sort or group
  197 + results by manufacturer. copied from "manu" via copyField -->
  198 + <field name="manu_exact" type="string" indexed="true" stored="false"/>
  199 +
  200 + <field name="payloads" type="payloads" indexed="true" stored="true"/>
  201 +
  202 + <field name="_version_" type="long" indexed="true" stored="true"/>
  203 +
  204 + <!-- Uncommenting the following will create a "timestamp" field using
  205 + a default value of "NOW" to indicate when each document was indexed.
  206 + -->
  207 + <!--
  208 + <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
  209 + -->
  210 +
  211 + <!-- Dynamic field definitions allow using convention over configuration
  212 + for fields via the specification of patterns to match field names.
  213 + EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
  214 + RESTRICTION: the glob-like pattern in the name attribute must have
  215 + a "*" only at the start or the end. -->
  216 +
  217 + <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
  218 + <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
  219 + <dynamicField name="*_s" type="string" indexed="true" stored="true" />
  220 + <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
  221 + <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
  222 + <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
  223 + <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
  224 + <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
  225 + <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
  226 + <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
  227 + <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
  228 + <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
  229 + <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
  230 + <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
  231 + <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
  232 +
  233 + <!-- Type used to index the lat and lon components for the "location" FieldType -->
  234 + <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
  235 +
  236 + <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
  237 + <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
  238 + <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
  239 +
  240 + <!-- some trie-coded dynamic fields for faster range queries -->
  241 + <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
  242 + <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
  243 + <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
  244 + <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
  245 + <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
  246 +
  247 + <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
  248 + <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
  249 +
  250 + <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
  251 + <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
  252 +
  253 + <dynamicField name="random_*" type="random" />
  254 +
  255 + <!-- uncomment the following to ignore any fields that don't already match an existing
  256 + field name or dynamic field, rather than reporting them as an error.
  257 + alternately, change the type="ignored" to some other type e.g. "text" if you want
  258 + unknown fields indexed and/or stored by default -->
  259 + <!--dynamicField name="*" type="ignored" multiValued="true" /-->
  260 +
  261 + </fields>
  262 +
  263 +
  264 + <!-- Field to use to determine and enforce document uniqueness.
  265 + Unless this field is marked with required="false", it will be a required field
  266 + -->
  267 + <uniqueKey>id</uniqueKey>
  268 +
  269 + <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
  270 + parsing a query string that isn't explicit about the field. Machine (non-user)
  271 + generated queries are best made explicit, or they can use the "df" request parameter
  272 + which takes precedence over this.
  273 + Note: Un-commenting defaultSearchField will be insufficient if your request handler
  274 + in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
  275 + <defaultSearchField>text</defaultSearchField> -->
  276 +
  277 + <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
  278 + when parsing a query string to determine if a clause of the query should be marked as
  279 + required or optional, assuming the clause isn't already marked by some operator.
  280 + The default is OR, which is generally assumed so it is not a good idea to change it
  281 + globally here. The "q.op" request parameter takes precedence over this.
  282 + <solrQueryParser defaultOperator="OR"/> -->
  283 +
  284 + <!-- copyField commands copy one field to another at the time a document
  285 + is added to the index. It's used either to index the same field differently,
  286 + or to add multiple fields to the same field for easier/faster searching. -->
  287 +
  288 + <!-- mairon
  289 + <copyField source="cat" dest="text"/>
  290 + <copyField source="name" dest="text"/>
  291 + <copyField source="manu" dest="text"/>
  292 + <copyField source="features" dest="text"/>
  293 + <copyField source="includes" dest="text"/>
  294 + <copyField source="manu" dest="manu_exact"/>
  295 + <copyField source="price" dest="price_c"/>
  296 + -->
  297 +
  298 + <!-- Text fields from SolrCell to search by default in our catch-all field -->
  299 + <copyField source="title" dest="text"/>
  300 + <copyField source="author" dest="text"/>
  301 + <copyField source="description" dest="text"/>
  302 + <copyField source="keywords" dest="text"/>
  303 + <copyField source="content" dest="text"/>
  304 + <copyField source="content_type" dest="text"/>
  305 + <copyField source="resourcename" dest="text"/>
  306 + <copyField source="url" dest="text"/>
  307 +
  308 + <!-- Create a string version of author for faceting -->
  309 + <copyField source="author" dest="author_s"/>
  310 +
  311 + <!-- Above, multiple source fields are copied to the [text] field.
  312 + Another way to map multiple source fields to the same
  313 + destination field is to use the dynamic field syntax.
  314 + copyField also supports a maxChars to copy setting. -->
  315 +
  316 + <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
  317 +
  318 + <!-- copy name to alphaNameSort, a field designed for sorting by name -->
  319 + <!-- <copyField source="name" dest="alphaNameSort"/> -->
  320 +
  321 + <types>
  322 + <!-- field type definitions. The "name" attribute is
  323 + just a label to be used by field definitions. The "class"
  324 + attribute and any other attributes determine the real
  325 + behavior of the fieldType.
  326 + Class names starting with "solr" refer to java classes in a
  327 + standard package such as org.apache.solr.analysis
  328 + -->
  329 +
  330 + <fieldType name="date" class="solr.DateField" omitNorms="true"/>
  331 +
  332 + <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
  333 + <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
  334 +
  335 + <!-- boolean type: "true" or "false" -->
  336 + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
  337 +
  338 + <!-- The optional sortMissingLast and sortMissingFirst attributes are
  339 + currently supported on types that are sorted internally as strings
  340 + and on numeric types.
  341 + This includes "string","boolean", and, as of 3.5 (and 4.x),
  342 + int, float, long, date, double, including the "Trie" variants.
  343 + - If sortMissingLast="true", then a sort on this field will cause documents
  344 + without the field to come after documents with the field,
  345 + regardless of the requested sort order (asc or desc).
  346 + - If sortMissingFirst="true", then a sort on this field will cause documents
  347 + without the field to come before documents with the field,
  348 + regardless of the requested sort order.
  349 + - If sortMissingLast="false" and sortMissingFirst="false" (the default),
  350 + then default lucene sorting will be used which places docs without the
  351 + field first in an ascending sort and last in a descending sort.
  352 + -->
  353 +
  354 + <!--
  355 + Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
  356 + -->
  357 + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
  358 + <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
  359 + <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  360 + <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
  361 +
  362 + <!--
  363 + Numeric field types that index each value at various levels of precision
  364 + to accelerate range queries when the number of values between the range
  365 + endpoints is large. See the javadoc for NumericRangeQuery for internal
  366 + implementation details.
  367 +
  368 + Smaller precisionStep values (specified in bits) will lead to more tokens
  369 + indexed per value, slightly larger index size, and faster range queries.
  370 + A precisionStep of 0 disables indexing at different precision levels.
  371 + -->
  372 + <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
  373 + <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
  374 + <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
  375 + <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
  376 +
  377 + <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
  378 + is a more restricted form of the canonical representation of dateTime
  379 + http://www.w3.org/TR/xmlschema-2/#dateTime
  380 + The trailing "Z" designates UTC time and is mandatory.
  381 + Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
  382 + All other components are mandatory.
  383 +
  384 + Expressions can also be used to denote calculations that should be
  385 + performed relative to "NOW" to determine the value, ie...
  386 +
  387 + NOW/HOUR
  388 + ... Round to the start of the current hour
  389 + NOW-1DAY
  390 + ... Exactly 1 day prior to now
  391 + NOW/DAY+6MONTHS+3DAYS
  392 + ... 6 months and 3 days in the future from the start of
  393 + the current day
  394 +
  395 + Consult the DateField javadocs for more information.
  396 +
  397 + Note: For faster range queries, consider the tdate type
  398 + -->
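  399 +<!-- The active "date" type is the solr.DateField one declared at the top of <types>; this Trie variant is commented out:
  400 + <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
  401 + Below: a Trie based date field for faster date range queries and date faceting. -->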
  402 + <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
  403 +
  404 +
  405 + <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
  406 + <fieldtype name="binary" class="solr.BinaryField"/>
  407 +
  408 + <!--
  409 + Note:
  410 + These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
  411 + Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
  412 +
  413 + Plain numeric field types that store and index the text
  414 + value verbatim (and hence don't correctly support range queries, since the
  415 + lexicographic ordering isn't equal to the numeric ordering)
  416 + -->
  417 + <fieldType name="pint" class="solr.IntField"/>
  418 + <fieldType name="plong" class="solr.LongField"/>
  419 + <fieldType name="pfloat" class="solr.FloatField"/>
  420 + <fieldType name="pdouble" class="solr.DoubleField"/>
  421 + <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
  422 +
  423 + <!-- The "RandomSortField" is not used to store or search any
  424 + data. You can declare fields of this type in your schema
  425 + to generate pseudo-random orderings of your docs for sorting
  426 + or function purposes. The ordering is generated based on the field
  427 + name and the version of the index. As long as the index version
  428 + remains unchanged, and the same field name is reused,
  429 + the ordering of the docs will be consistent.
  430 + If you want different pseudo-random orderings of documents,
  431 + for the same version of the index, use a dynamicField and
  432 + change the field name in the request.
  433 + -->
  434 + <fieldType name="random" class="solr.RandomSortField" indexed="true" />
  435 +
  436 + <!-- solr.TextField allows the specification of custom text analyzers
  437 + specified as a tokenizer and a list of token filters. Different
  438 + analyzers may be specified for indexing and querying.
  439 +
  440 + The optional positionIncrementGap puts space between multiple fields of
  441 + this type on the same document, with the purpose of preventing false phrase
  442 + matching across fields.
  443 +
  444 + For more info on customizing your analyzer chain, please see
  445 + http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
  446 + -->
  447 +
  448 + <!-- One can also specify an existing Analyzer class that has a
  449 + default constructor via the class attribute on the analyzer element.
  450 + Example:
  451 + <fieldType name="text_greek" class="solr.TextField">
  452 + <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
  453 + </fieldType>
  454 + -->
  455 +
  456 + <!-- A text field that only splits on whitespace for exact matching of words -->
  457 + <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  458 + <analyzer>
  459 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  460 + </analyzer>
  461 + </fieldType>
  462 +
  463 + <!-- A general text field that has reasonable, generic
  464 + cross-language defaults: it tokenizes with StandardTokenizer,
  465 + removes stop words from case-insensitive "stopwords.txt"
  466 + (empty by default), and down cases. At query time only, it
  467 + also applies synonyms. -->
  468 + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  469 + <analyzer type="index">
  470 +
  471 + <tokenizer class="solr.StandardTokenizerFactory"/>
  472 +
  473 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  474 + <!-- in this example, we will only use synonyms at query time
  475 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  476 + -->
  477 + <filter class="solr.LowerCaseFilterFactory"/>
  478 +
  479 + <!-- mairon -->
  480 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  481 + <!-- mairon -->
  482 +
  483 +
  484 + </analyzer>
  485 +
  486 + <analyzer type="query">
  487 + <tokenizer class="solr.StandardTokenizerFactory"/>
  488 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" enablePositionIncrements="true" />
  489 + <filter class="solr.LowerCaseFilterFactory"/>
  490 +
  491 + <!-- mairon -->
  492 + <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
  493 + <!-- <filter class="solr.ASCIIFoldingFilterFactory"/> -->
  494 + <!-- <filter class="solr.BrazilianStemFilterFactory"/> -->
  495 + <!-- mairon -->
  496 +
  497 + </analyzer>
  498 +
  499 + </fieldType>
  500 +
  501 + <!-- A text field with defaults appropriate for English: it
  502 + tokenizes with StandardTokenizer, removes English stop words
  503 + (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
  504 + finally applies Porter's stemming. The query time analyzer
  505 + also applies synonyms from synonyms.txt. -->
  506 + <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  507 + <analyzer type="index">
  508 + <tokenizer class="solr.StandardTokenizerFactory"/>
  509 + <!-- in this example, we will only use synonyms at query time
  510 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  511 + -->
  512 + <!-- Case insensitive stop word removal.
  513 + add enablePositionIncrements=true in both the index and query
  514 + analyzers to leave a 'gap' for more accurate phrase queries.
  515 + -->
  516 + <filter class="solr.StopFilterFactory"
  517 + ignoreCase="true"
  518 + words="lang/stopwords_en.txt"
  519 + enablePositionIncrements="true"
  520 + />
  521 + <filter class="solr.LowerCaseFilterFactory"/>
  522 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  523 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  524 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  525 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  526 + -->
  527 + <filter class="solr.PorterStemFilterFactory"/>
  528 + </analyzer>
  529 + <analyzer type="query">
  530 + <tokenizer class="solr.StandardTokenizerFactory"/>
  531 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  532 + <filter class="solr.StopFilterFactory"
  533 + ignoreCase="true"
  534 + words="lang/stopwords_en.txt"
  535 + enablePositionIncrements="true"
  536 + />
  537 + <filter class="solr.LowerCaseFilterFactory"/>
  538 + <filter class="solr.EnglishPossessiveFilterFactory"/>
  539 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  540 + <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
  541 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  542 + -->
  543 + <filter class="solr.PorterStemFilterFactory"/>
  544 + </analyzer>
  545 + </fieldType>
  546 +
  547 + <!-- A text field with defaults appropriate for English, plus
  548 + aggressive word-splitting and autophrase features enabled.
  549 + This field is just like text_en, except it adds
  550 + WordDelimiterFilter to enable splitting and matching of
  551 + words on case-change, alpha numeric boundaries, and
  552 + non-alphanumeric chars. This means certain compound word
  553 + cases will work, for example query "wi fi" will match
  554 + document "WiFi" or "wi-fi".
  555 + -->
  556 + <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  557 + <analyzer type="index">
  558 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  559 + <!-- in this example, we will only use synonyms at query time
  560 + <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
  561 + -->
  562 + <!-- Case insensitive stop word removal.
  563 + add enablePositionIncrements=true in both the index and query
  564 + analyzers to leave a 'gap' for more accurate phrase queries.
  565 + -->
  566 + <filter class="solr.StopFilterFactory"
  567 + ignoreCase="true"
  568 + words="lang/stopwords_en.txt"
  569 + enablePositionIncrements="true"
  570 + />
  571 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
  572 + <filter class="solr.LowerCaseFilterFactory"/>
  573 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  574 + <filter class="solr.PorterStemFilterFactory"/>
  575 +
  576 +
  577 + </analyzer>
  578 + <analyzer type="query">
  579 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  580 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  581 + <filter class="solr.StopFilterFactory"
  582 + ignoreCase="true"
  583 + words="lang/stopwords_en.txt"
  584 + enablePositionIncrements="true"
  585 + />
  586 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
  587 + <filter class="solr.LowerCaseFilterFactory"/>
  588 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  589 + <filter class="solr.PorterStemFilterFactory"/>
  590 + </analyzer>
  591 + </fieldType>
  592 +
  593 + <!-- Less flexible matching, but less false matches. Probably not ideal for product names,
  594 + but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
  595 + <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  596 + <analyzer>
  597 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  598 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
  599 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
  600 + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
  601 + <filter class="solr.LowerCaseFilterFactory"/>
  602 + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  603 + <filter class="solr.EnglishMinimalStemFilterFactory"/>
  604 + <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
  605 + possible with WordDelimiterFilter in conjunction with stemming. -->
  606 + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  607 + </analyzer>
  608 + </fieldType>
  609 +
  610 + <!-- Just like text_general except it reverses the characters of
  611 + each token, to enable more efficient leading wildcard queries. -->
  612 + <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  613 + <analyzer type="index">
  614 + <tokenizer class="solr.StandardTokenizerFactory"/>
  615 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  616 + <filter class="solr.LowerCaseFilterFactory"/>
  617 + <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
  618 + maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  619 + </analyzer>
  620 + <analyzer type="query">
  621 + <tokenizer class="solr.StandardTokenizerFactory"/>
  622 + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
  623 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  624 + <filter class="solr.LowerCaseFilterFactory"/>
  625 + </analyzer>
  626 + </fieldType>
  627 +
  628 + <!-- charFilter + WhitespaceTokenizer -->
  629 + <!--
  630 + <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
  631 + <analyzer>
  632 + <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
  633 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  634 + </analyzer>
  635 + </fieldType>
  636 + -->
  637 +
  638 + <!-- This is an example of using the KeywordTokenizer along
  639 + with various TokenFilterFactories to produce a sortable field
  640 + that does not include some properties of the source text
  641 + -->
  642 + <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  643 + <analyzer>
  644 + <!-- KeywordTokenizer does no actual tokenizing, so the entire
  645 + input string is preserved as a single token
  646 + -->
  647 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  648 + <!-- The LowerCase TokenFilter does what you expect, which can be useful
  649 + when you want your sorting to be case insensitive
  650 + -->
  651 + <filter class="solr.LowerCaseFilterFactory" />
  652 + <!-- The TrimFilter removes any leading or trailing whitespace -->
  653 + <filter class="solr.TrimFilterFactory" />
  654 + <!-- The PatternReplaceFilter gives you the flexibility to use
  655 + a Java regular expression to replace any sequence of characters
  656 + matching a pattern with an arbitrary replacement string,
  657 + which may include back references to portions of the original
  658 + string matched by the pattern.
  659 +
  660 + See the Java Regular Expression documentation for more
  661 + information on pattern and replacement string syntax.
  662 +
  663 + http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
  664 + -->
  665 + <filter class="solr.PatternReplaceFilterFactory"
  666 + pattern="([^a-z])" replacement="" replace="all"
  667 + />
  668 + </analyzer>
  669 + </fieldType>
  670 +
  671 + <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
  672 + <analyzer>
  673 + <tokenizer class="solr.StandardTokenizerFactory"/>
  674 + <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  675 + </analyzer>
  676 + </fieldtype>
  677 +
  678 + <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
  679 + <analyzer>
  680 + <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  681 + <!--
  682 + The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
  683 + a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
  684 + Attributes of the DelimitedPayloadTokenFilterFactory :
  685 + "delimiter" - a one character delimiter. Default is | (pipe)
  686 + "encoder" - how to encode the following value into a payload
  687 + float -> org.apache.lucene.analysis.payloads.FloatEncoder,
  688 + integer -> o.a.l.a.p.IntegerEncoder
  689 + identity -> o.a.l.a.p.IdentityEncoder
  690 + Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
  691 + -->
  692 + <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  693 + </analyzer>
  694 + </fieldtype>
  695 +
  696 + <!-- lowercases the entire field value, keeping it as a single token. -->
  697 + <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  698 + <analyzer>
  699 + <tokenizer class="solr.KeywordTokenizerFactory"/>
  700 + <filter class="solr.LowerCaseFilterFactory" />
  701 + </analyzer>
  702 + </fieldType>
  703 +
  704 + <!--
  705 + Example of using PathHierarchyTokenizerFactory at index time, so
  706 + queries for paths match documents at that path, or in descendent paths
  707 + -->
  708 + <fieldType name="descendent_path" class="solr.TextField">
  709 + <analyzer type="index">
  710 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  711 + </analyzer>
  712 + <analyzer type="query">
  713 + <tokenizer class="solr.KeywordTokenizerFactory" />
  714 + </analyzer>
  715 + </fieldType>
  716 + <!--
  717 + Example of using PathHierarchyTokenizerFactory at query time, so
  718 + queries for paths match documents at that path, or in ancestor paths
  719 + -->
  720 + <fieldType name="ancestor_path" class="solr.TextField">
  721 + <analyzer type="index">
  722 + <tokenizer class="solr.KeywordTokenizerFactory" />
  723 + </analyzer>
  724 + <analyzer type="query">
  725 + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  726 + </analyzer>
  727 + </fieldType>
  728 +
  729 + <!-- since fields of this type are by default not stored or indexed,
  730 + any data added to them will be ignored outright. -->
  731 + <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
  732 +
  733 + <!-- This point type indexes the coordinates as separate fields (subFields).
  734 + If subFieldType is defined, it references a type, and a dynamic field
  735 + definition is created matching *___<typename>. Alternately, if
  736 + subFieldSuffix is defined, that is used to create the subFields.
  737 + Example: if subFieldType="double", then the coordinates would be
  738 + indexed in fields myloc_0___double,myloc_1___double.
  739 + Example: if subFieldSuffix="_d" then the coordinates would be indexed
  740 + in fields myloc_0_d,myloc_1_d
  741 + The subFields are an implementation detail of the fieldType, and end
  742 + users normally should not need to know about them.
  743 + -->
  744 + <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
  745 +
  746 + <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
  747 + <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
  748 +
  749 + <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
  750 + For more information about this and other Spatial fields new to Solr 4, see:
  751 + http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
  752 + -->
  753 + <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
  754 + geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
  755 +
  756 + <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
  757 + Parameters:
  758 + defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
  759 + precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
  760 + providerClass: Lets you plug in other exchange provider backend:
  761 + solr.FileExchangeRateProvider is the default and takes one parameter:
  762 + currencyConfig: name of an xml file holding exchange rates
  763 + solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
  764 + ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
  765 + refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
  766 + -->
  767 + <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
  768 +
  769 +
  770 +
  771 + <!-- some examples for different languages (generally ordered by ISO code) -->
  772 +
  773 + <!-- Arabic -->
  774 + <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
  775 + <analyzer>
  776 + <tokenizer class="solr.StandardTokenizerFactory"/>
  777 + <!-- for any non-arabic -->
  778 + <filter class="solr.LowerCaseFilterFactory"/>
  779 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
  780 + <!-- normalizes ﻯ to ﻱ, etc -->
  781 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  782 + <filter class="solr.ArabicStemFilterFactory"/>
  783 + </analyzer>
  784 + </fieldType>
  785 +
  786 + <!-- Bulgarian -->
  787 + <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
  788 + <analyzer>
  789 + <tokenizer class="solr.StandardTokenizerFactory"/>
  790 + <filter class="solr.LowerCaseFilterFactory"/>
  791 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
  792 + <filter class="solr.BulgarianStemFilterFactory"/>
  793 + </analyzer>
  794 + </fieldType>
  795 +
  796 + <!-- Catalan -->
  797 + <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
  798 + <analyzer>
  799 + <tokenizer class="solr.StandardTokenizerFactory"/>
  800 + <!-- removes l', etc -->
  801 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
  802 + <filter class="solr.LowerCaseFilterFactory"/>
  803 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
  804 + <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
  805 + </analyzer>
  806 + </fieldType>
  807 +
  808 + <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
  809 + <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  810 + <analyzer>
  811 + <tokenizer class="solr.StandardTokenizerFactory"/>
  812 + <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
  813 + <filter class="solr.CJKWidthFilterFactory"/>
  814 + <!-- for any non-CJK -->
  815 + <filter class="solr.LowerCaseFilterFactory"/>
  816 + <filter class="solr.CJKBigramFilterFactory"/>
  817 + </analyzer>
  818 + </fieldType>
  819 +
  820 + <!-- Czech -->
  821 + <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
  822 + <analyzer>
  823 + <tokenizer class="solr.StandardTokenizerFactory"/>
  824 + <filter class="solr.LowerCaseFilterFactory"/>
  825 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
  826 + <filter class="solr.CzechStemFilterFactory"/>
  827 + </analyzer>
  828 + </fieldType>
  829 +
  830 + <!-- Danish -->
  831 + <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
  832 + <analyzer>
  833 + <tokenizer class="solr.StandardTokenizerFactory"/>
  834 + <filter class="solr.LowerCaseFilterFactory"/>
  835 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
  836 + <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
  837 + </analyzer>
  838 + </fieldType>
  839 +
  840 + <!-- German -->
  841 + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  842 + <analyzer>
  843 + <tokenizer class="solr.StandardTokenizerFactory"/>
  844 + <filter class="solr.LowerCaseFilterFactory"/>
  845 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
  846 + <filter class="solr.GermanNormalizationFilterFactory"/>
  847 + <filter class="solr.GermanLightStemFilterFactory"/>
  848 + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
  849 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
  850 + </analyzer>
  851 + </fieldType>
  852 +
  853 + <!-- Greek -->
  854 + <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
  855 + <analyzer>
  856 + <tokenizer class="solr.StandardTokenizerFactory"/>
  857 + <!-- greek specific lowercase for sigma -->
  858 + <filter class="solr.GreekLowerCaseFilterFactory"/>
  859 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
  860 + <filter class="solr.GreekStemFilterFactory"/>
  861 + </analyzer>
  862 + </fieldType>
  863 +
  864 + <!-- Spanish -->
  865 + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
  866 + <analyzer>
  867 + <tokenizer class="solr.StandardTokenizerFactory"/>
  868 + <filter class="solr.LowerCaseFilterFactory"/>
  869 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
  870 + <filter class="solr.SpanishLightStemFilterFactory"/>
  871 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
  872 + </analyzer>
  873 + </fieldType>
  874 +
  875 + <!-- Basque -->
  876 + <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
  877 + <analyzer>
  878 + <tokenizer class="solr.StandardTokenizerFactory"/>
  879 + <filter class="solr.LowerCaseFilterFactory"/>
  880 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
  881 + <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
  882 + </analyzer>
  883 + </fieldType>
  884 +
  885 + <!-- Persian -->
  886 + <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
  887 + <analyzer>
  888 + <!-- for ZWNJ -->
  889 + <charFilter class="solr.PersianCharFilterFactory"/>
  890 + <tokenizer class="solr.StandardTokenizerFactory"/>
  891 + <filter class="solr.LowerCaseFilterFactory"/>
  892 + <filter class="solr.ArabicNormalizationFilterFactory"/>
  893 + <filter class="solr.PersianNormalizationFilterFactory"/>
  894 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
  895 + </analyzer>
  896 + </fieldType>
  897 +
  898 + <!-- Finnish -->
  899 + <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
  900 + <analyzer>
  901 + <tokenizer class="solr.StandardTokenizerFactory"/>
  902 + <filter class="solr.LowerCaseFilterFactory"/>
  903 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
  904 + <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
  905 + <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
  906 + </analyzer>
  907 + </fieldType>
  908 +
  909 + <!-- French -->
  910 + <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  911 + <analyzer>
  912 + <tokenizer class="solr.StandardTokenizerFactory"/>
  913 + <!-- removes l', etc -->
  914 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
  915 + <filter class="solr.LowerCaseFilterFactory"/>
  916 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
  917 + <filter class="solr.FrenchLightStemFilterFactory"/>
  918 + <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
  919 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
  920 + </analyzer>
  921 + </fieldType>
  922 +
  923 + <!-- Irish -->
  924 + <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  925 + <analyzer>
  926 + <tokenizer class="solr.StandardTokenizerFactory"/>
  927 + <!-- removes d', etc -->
  928 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
  929 + <!-- removes n-, etc. position increments is intentionally false! -->
  930 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
  931 + <filter class="solr.IrishLowerCaseFilterFactory"/>
  932 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
  933 + <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  934 + </analyzer>
  935 + </fieldType>
  936 +
  937 + <!-- Galician -->
  938 + <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  939 + <analyzer>
  940 + <tokenizer class="solr.StandardTokenizerFactory"/>
  941 + <filter class="solr.LowerCaseFilterFactory"/>
  942 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
  943 + <filter class="solr.GalicianStemFilterFactory"/>
  944 + <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
  945 + </analyzer>
  946 + </fieldType>
  947 +
  948 + <!-- Hindi -->
  949 + <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  950 + <analyzer>
  951 + <tokenizer class="solr.StandardTokenizerFactory"/>
  952 + <filter class="solr.LowerCaseFilterFactory"/>
  953 + <!-- normalizes unicode representation -->
  954 + <filter class="solr.IndicNormalizationFilterFactory"/>
  955 + <!-- normalizes variation in spelling -->
  956 + <filter class="solr.HindiNormalizationFilterFactory"/>
  957 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
  958 + <filter class="solr.HindiStemFilterFactory"/>
  959 + </analyzer>
  960 + </fieldType>
  961 +
  962 + <!-- Hungarian -->
  963 + <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  964 + <analyzer>
  965 + <tokenizer class="solr.StandardTokenizerFactory"/>
  966 + <filter class="solr.LowerCaseFilterFactory"/>
  967 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
  968 + <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
  969 + <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
  970 + </analyzer>
  971 + </fieldType>
  972 +
  973 + <!-- Armenian -->
  974 + <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  975 + <analyzer>
  976 + <tokenizer class="solr.StandardTokenizerFactory"/>
  977 + <filter class="solr.LowerCaseFilterFactory"/>
  978 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
  979 + <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  980 + </analyzer>
  981 + </fieldType>
  982 +
  983 + <!-- Indonesian -->
  984 + <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  985 + <analyzer>
  986 + <tokenizer class="solr.StandardTokenizerFactory"/>
  987 + <filter class="solr.LowerCaseFilterFactory"/>
  988 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
  989 + <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
  990 + <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  991 + </analyzer>
  992 + </fieldType>
  993 +
  994 + <!-- Italian -->
  995 + <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  996 + <analyzer>
  997 + <tokenizer class="solr.StandardTokenizerFactory"/>
  998 + <!-- removes l', etc -->
  999 + <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
  1000 + <filter class="solr.LowerCaseFilterFactory"/>
  1001 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
  1002 + <filter class="solr.ItalianLightStemFilterFactory"/>
  1003 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
  1004 + </analyzer>
  1005 + </fieldType>
  1006 +
  1007 + <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
  1008 +
  1009 + NOTE: If you want to optimize search for precision, use default operator AND in your query
  1010 + parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
  1011 + OR if you would like to optimize for recall (default).
  1012 + -->
  1013 + <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
  1014 + <analyzer>
  1015 + <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
  1016 +
  1017 + Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
  1018 + is used to segment compounds into their parts, and the compound itself is kept as a synonym.
  1019 +
  1020 + Valid values for attribute mode are:
  1021 + normal: regular segmentation
  1022 + search: segmentation useful for search with synonyms compounds (default)
  1023 + extended: same as search mode, but unigrams unknown words (experimental)
  1024 +
  1025 + For some applications it might be good to use search mode for indexing and normal mode for
  1026 + queries to reduce recall and prevent parts of compounds from being matched and highlighted.
  1027 + Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
  1028 +
  1029 + Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
  1030 + model with your own entries for segmentation, part-of-speech tags and readings without a need
  1031 + to specify weights. Notice that user dictionaries have not been subject to extensive testing.
  1032 +
  1033 + User dictionary attributes are:
  1034 + userDictionary: user dictionary filename
  1035 + userDictionaryEncoding: user dictionary encoding (default is UTF-8)
  1036 +
  1037 + See lang/userdict_ja.txt for a sample user dictionary file.
  1038 +
  1039 + Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
  1040 +
  1041 + See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
  1042 + -->
  1043 + <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
  1044 + <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
  1045 + <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
  1046 + <filter class="solr.JapaneseBaseFormFilterFactory"/>
  1047 + <!-- Removes tokens with certain part-of-speech tags -->
  1048 + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
  1049 + <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
  1050 + <filter class="solr.CJKWidthFilterFactory"/>
  1051 + <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
  1052 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
  1053 + <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
  1054 + <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
  1055 + <!-- Lower-cases romaji characters -->
  1056 + <filter class="solr.LowerCaseFilterFactory"/>
  1057 + </analyzer>
  1058 + </fieldType>
  1059 +
  1060 + <!-- Latvian -->
  1061 + <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  1062 + <analyzer>
  1063 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1064 + <filter class="solr.LowerCaseFilterFactory"/>
  1065 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
  1066 + <filter class="solr.LatvianStemFilterFactory"/>
  1067 + </analyzer>
  1068 + </fieldType>
  1069 +
  1070 + <!-- Dutch -->
  1071 + <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  1072 + <analyzer>
  1073 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1074 + <filter class="solr.LowerCaseFilterFactory"/>
  1075 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
  1076 + <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
  1077 + <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  1078 + </analyzer>
  1079 + </fieldType>
  1080 +
  1081 + <!-- Norwegian -->
  1082 + <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  1083 + <analyzer>
  1084 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1085 + <filter class="solr.LowerCaseFilterFactory"/>
  1086 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
  1087 + <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
  1088 + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
  1089 + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
  1090 + </analyzer>
  1091 + </fieldType>
  1092 +
  1093 + <!-- Portuguese -->
  1094 + <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  1095 + <analyzer>
  1096 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1097 + <filter class="solr.LowerCaseFilterFactory"/>
  1098 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
  1099 +
  1100 + <!-- mairon -->
  1101 + <!-- <filter class="solr.PortugueseLightStemFilterFactory"/> -->
  1102 + <filter class="solr.BrazilianStemFilterFactory"/>
  1103 + <filter class="solr.ASCIIFoldingFilterFactory"/>
  1104 + <!-- mairon -->
  1105 +
  1106 + <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
  1107 + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
  1108 + <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
  1109 + </analyzer>
  1110 + </fieldType>
  1111 +
  1112 + <!-- Romanian -->
  1113 + <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  1114 + <analyzer>
  1115 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1116 + <filter class="solr.LowerCaseFilterFactory"/>
  1117 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
  1118 + <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  1119 + </analyzer>
  1120 + </fieldType>
  1121 +
  1122 + <!-- Russian -->
  1123 + <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  1124 + <analyzer>
  1125 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1126 + <filter class="solr.LowerCaseFilterFactory"/>
  1127 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
  1128 + <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
  1129 + <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
  1130 + </analyzer>
  1131 + </fieldType>
  1132 +
  1133 + <!-- Swedish -->
  1134 + <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  1135 + <analyzer>
  1136 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1137 + <filter class="solr.LowerCaseFilterFactory"/>
  1138 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
  1139 + <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
  1140 + <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  1141 + </analyzer>
  1142 + </fieldType>
  1143 +
  1144 + <!-- Thai -->
  1145 + <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
  1146 + <analyzer>
  1147 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1148 + <filter class="solr.LowerCaseFilterFactory"/>
  1149 + <filter class="solr.ThaiWordFilterFactory"/>
  1150 + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
  1151 + </analyzer>
  1152 + </fieldType>
  1153 +
  1154 + <!-- Turkish -->
  1155 + <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  1156 + <analyzer>
  1157 + <tokenizer class="solr.StandardTokenizerFactory"/>
  1158 + <filter class="solr.TurkishLowerCaseFilterFactory"/>
  1159 + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
  1160 + <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  1161 + </analyzer>
  1162 + </fieldType>
  1163 +
  1164 + </types>
  1165 +
  1166 + <!-- Similarity is the scoring routine for each document vs. a query.
  1167 + A custom Similarity or SimilarityFactory may be specified here, but
  1168 + the default is fine for most applications.
  1169 + For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
  1170 + -->
  1171 + <!--
  1172 + <similarity class="com.example.solr.CustomSimilarityFactory">
  1173 + <str name="paramkey">param value</str>
  1174 + </similarity>
  1175 + -->
  1176 +
  1177 +</schema>
0 1178 \ No newline at end of file
... ...