|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  org.apache.solr.update.processor.UpdateRequestProcessor
org.apache.solr.update.processor.LanguageIdentifierUpdateProcessor
public abstract class LanguageIdentifierUpdateProcessor
Identifies the language of a set of input fields. Also supports mapping of field names based on detected language.
See http://wiki.apache.org/solr/LanguageDetection
| Field Summary | |
|---|---|
protected HashSet<String> |
allMapFieldsSet
|
protected String |
docIdField
|
protected boolean |
enabled
|
protected boolean |
enableMapping
|
protected boolean |
enforceSchema
|
protected String[] |
fallbackFields
|
protected String |
fallbackValue
|
protected String[] |
inputFields
|
protected String |
langField
|
protected Pattern |
langPattern
|
protected String |
langsField
|
protected HashSet<String> |
langWhitelist
|
protected HashMap<String,String> |
lcMap
|
protected static org.slf4j.Logger |
log
|
protected String[] |
mapFields
|
protected boolean |
mapIndividual
|
protected HashSet<String> |
mapIndividualFieldsSet
|
protected boolean |
mapKeepOrig
|
protected boolean |
mapOverwrite
|
protected Pattern |
mapPattern
|
protected String |
mapReplaceStr
|
protected boolean |
overwrite
|
protected IndexSchema |
schema
|
protected double |
threshold
|
protected Pattern |
tikaSimilarityPattern
|
| Fields inherited from class org.apache.solr.update.processor.UpdateRequestProcessor |
|---|
next |
| Fields inherited from interface org.apache.solr.update.processor.LangIdParams |
|---|
DOCID_FIELD_DEFAULT, DOCID_LANGFIELD_DEFAULT, DOCID_LANGSFIELD_DEFAULT, DOCID_PARAM, DOCID_THRESHOLD_DEFAULT, ENFORCE_SCHEMA, FALLBACK, FALLBACK_FIELDS, FIELDS_PARAM, LANG_FIELD, LANG_WHITELIST, LANGS_FIELD, LANGUAGE_ID, MAP_ENABLE, MAP_FL, MAP_INDIVIDUAL, MAP_INDIVIDUAL_FL, MAP_KEEP_ORIG, MAP_LCMAP, MAP_OVERWRITE, MAP_PATTERN, MAP_PATTERN_DEFAULT, MAP_REPLACE, MAP_REPLACE_DEFAULT, OVERWRITE, THRESHOLD |
| Constructor Summary | |
|---|---|
LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
SolrQueryResponse rsp,
UpdateRequestProcessor next)
|
|
| Method Summary | |
|---|---|
protected String |
concatFields(SolrInputDocument doc,
String[] fields)
|
protected abstract List<DetectedLanguage> |
detectLanguage(String content)
Detects language(s) from a string. |
protected String |
getMappedField(String currentField,
String language)
Returns the name of the field to map the current contents into, so that they are properly analyzed. |
boolean |
isEnabled()
Tells if this processor is enabled or not |
protected SolrInputDocument |
process(SolrInputDocument doc)
This is the main, testable process method called from processAdd() |
void |
processAdd(AddUpdateCommand cmd)
|
protected String |
resolveLanguage(List<DetectedLanguage> languages,
String fallbackLang)
Chooses a language based on the list of candidates detected |
protected String |
resolveLanguage(String language,
String fallbackLang)
Chooses a language based on the list of candidates detected |
void |
setEnabled(boolean enabled)
|
| Methods inherited from class org.apache.solr.update.processor.UpdateRequestProcessor |
|---|
finish, processCommit, processDelete, processMergeIndexes, processRollback |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected static final org.slf4j.Logger log
protected boolean enabled
protected String[] inputFields
protected String[] mapFields
protected Pattern mapPattern
protected String mapReplaceStr
protected String langField
protected String langsField
protected String docIdField
protected String fallbackValue
protected String[] fallbackFields
protected boolean enableMapping
protected boolean mapKeepOrig
protected boolean overwrite
protected boolean mapOverwrite
protected boolean mapIndividual
protected boolean enforceSchema
protected double threshold
protected HashSet<String> langWhitelist
protected HashSet<String> mapIndividualFieldsSet
protected HashSet<String> allMapFieldsSet
protected HashMap<String,String> lcMap
protected IndexSchema schema
protected final Pattern tikaSimilarityPattern
protected final Pattern langPattern
| Constructor Detail |
|---|
public LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
SolrQueryResponse rsp,
UpdateRequestProcessor next)
| Method Detail |
|---|
public void processAdd(AddUpdateCommand cmd)
throws IOException
Overrides: processAdd in class UpdateRequestProcessor
Throws: IOException
protected SolrInputDocument process(SolrInputDocument doc)
doc - the SolrInputDocument to work on
protected String concatFields(SolrInputDocument doc,
String[] fields)
protected abstract List<DetectedLanguage> detectLanguage(String content)
content - The content to identify
protected String resolveLanguage(String language,
String fallbackLang)
language - language code as a string
fallbackLang - the language code to use as a fallback
protected String resolveLanguage(List<DetectedLanguage> languages,
String fallbackLang)
languages - a List of DetectedLanguages with certainty score
fallbackLang - the language code to use as a fallback
protected String getMappedField(String currentField,
String language)
currentField - The current field name
language - the language code
public boolean isEnabled()
public void setEnabled(boolean enabled)
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||