// MtasUpdateRequestProcessorFactory.java
package mtas.solr.update.processor;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import mtas.analysis.MtasTokenizer;
import mtas.analysis.util.MtasCharFilterFactory;
import mtas.analysis.util.MtasTokenizerFactory;
import mtas.codec.util.CodecUtil;
import mtas.solr.schema.MtasPreAnalyzedField;
/**
 * A factory for creating MtasUpdateRequestProcessor objects.
 *
 * <p>
 * The configuration shared by all created processors is derived from the
 * schema of the first request passed to
 * {@link #getInstance(SolrQueryRequest, SolrQueryResponse, UpdateRequestProcessor)}.
 * </p>
 */
public class MtasUpdateRequestProcessorFactory
    extends UpdateRequestProcessorFactory {

  /** The Constant log. */
  private static final Log log = LogFactory
      .getLog(MtasUpdateRequestProcessorFactory.class);

  /**
   * The config; stays {@code null} until the first initialisation attempt has
   * finished. Volatile so that the unsynchronized check in
   * {@code getInstance} only ever sees a completely built instance.
   */
  private volatile MtasUpdateRequestProcessorConfig config = null;

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.solr.update.processor.UpdateRequestProcessorFactory#init(org.
   * apache.solr.common.util.NamedList)
   */
  @Override
  @SuppressWarnings("rawtypes")
  public void init(NamedList args) {
    super.init(args);
  }

  /**
   * Builds the shared configuration from the schema of the request, unless a
   * configuration is already available.
   *
   * <p>
   * The configuration is assembled in a local object and only published when
   * it is complete. When building fails, an empty configuration is published
   * instead, so initialisation is attempted only once and processors created
   * afterwards leave documents untouched.
   * </p>
   *
   * @param req the request providing schema and resource loader
   * @throws IOException if a field type of class MtasPreAnalyzedField can't
   *         be resolved into char filter and tokenizer factories
   */
  private synchronized void init(SolrQueryRequest req) throws IOException {
    if (config != null) {
      // another caller finished the initialisation in the meantime
      return;
    }
    MtasUpdateRequestProcessorConfig newConfig = new MtasUpdateRequestProcessorConfig();
    try {
      // required info
      Map<String, FieldType> fieldTypes = req.getSchema().getFieldTypes();
      Map<String, SchemaField> fields = req.getSchema().getFields();
      SolrResourceLoader resourceLoader = req.getCore().getSolrConfig()
          .getResourceLoader();
      // check fieldTypes, only MtasPreAnalyzedField is of interest
      for (Entry<String, FieldType> entry : fieldTypes.entrySet()) {
        if (entry.getValue() instanceof MtasPreAnalyzedField) {
          registerFieldType(newConfig, entry.getKey(),
              (MtasPreAnalyzedField) entry.getValue(), fieldTypes,
              resourceLoader);
        }
      }
      // map every field with a registered type to the name of that type
      for (Entry<String, SchemaField> entry : fields.entrySet()) {
        FieldType type = entry.getValue().getType();
        if (type != null && newConfig.fieldTypeTokenizerFactory
            .containsKey(type.getTypeName())) {
          newConfig.fieldMapping.put(entry.getKey(), type.getTypeName());
        }
      }
    } catch (IOException | RuntimeException e) {
      // never expose the half-built configuration; an empty one disables
      // processing and prevents a new attempt on every request
      config = new MtasUpdateRequestProcessorConfig();
      throw e;
    }
    config = newConfig;
  }

  /**
   * Registers the settings of a single MtasPreAnalyzedField type, including
   * the char filter and tokenizer factories of the field type it follows.
   *
   * @param target the configuration to fill
   * @param typeName the name of the field type
   * @param mpaf the field type
   * @param fieldTypes all field types of the schema, by name
   * @param resourceLoader the resource loader passed to created factories
   * @throws IOException if the followed type or its analyzer is missing, or a
   *         factory can't be created
   */
  @SuppressWarnings("unchecked")
  private static void registerFieldType(
      MtasUpdateRequestProcessorConfig target, String typeName,
      MtasPreAnalyzedField mpaf, Map<String, FieldType> fieldTypes,
      SolrResourceLoader resourceLoader) throws IOException {
    target.fieldTypeDefaultConfiguration.put(typeName,
        mpaf.defaultConfiguration);
    target.fieldTypeConfigurationFromField.put(typeName,
        mpaf.configurationFromField);
    target.fieldTypeNumberOfTokensField.put(typeName, mpaf.setNumberOfTokens);
    target.fieldTypeNumberOfPositionsField.put(typeName,
        mpaf.setNumberOfPositions);
    target.fieldTypeSizeField.put(typeName, mpaf.setSize);
    target.fieldTypeErrorField.put(typeName, mpaf.setError);
    target.fieldTypePrefixField.put(typeName, mpaf.setPrefix);
    if (mpaf.followIndexAnalyzer == null
        || !fieldTypes.containsKey(mpaf.followIndexAnalyzer)) {
      throw new IOException(
          typeName + " can't follow " + mpaf.followIndexAnalyzer);
    }
    SimpleOrderedMap<?> analyzer = findAnalyzer(
        fieldTypes.get(mpaf.followIndexAnalyzer));
    if (analyzer == null) {
      throw new IOException("no analyzer");
    }
    ArrayList<SimpleOrderedMap<Object>> listCharFilters;
    SimpleOrderedMap<Object> configTokenizer;
    try {
      listCharFilters = (ArrayList<SimpleOrderedMap<Object>>) analyzer
          .findRecursive(FieldType.CHAR_FILTERS);
      configTokenizer = (SimpleOrderedMap<Object>) analyzer
          .findRecursive(FieldType.TOKENIZER);
    } catch (ClassCastException e) {
      throw new IOException(
          "could not cast charFilters and/or tokenizer from analyzer", e);
    }
    // charfilters; an explicit null marks a type without char filters
    if (listCharFilters != null && !listCharFilters.isEmpty()) {
      target.fieldTypeCharFilterFactories.put(typeName,
          createCharFilterFactories(listCharFilters, resourceLoader));
    } else {
      target.fieldTypeCharFilterFactories.put(typeName, null);
    }
    // tokenizer
    if (configTokenizer != null) {
      target.fieldTypeTokenizerFactory.put(typeName,
          createTokenizerFactory(configTokenizer, resourceLoader));
    }
  }

  /**
   * Looks up the analyzer description of a field type, preferring the index
   * analyzer over the general analyzer.
   *
   * @param fieldType the field type
   * @return the analyzer description, or {@code null} if neither property
   *         holds a SimpleOrderedMap
   */
  private static SimpleOrderedMap<?> findAnalyzer(FieldType fieldType) {
    Object indexAnalyzer = fieldType.getNamedPropertyValues(false)
        .get(FieldType.INDEX_ANALYZER);
    if (indexAnalyzer instanceof SimpleOrderedMap) {
      return (SimpleOrderedMap<?>) indexAnalyzer;
    }
    Object generalAnalyzer = fieldType.getNamedPropertyValues(false)
        .get(FieldType.ANALYZER);
    if (generalAnalyzer instanceof SimpleOrderedMap) {
      return (SimpleOrderedMap<?>) generalAnalyzer;
    }
    return null;
  }

  /**
   * Splits a component description into its class name and its remaining
   * string-valued arguments; entries with a non-string value are ignored.
   *
   * @param component the description of a char filter or tokenizer
   * @param args receives every string-valued entry except the class name
   * @return the class name, or {@code null} if the description has none
   */
  private static String extractClassNameAndArgs(
      SimpleOrderedMap<Object> component, Map<String, String> args) {
    String className = null;
    for (Map.Entry<String, Object> item : component) {
      if (item.getValue() instanceof String) {
        if (item.getKey().equals(FieldType.CLASS_NAME)) {
          className = (String) item.getValue();
        } else {
          args.put(item.getKey(), (String) item.getValue());
        }
      }
    }
    return className;
  }

  /**
   * Instantiates the char filter factories described by the analyzer.
   *
   * @param listCharFilters the non-empty list of char filter descriptions
   * @param resourceLoader the resource loader handed to MtasCharFilterFactory
   *        implementations
   * @return the factories, in the order of the descriptions
   * @throws IOException if a description has no class name, or the class
   *         can't be instantiated or is no CharFilterFactory
   */
  private static CharFilterFactory[] createCharFilterFactories(
      ArrayList<SimpleOrderedMap<Object>> listCharFilters,
      SolrResourceLoader resourceLoader) throws IOException {
    CharFilterFactory[] charFilterFactories = new CharFilterFactory[listCharFilters
        .size()];
    int number = 0;
    for (SimpleOrderedMap<Object> configCharFilter : listCharFilters) {
      Map<String, String> args = new HashMap<>();
      String className = extractClassNameAndArgs(configCharFilter, args);
      if (className == null) {
        throw new IOException("no className");
      }
      try {
        Class<?> cls = Class.forName(className);
        Object cff;
        // MtasCharFilterFactory and its subclasses get the resource loader;
        // the check must ask whether cls IS-A MtasCharFilterFactory, not the
        // other way around
        if (MtasCharFilterFactory.class.isAssignableFrom(cls)) {
          cff = cls.getConstructor(Map.class, ResourceLoader.class)
              .newInstance(args, resourceLoader);
        } else {
          cff = cls.getConstructor(Map.class).newInstance(args);
        }
        if (!(cff instanceof CharFilterFactory)) {
          throw new IOException(className + " is no CharFilterFactory");
        }
        charFilterFactories[number] = (CharFilterFactory) cff;
        number++;
      } catch (ReflectiveOperationException | IllegalArgumentException e) {
        throw new IOException(e);
      }
    }
    return charFilterFactories;
  }

  /**
   * Instantiates the tokenizer factory described by the analyzer.
   *
   * @param configTokenizer the tokenizer description
   * @param resourceLoader the resource loader handed to the factory
   * @return the tokenizer factory
   * @throws IOException if the description has no class name, or the class
   *         can't be instantiated or is no MtasTokenizerFactory
   */
  private static MtasTokenizerFactory createTokenizerFactory(
      SimpleOrderedMap<Object> configTokenizer,
      SolrResourceLoader resourceLoader) throws IOException {
    Map<String, String> args = new HashMap<>();
    String className = extractClassNameAndArgs(configTokenizer, args);
    if (className == null) {
      throw new IOException("no className");
    }
    try {
      Class<?> cls = Class.forName(className);
      Object factory = cls.getConstructor(Map.class, ResourceLoader.class)
          .newInstance(args, resourceLoader);
      if (!(factory instanceof MtasTokenizerFactory)) {
        throw new IOException(className + " is no MtasTokenizerFactory");
      }
      return (MtasTokenizerFactory) factory;
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
      throw new IOException(e);
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.solr.update.processor.UpdateRequestProcessorFactory#getInstance(
   * org.apache.solr.request.SolrQueryRequest,
   * org.apache.solr.response.SolrQueryResponse,
   * org.apache.solr.update.processor.UpdateRequestProcessor)
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
      SolrQueryResponse rsp, UpdateRequestProcessor next) {
    if (config == null) {
      try {
        init(req);
      } catch (IOException e) {
        // keep the cause and stack trace in the log; the processor created
        // below then passes documents on unchanged
        log.error("could not initialise " + getClass().getSimpleName(), e);
      }
    }
    return new MtasUpdateRequestProcessor(next, config);
  }
}
/**
 * Update request processor that runs the configured char filters and
 * tokenizer over every mapped string field of an added document, replaces the
 * field value with the file name reported by the result writer, and records
 * size, token, position, prefix and error information in the fields
 * configured for the field type.
 */
class MtasUpdateRequestProcessor extends UpdateRequestProcessor {

  /** The log. */
  private static final Log log = LogFactory
      .getLog(MtasUpdateRequestProcessor.class);

  /** The shared configuration; may be {@code null}. */
  private final MtasUpdateRequestProcessorConfig config;

  /**
   * Instantiates a new processor.
   *
   * @param next the next processor in the chain
   * @param config the configuration; with {@code null} or an empty field
   *        mapping documents are passed on unchanged
   */
  public MtasUpdateRequestProcessor(UpdateRequestProcessor next,
      MtasUpdateRequestProcessorConfig config) {
    super(next);
    this.config = config;
  }

  /**
   * Pre-analyzes every mapped field holding a string value and then passes
   * the command up the chain.
   *
   * @param cmd the add command
   * @throws IOException Signals that an I/O exception has occurred.
   */
  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    if (config != null && !config.fieldMapping.isEmpty()) {
      // get document
      SolrInputDocument doc = cmd.getSolrInputDocument();
      // loop over configurations: field name -> field type name
      for (Entry<String, String> mapping : config.fieldMapping.entrySet()) {
        SolrInputField originalValue = doc.get(mapping.getKey());
        if (originalValue != null
            && originalValue.getValue() instanceof String) {
          preAnalyzeField(doc, mapping.getKey(), mapping.getValue(),
              (String) originalValue.getValue());
        }
      }
    }
    // pass it up the chain
    super.processAdd(cmd);
  }

  /**
   * Tokenizes the stored value of one field and updates the document.
   *
   * <p>
   * An IOException raised while processing is logged and reported in the
   * error field(s) of the field type; the counters are then set to 0 and the
   * prefix field(s) removed.
   * </p>
   *
   * @param doc the document to update
   * @param field the name of the field
   * @param fieldType the name of the field type
   * @param storedValue the original string value of the field
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private void preAnalyzeField(SolrInputDocument doc, String field,
      String fieldType, String storedValue) throws IOException {
    MtasUpdateRequestProcessorResultWriter result = null;
    try {
      // configuration
      String defaultConfiguration = config.fieldTypeDefaultConfiguration
          .get(fieldType);
      String configuration = null;
      String configurationField = config.fieldTypeConfigurationFromField
          .get(fieldType);
      if (configurationField != null) {
        Object obj = doc.getFieldValue(configurationField);
        if (obj != null) {
          configuration = obj.toString();
        }
      }
      // create reader and wrap it in the charFilters
      Reader reader = new StringReader(storedValue);
      CharFilterFactory[] charFilterFactories = config.fieldTypeCharFilterFactories
          .get(fieldType);
      if (charFilterFactories != null) {
        for (CharFilterFactory charFilterFactory : charFilterFactories) {
          if (charFilterFactory instanceof MtasCharFilterFactory) {
            reader = ((MtasCharFilterFactory) charFilterFactory)
                .create(reader, configuration, defaultConfiguration);
          } else {
            reader = charFilterFactory.create(reader);
          }
          if (reader == null) {
            throw new IOException("charFilter "
                + charFilterFactory.getClass().getName() + " returns null");
          }
        }
      }
      MtasUpdateRequestProcessorSizeReader sizeReader = new MtasUpdateRequestProcessorSizeReader(
          reader);
      // tokenizerFactory
      MtasTokenizerFactory tokenizerFactory = config.fieldTypeTokenizerFactory
          .get(fieldType);
      result = new MtasUpdateRequestProcessorResultWriter(storedValue);
      int numberOfPositions = 0;
      int numberOfTokens = 0;
      Set<String> prefixes = new HashSet<>();
      try (MtasTokenizer tokenizer = tokenizerFactory.create(configuration,
          defaultConfiguration)) {
        tokenizer.setReader(sizeReader);
        tokenizer.reset();
        // attributes
        CharTermAttribute termAttribute = tokenizer
            .getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttribute = tokenizer
            .getAttribute(OffsetAttribute.class);
        PositionIncrementAttribute positionIncrementAttribute = tokenizer
            .getAttribute(PositionIncrementAttribute.class);
        PayloadAttribute payloadAttribute = tokenizer
            .getAttribute(PayloadAttribute.class);
        FlagsAttribute flagsAttribute = tokenizer
            .getAttribute(FlagsAttribute.class);
        while (tokenizer.incrementToken()) {
          String term = null;
          Integer offsetStart = null;
          Integer offsetEnd = null;
          Integer posIncr = null;
          Integer flags = null;
          BytesRef payload = null;
          if (termAttribute != null) {
            term = termAttribute.toString();
            prefixes.add(CodecUtil.termPrefix(term));
          }
          if (offsetAttribute != null) {
            offsetStart = offsetAttribute.startOffset();
            offsetEnd = offsetAttribute.endOffset();
          }
          if (positionIncrementAttribute != null) {
            posIncr = positionIncrementAttribute.getPositionIncrement();
          } else {
            posIncr = 0;
          }
          if (payloadAttribute != null) {
            payload = payloadAttribute.getPayload();
          }
          if (flagsAttribute != null) {
            flags = flagsAttribute.getFlags();
          }
          numberOfTokens++;
          numberOfPositions += posIncr;
          result.addItem(term, offsetStart, offsetEnd, posIncr, payload,
              flags);
        }
        // update field: the original value is dropped and, when tokens were
        // written, replaced by the file name of the result
        doc.remove(field);
        if (result.getTokenNumber() > 0) {
          doc.addField(field, result.getFileName());
        }
      } finally {
        result.close();
      }
      // update size
      setFields(doc, config.fieldTypeSizeField.get(fieldType),
          sizeReader.getTotalReadSize());
      // update numberOfPositions
      setFields(doc, config.fieldTypeNumberOfPositionsField.get(fieldType),
          numberOfPositions);
      // update numberOfTokens
      setFields(doc, config.fieldTypeNumberOfTokensField.get(fieldType),
          numberOfTokens);
      // update prefixes
      setFields(doc, config.fieldTypePrefixField.get(fieldType), prefixes);
    } catch (IOException e) {
      log.info(e);
      // update error; via setFields, so a type without error field does not
      // produce a field without name
      setFields(doc, config.fieldTypeErrorField.get(fieldType),
          e.getMessage());
      // update size
      setFields(doc, config.fieldTypeSizeField.get(fieldType), 0);
      // update numberOfPositions
      setFields(doc, config.fieldTypeNumberOfPositionsField.get(fieldType),
          0);
      // update numberOfTokens
      setFields(doc, config.fieldTypeNumberOfTokensField.get(fieldType), 0);
      // update prefixes
      removeFields(doc, config.fieldTypePrefixField.get(fieldType));
      if (result != null) {
        result.forceCloseAndDelete();
        doc.remove(field);
      }
    }
  }

  /**
   * Removes the listed fields from the document.
   *
   * @param doc the document
   * @param fieldNames comma-separated field names, may be {@code null};
   *        names are trimmed and empty names skipped, as in
   *        {@link #setFields(SolrInputDocument, String, Object)}
   */
  private void removeFields(SolrInputDocument doc, String fieldNames) {
    if (fieldNames != null) {
      for (String fieldName : fieldNames.split(",")) {
        String trimmed = fieldName.trim();
        if (!trimmed.isEmpty()) {
          doc.removeField(trimmed);
        }
      }
    }
  }

  /**
   * Adds the value to each of the listed fields of the document.
   *
   * @param doc the document
   * @param fieldNames comma-separated field names, may be {@code null};
   *        names are trimmed and empty names skipped
   * @param value the value to add
   */
  private void setFields(SolrInputDocument doc, String fieldNames,
      Object value) {
    if (fieldNames != null) {
      for (String fieldName : fieldNames.split(",")) {
        String trimmed = fieldName.trim();
        if (!trimmed.isEmpty()) {
          doc.addField(trimmed, value);
        }
      }
    }
  }
}
/**
 * Plain holder for the settings shared between the factory and the processors
 * it creates. Every map starts out empty; all maps except
 * {@code fieldMapping} are keyed by the name of a field type.
 */
class MtasUpdateRequestProcessorConfig {

  /** Char filter factories per field type; the value may be {@code null}. */
  HashMap<String, CharFilterFactory[]> fieldTypeCharFilterFactories = new HashMap<>();

  /** Tokenizer factory per field type. */
  HashMap<String, MtasTokenizerFactory> fieldTypeTokenizerFactory = new HashMap<>();

  /** Name of the field type, keyed by field name. */
  HashMap<String, String> fieldMapping = new HashMap<>();

  /** Default configuration per field type. */
  HashMap<String, String> fieldTypeDefaultConfiguration = new HashMap<>();

  /** Name of the document field holding the configuration, per field type. */
  HashMap<String, String> fieldTypeConfigurationFromField = new HashMap<>();

  /** Field name(s) receiving the number of tokens, per field type. */
  HashMap<String, String> fieldTypeNumberOfTokensField = new HashMap<>();

  /** Field name(s) receiving the number of positions, per field type. */
  HashMap<String, String> fieldTypeNumberOfPositionsField = new HashMap<>();

  /** Field name(s) receiving the size, per field type. */
  HashMap<String, String> fieldTypeSizeField = new HashMap<>();

  /** Field name(s) receiving the error message, per field type. */
  HashMap<String, String> fieldTypeErrorField = new HashMap<>();

  /** Field name(s) receiving the prefixes, per field type. */
  HashMap<String, String> fieldTypePrefixField = new HashMap<>();

  /**
   * Instantiates a new config with empty maps.
   */
  MtasUpdateRequestProcessorConfig() {
    // nothing to do: the maps are created by the field initialisers
  }
}
/**
 * Reader that delegates to another reader and keeps track of the total number
 * of characters read through it.
 */
class MtasUpdateRequestProcessorSizeReader extends Reader {

  /** The wrapped reader. */
  Reader reader;

  /** The total number of characters read so far. */
  long totalReadSize;

  /**
   * Instantiates a new size reader with a total of 0.
   *
   * @param reader the reader to delegate to
   */
  public MtasUpdateRequestProcessorSizeReader(Reader reader) {
    this.reader = reader;
    totalReadSize = 0;
  }

  /**
   * Reads from the wrapped reader and adds the number of characters read to
   * the total.
   *
   * @param cbuf the destination buffer
   * @param off the offset at which to start storing characters
   * @param len the maximum number of characters to read
   * @return the value returned by the wrapped reader
   * @throws IOException Signals that an I/O exception has occurred.
   */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    int read = reader.read(cbuf, off, len);
    // -1 marks the end of the stream and is not a number of characters, so
    // it must not be subtracted from the total
    if (read > 0) {
      totalReadSize += read;
    }
    return read;
  }

  /**
   * Closes the wrapped reader.
   *
   * @throws IOException Signals that an I/O exception has occurred.
   */
  @Override
  public void close() throws IOException {
    reader.close();
  }

  /**
   * Gets the total number of characters read so far.
   *
   * @return the total read size
   */
  public long getTotalReadSize() {
    return totalReadSize;
  }
}