// MtasSpanRegexpQuery.java
package mtas.search.spans;
import java.io.IOException;
import mtas.analysis.token.MtasToken;
import mtas.codec.util.CodecUtil;
import mtas.search.spans.util.MtasSpanQuery;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
/**
 * Span query that matches the terms of a field whose text satisfies a regular
 * expression.
 *
 * <p>The regular expression is wrapped in a Lucene {@link RegexpQuery} inside
 * a {@link SpanMultiTermQueryWrapper}. On {@link #rewrite(IndexReader)} the
 * wrapper is expanded and every resulting {@link SpanTermQuery} is converted
 * into a {@code MtasSpanTermQuery}; the converted clauses are combined in a
 * {@code MtasSpanOrQuery}.
 *
 * <p>All instance state is assigned once in the constructor and never
 * modified afterwards.
 */
public class MtasSpanRegexpQuery extends MtasSpanQuery {

  /**
   * Upper bound on the number of terms a regular expression may expand to;
   * {@link #rewrite(IndexReader)} fails when the expansion exceeds it.
   */
  private static final int MTAS_REGEXP_EXPAND_BOUNDARY = 1000000;

  /** Part of the term text before {@link MtasToken#DELIMITER}. */
  private final String prefix;

  /**
   * Part of the term text after {@link MtasToken#DELIMITER}, or {@code null}
   * when the delimiter is absent or nothing follows it.
   */
  private final String value;

  /** Flag handed to every {@code MtasSpanTermQuery} created on rewrite. */
  private final boolean singlePosition;

  /** The term holding the field name and the regular expression text. */
  private final Term term;

  /** The wrapped Lucene multi-term span query for the regular expression. */
  private final SpanMultiTermQueryWrapper<RegexpQuery> query;

  /**
   * Instantiates a new mtas span regexp query with {@code singlePosition}
   * set to {@code true}.
   *
   * @param term the term holding the field and the regular expression
   */
  public MtasSpanRegexpQuery(Term term) {
    this(term, true);
  }

  /**
   * Instantiates a new mtas span regexp query.
   *
   * <p>The superclass constructor receives {@code 1} for both arguments when
   * {@code singlePosition} is {@code true} and {@code null} for both
   * otherwise (presumably the minimum and maximum span width; confirm
   * against {@code MtasSpanQuery}).
   *
   * @param term the term holding the field and the regular expression
   * @param singlePosition the single position flag
   */
  public MtasSpanRegexpQuery(Term term, boolean singlePosition) {
    super(singlePosition ? 1 : null, singlePosition ? 1 : null);
    this.query = new SpanMultiTermQueryWrapper<>(new RegexpQuery(term));
    this.term = term;
    this.singlePosition = singlePosition;

    // Split the term text into prefix and value on the first delimiter.
    String text = term.text();
    int delimiterIndex = text.indexOf(MtasToken.DELIMITER);
    if (delimiterIndex >= 0) {
      String remainder = text
          .substring(delimiterIndex + MtasToken.DELIMITER.length());
      this.prefix = text.substring(0, delimiterIndex);
      // An empty value is treated the same as a missing value.
      this.value = remainder.isEmpty() ? null : remainder;
    } else {
      this.prefix = text;
      this.value = null;
    }
  }

  /**
   * Expands the regular expression against the reader and converts the
   * result into MTAS span queries.
   *
   * @param reader the index reader used to expand the regular expression
   * @return the rewritten {@code MtasSpanOrQuery} over the expanded terms
   * @throws IOException if the wrapped query does not rewrite to a
   *         {@link SpanOrQuery}, if one of its clauses is not a
   *         {@link SpanTermQuery}, or if the number of clauses exceeds
   *         {@code MTAS_REGEXP_EXPAND_BOUNDARY}
   *
   * @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader)
   */
  @Override
  public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
    Query rewritten = query.rewrite(reader);
    if (!(rewritten instanceof SpanOrQuery)) {
      throw new IOException("no SpanOrQuery after rewrite");
    }
    SpanQuery[] clauses = ((SpanOrQuery) rewritten).getClauses();
    if (clauses.length > MTAS_REGEXP_EXPAND_BOUNDARY) {
      // forward index solution ?
      throw new IOException("Regexp \"" + CodecUtil.termValue(term.text())
          + "\" expands to " + clauses.length + " terms, too many (boundary "
          + MTAS_REGEXP_EXPAND_BOUNDARY + ")!");
    }
    MtasSpanQuery[] newClauses = new MtasSpanQuery[clauses.length];
    for (int i = 0; i < clauses.length; i++) {
      if (!(clauses[i] instanceof SpanTermQuery)) {
        throw new IOException("no SpanTermQuery after rewrite");
      }
      newClauses[i] = new MtasSpanTermQuery((SpanTermQuery) clauses[i],
          singlePosition).rewrite(reader);
    }
    return new MtasSpanOrQuery(newClauses).rewrite(reader);
  }

  /**
   * Renders the query as {@code SimpleClassName([field:prefix])} or, when a
   * value is present, {@code SimpleClassName([field:prefix=value])}.
   *
   * @param field not used by this implementation
   * @return the textual representation of this query
   *
   * @see org.apache.lucene.search.Query#toString(java.lang.String)
   */
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    buffer.append(this.getClass().getSimpleName()).append("([");
    buffer.append(this.query.getField()).append(":").append(prefix);
    if (value != null) {
      buffer.append("=").append(value);
    }
    buffer.append("])");
    return buffer.toString();
  }

  /**
   * Returns the field of the term this query was constructed with.
   *
   * @return the field name
   *
   * @see org.apache.lucene.search.spans.SpanQuery#getField()
   */
  @Override
  public String getField() {
    return term.field();
  }

  /**
   * Rewrites the wrapped multi-term span query through the searcher and
   * delegates weight creation to the rewritten query.
   *
   * @param searcher the searcher used for the rewrite and the weight
   * @param needsScores passed on unchanged to the rewritten query
   * @param boost passed on unchanged to the rewritten query
   * @return the weight created by the rewritten query
   * @throws IOException if rewriting or weight creation fails
   *
   * @see org.apache.lucene.search.spans.SpanQuery#createWeight(
   *      org.apache.lucene.search.IndexSearcher, boolean, float)
   */
  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
      float boost) throws IOException {
    return ((SpanQuery) searcher.rewrite(query)).createWeight(searcher,
        needsScores, boost);
  }

  /**
   * Two regexp queries are equal when they are of the same class and have
   * equal terms and the same single position flag.
   *
   * @param obj the object to compare with
   * @return {@code true} if {@code obj} is an equal query
   *
   * @see org.apache.lucene.search.Query#equals(java.lang.Object)
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    MtasSpanRegexpQuery that = (MtasSpanRegexpQuery) obj;
    return term.equals(that.term) && singlePosition == that.singlePosition;
  }

  /**
   * Hash code derived from the simple class name, the term and the single
   * position flag, i.e. the same state that {@link #equals(Object)} compares.
   *
   * @return the hash code
   *
   * @see org.apache.lucene.search.Query#hashCode()
   */
  @Override
  public int hashCode() {
    int h = this.getClass().getSimpleName().hashCode();
    h = (h * 7) ^ term.hashCode();
    h += (singlePosition ? 1 : 0);
    return h;
  }

  /**
   * Reports that this query is not a match-all-positions query.
   *
   * @return always {@code false}
   */
  @Override
  public boolean isMatchAllPositionsQuery() {
    return false;
  }
}