MtasSpanSequenceQuery.java
- package mtas.search.spans;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.LeafReaderContext;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.index.TermContext;
- import org.apache.lucene.index.Terms;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.spans.SpanWeight;
- import org.apache.lucene.search.spans.Spans;
- import mtas.search.spans.util.MtasExpandSpanQuery;
- import mtas.search.spans.util.MtasIgnoreItem;
- import mtas.search.spans.util.MtasSpanQuery;
- import mtas.search.spans.util.MtasSpanWeight;
- import mtas.search.spans.util.MtasSpans;
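- /**
- * A span query over an ordered list of sequence items, each of which may be
- * optional, with optional left/right expansion bounds and an optional ignore
- * query.
- */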
- public class MtasSpanSequenceQuery extends MtasSpanQuery {
- /** The sequence items (a query plus an optional flag each), as passed to the constructor. */
- private List<MtasSpanSequenceItem> items;
- /** Lower bound of the left expansion; counted in the minimum width and handed to MtasExpandSpanQuery on rewrite. */
- private int leftMinimum;
- /** Upper bound of the left expansion; counted in the maximum width and handed to MtasExpandSpanQuery on rewrite. */
- private int leftMaximum;
- /** Lower bound of the right expansion; counted in the minimum width and handed to MtasExpandSpanQuery on rewrite. */
- private int rightMinimum;
- /** Upper bound of the right expansion; counted in the maximum width and handed to MtasExpandSpanQuery on rewrite. */
- private int rightMaximum;
- /** The ignore query, or null when none was given or no field could be determined. */
- private MtasSpanQuery ignoreQuery;
- /** The maximum ignore length; null exactly when ignoreQuery is null, otherwise the given value or the MtasIgnoreItem default. */
- private Integer maximumIgnoreLength;
- /** The field reported by the item queries; null when no item query reports one. */
- private String field;
- /**
- * Creates a sequence query without left or right expansion.
- *
- * <p>Delegates to the full constructor with all four expansion bounds set to
- * zero.
- *
- * @param items the sequence items
- * @param ignoreQuery the ignore query, may be {@code null}
- * @param maximumIgnoreLength the maximum ignore length, may be {@code null}
- */
- public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items, MtasSpanQuery ignoreQuery,
- Integer maximumIgnoreLength) {
- this(items, 0, 0, 0, 0, ignoreQuery, maximumIgnoreLength);
- }
- /**
- * Creates a sequence query with explicit left and right expansion bounds.
- *
- * <p>The constructor derives the common field from the item queries, checks
- * that the ignore query (if any) uses that same field, and registers the
- * resulting minimum and maximum width through {@code setWidth}. A width of
- * {@code null} is used as soon as a contributing query reports no width.
- *
- * @param items the sequence items
- * @param leftMinimum the left minimum
- * @param leftMaximum the left maximum
- * @param rightMinimum the right minimum
- * @param rightMaximum the right maximum
- * @param ignoreQuery the ignore query, may be {@code null}
- * @param maximumIgnoreLength the maximum ignore length; {@code null} selects
- * {@code MtasIgnoreItem.DEFAULT_MAXIMUM_IGNORE_LENGTH} when an ignore query is used
- * @throws IllegalArgumentException if the items report different fields, or
- * the ignore query reports a field other than the one of the items
- */
- public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items,
- int leftMinimum, int leftMaximum, int rightMinimum, int rightMaximum,
- MtasSpanQuery ignoreQuery, Integer maximumIgnoreLength) {
- super(null, null);
- this.items = items;
- this.leftMinimum = leftMinimum;
- this.leftMaximum = leftMaximum;
- this.rightMinimum = rightMinimum;
- this.rightMaximum = rightMaximum;
- // running width bounds; null means the bound is not known
- Integer minimumWidth = leftMinimum + rightMinimum;
- Integer maximumWidth = leftMaximum + rightMaximum;
- for (MtasSpanSequenceItem item : items) {
- MtasSpanQuery itemQuery = item.getQuery();
- String itemField = itemQuery.getField();
- if (field == null) {
- field = itemField;
- } else if (itemField != null && !itemField.equals(field)) {
- throw new IllegalArgumentException("Clauses must have same field.");
- }
- // optional items do not add to the minimum width
- if (minimumWidth != null && !item.isOptional()) {
- Integer itemMinimum = itemQuery.getMinimumWidth();
- if (itemMinimum == null) {
- minimumWidth = null;
- } else {
- minimumWidth = minimumWidth + itemMinimum;
- }
- }
- if (maximumWidth != null) {
- Integer itemMaximum = itemQuery.getMaximumWidth();
- if (itemMaximum == null) {
- maximumWidth = null;
- } else {
- maximumWidth = maximumWidth + itemMaximum;
- }
- }
- }
- if (field == null || ignoreQuery == null) {
- // nothing to ignore, or no field to validate the ignore query against
- this.ignoreQuery = null;
- this.maximumIgnoreLength = null;
- } else {
- String ignoreField = ignoreQuery.getField();
- if (ignoreField != null && !field.equals(ignoreField)) {
- throw new IllegalArgumentException(
- "ignore must have same field as clauses");
- }
- this.ignoreQuery = ignoreQuery;
- if (maximumIgnoreLength != null) {
- this.maximumIgnoreLength = maximumIgnoreLength;
- } else {
- this.maximumIgnoreLength = MtasIgnoreItem.DEFAULT_MAXIMUM_IGNORE_LENGTH;
- }
- // every gap between two items may add ignored material to the maximum
- if (maximumWidth != null && items.size() > 1) {
- Integer ignoreMaximumWidth = this.ignoreQuery.getMaximumWidth();
- if (ignoreMaximumWidth == null) {
- maximumWidth = null;
- } else {
- maximumWidth += (items.size() - 1) * this.maximumIgnoreLength
- * ignoreMaximumWidth;
- }
- }
- }
- setWidth(minimumWidth, maximumWidth);
- }
- /**
- * Returns the field determined from the item queries in the constructor.
- *
- * @return the field, or {@code null} when no item query reported one
- */
- @Override
- public String getField() {
- return this.field;
- }
- /**
- * Returns the sequence items of this query.
- *
- * @return the list of items held by this query (not a copy)
- */
- public List<MtasSpanSequenceItem> getItems() {
- return this.items;
- }
- /**
- * Returns the ignore query used by this sequence.
- *
- * @return the ignore query, or {@code null} when none is in effect
- */
- public MtasSpanQuery getIgnoreQuery() {
- return this.ignoreQuery;
- }
- /**
- * Returns the maximum ignore length used together with the ignore query.
- *
- * @return the maximum ignore length, or {@code null} when no ignore query is
- * in effect
- */
- public Integer getMaximumIgnoreLength() {
- return this.maximumIgnoreLength;
- }
- /**
- * Rewrites this sequence query into a simpler form.
- *
- * <p>A sequence of exactly one item is replaced by that item's query, wrapped
- * in a {@code MtasExpandSpanQuery} when a left or right maximum is set.
- * Otherwise the ignore query and every item are rewritten, optional items
- * that rewrite to {@code MtasSpanMatchNoneQuery} are dropped, neighbouring
- * items are combined where {@code MtasSpanSequenceItem.merge} returns a
- * result, and (only without ignore query) a leading or trailing match-all
- * item is folded into the left or right expansion bounds.
- *
- * @param reader the index reader used for rewriting
- * @return the rewritten query; a {@code MtasSpanMatchNoneQuery} when a
- * required item rewrites to match-none or when no items remain
- * @throws IOException if a nested rewrite signals an I/O problem
- */
- @Override
- public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
- if (items.size() == 1) {
- // a one-item sequence is just that item, plus the expansion if any
- MtasSpanQuery singleQuery = items.get(0).getQuery();
- if (leftMaximum != 0 || rightMaximum != 0) {
- singleQuery = new MtasExpandSpanQuery(singleQuery, leftMinimum,
- leftMaximum, rightMinimum, rightMaximum);
- }
- return singleQuery.rewrite(reader);
- } else {
- MtasSpanSequenceItem newItem;
- MtasSpanSequenceItem previousNewItem = null;
- ArrayList<MtasSpanSequenceItem> newItems = new ArrayList<>(items.size());
- int newLeftMinimum = leftMinimum;
- int newLeftMaximum = leftMaximum;
- int newRightMinimum = rightMinimum;
- int newRightMaximum = rightMaximum;
- MtasSpanQuery newIgnoreClause = ignoreQuery != null
- ? ignoreQuery.rewrite(reader) : null;
- boolean actuallyRewritten = ignoreQuery != null
- && !newIgnoreClause.equals(ignoreQuery);
- for (int i = 0; i < items.size(); i++) {
- newItem = items.get(i).rewrite(reader);
- if (newItem.getQuery() instanceof MtasSpanMatchNoneQuery) {
- if (!newItem.isOptional()) {
- // a required item that never matches makes the sequence unmatchable
- return new MtasSpanMatchNoneQuery(field);
- } else {
- // an optional item that never matches is simply left out
- actuallyRewritten = true;
- }
- } else {
- actuallyRewritten |= !items.get(i).equals(newItem);
- MtasSpanSequenceItem previousMergedItem = MtasSpanSequenceItem.merge(
- previousNewItem, newItem, ignoreQuery, maximumIgnoreLength);
- if (previousMergedItem != null) {
- newItems.set((newItems.size() - 1), previousMergedItem);
- actuallyRewritten = true;
- // the tail of newItems is now the merged item, so the next merge has
- // to start from it; continuing from the unmerged item would replace
- // the tail by a merge that no longer contains the earlier item
- previousNewItem = previousMergedItem;
- } else {
- newItems.add(newItem);
- previousNewItem = newItem;
- }
- }
- }
- // check first and last; skipped when no items are left, so that the
- // empty case reaches the match-none result below instead of failing on
- // newItems.get(0)
- if (ignoreQuery == null && !newItems.isEmpty()) {
- ArrayList<MtasSpanSequenceItem> possibleTrimmedItems = new ArrayList<>(
- newItems.size());
- MtasSpanSequenceItem firstItem = newItems.get(0);
- MtasSpanQuery firstQuery = firstItem.getQuery();
- if (firstQuery instanceof MtasSpanMatchAllQuery) {
- newLeftMaximum++;
- if (!firstItem.isOptional()) {
- newLeftMinimum++;
- }
- } else if (firstQuery instanceof MtasSpanRecurrenceQuery) {
- MtasSpanRecurrenceQuery firstRecurrenceQuery = (MtasSpanRecurrenceQuery) firstQuery;
- if (firstRecurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery
- && firstRecurrenceQuery.getIgnoreQuery() == null) {
- if (!firstItem.isOptional()) {
- newLeftMinimum += firstRecurrenceQuery.getMinimumRecurrence();
- newLeftMaximum += firstRecurrenceQuery.getMaximumRecurrence();
- } else {
- if (firstRecurrenceQuery.getMinimumRecurrence() == 1
- || firstRecurrenceQuery
- .getMinimumRecurrence() <= newLeftMinimum) {
- // optional: only the maximum grows, the minimum stays as it is
- newLeftMaximum += firstRecurrenceQuery.getMaximumRecurrence();
- } else {
- possibleTrimmedItems.add(firstItem);
- }
- }
- } else {
- possibleTrimmedItems.add(firstItem);
- }
- } else {
- possibleTrimmedItems.add(firstItem);
- }
- for (int i = 1; i < (newItems.size() - 1); i++) {
- possibleTrimmedItems.add(newItems.get(i));
- }
- if (newItems.size() > 1) {
- MtasSpanSequenceItem lastItem = newItems.get((newItems.size() - 1));
- MtasSpanQuery lastQuery = lastItem.getQuery();
- if (lastQuery instanceof MtasSpanMatchAllQuery) {
- newRightMaximum++;
- if (!lastItem.isOptional()) {
- newRightMinimum++;
- }
- } else if (lastQuery instanceof MtasSpanRecurrenceQuery) {
- MtasSpanRecurrenceQuery lastRecurrenceQuery = (MtasSpanRecurrenceQuery) lastQuery;
- if (lastRecurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery
- && lastRecurrenceQuery.getIgnoreQuery() == null) {
- if (!lastItem.isOptional()) {
- newRightMinimum += lastRecurrenceQuery.getMinimumRecurrence();
- newRightMaximum += lastRecurrenceQuery.getMaximumRecurrence();
- } else if (lastRecurrenceQuery.getMinimumRecurrence() == 1
- || lastRecurrenceQuery
- .getMinimumRecurrence() <= newRightMinimum) {
- // optional: only the maximum grows, the minimum stays as it is
- newRightMaximum += lastRecurrenceQuery.getMaximumRecurrence();
- } else {
- possibleTrimmedItems.add(lastItem);
- }
- } else {
- possibleTrimmedItems.add(lastItem);
- }
- } else {
- possibleTrimmedItems.add(lastItem);
- }
- }
- if (possibleTrimmedItems.size() < newItems.size()) {
- actuallyRewritten = true;
- newItems = possibleTrimmedItems;
- }
- }
- if (!actuallyRewritten) {
- if (leftMaximum != 0 || rightMaximum != 0) {
- newLeftMinimum = leftMinimum;
- newLeftMaximum = leftMaximum;
- newRightMinimum = rightMinimum;
- newRightMaximum = rightMaximum;
- // NOTE(review): the expansion is moved into a wrapping
- // MtasExpandSpanQuery by resetting this query's own bounds, which
- // changes what equals, hashCode and toString report for this
- // instance afterwards - confirm no caller relies on them staying put
- leftMinimum = 0;
- leftMaximum = 0;
- rightMinimum = 0;
- rightMaximum = 0;
- MtasSpanQuery finalQuery = new MtasExpandSpanQuery(this,
- newLeftMinimum, newLeftMaximum, newRightMinimum, newRightMaximum);
- return finalQuery.rewrite(reader);
- } else {
- return super.rewrite(reader);
- }
- } else {
- if (!newItems.isEmpty()) {
- return new MtasSpanSequenceQuery(newItems, newLeftMinimum,
- newLeftMaximum, newRightMinimum, newRightMaximum, newIgnoreClause,
- maximumIgnoreLength).rewrite(reader);
- } else {
- return new MtasSpanMatchNoneQuery(field);
- }
- }
- }
- }
- /**
- * Builds a textual representation of this query: the simple class name, the
- * items (optional ones marked with {@code {OPTIONAL}}), the left and right
- * bounds, and the ignore query.
- *
- * @param field the field passed on to {@code toString} of each item query
- * @return the textual representation
- */
- @Override
- public String toString(String field) {
- StringBuilder text = new StringBuilder(this.getClass().getSimpleName())
- .append("([");
- String separator = "";
- for (MtasSpanSequenceItem item : items) {
- text.append(separator).append(item.getQuery().toString(field));
- if (item.isOptional()) {
- text.append("{OPTIONAL}");
- }
- separator = ", ";
- }
- text.append('[').append(leftMinimum).append(',').append(leftMaximum)
- .append(']');
- text.append('[').append(rightMinimum).append(',').append(rightMaximum)
- .append(']');
- text.append("], ").append(ignoreQuery).append(')');
- return text.toString();
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.Query#equals(java.lang.Object)
- */
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- MtasSpanSequenceQuery other = (MtasSpanSequenceQuery) obj;
- boolean isEqual;
- isEqual = field.equals(other.field);
- isEqual &= items.equals(other.items);
- isEqual &= leftMinimum == other.leftMinimum;
- isEqual &= leftMaximum == other.leftMaximum;
- isEqual &= rightMinimum == other.rightMinimum;
- isEqual &= rightMaximum == other.rightMaximum;
- isEqual &= ((ignoreQuery == null && other.ignoreQuery == null)
- || (ignoreQuery != null && other.ignoreQuery != null
- && ignoreQuery.equals(other.ignoreQuery)));
- return isEqual;
- }
- /**
- * Computes a hash code from the simple class name, the field, the items, the
- * left/right bounds and, when an ignore query is present, the ignore query
- * and the maximum ignore length.
- *
- * <p>A {@code null} field contributes {@code 0} instead of causing a
- * {@code NullPointerException}; for a non-null field the value is the same
- * as before.
- *
- * @return the hash code
- */
- @Override
- public int hashCode() {
- int h = this.getClass().getSimpleName().hashCode();
- h = (h * 3) ^ (field != null ? field.hashCode() : 0);
- h = (h * 5) ^ items.hashCode();
- h = Integer.rotateLeft(h, leftMinimum) + leftMinimum;
- h ^= 11;
- h = Integer.rotateLeft(h, leftMaximum) + leftMaximum;
- h ^= 13;
- h = Integer.rotateLeft(h, rightMinimum) + rightMinimum;
- h ^= 17;
- h = Integer.rotateLeft(h, rightMaximum) + rightMaximum;
- if (ignoreQuery != null) {
- h = (h * 7) ^ ignoreQuery.hashCode();
- // the constructor always sets a maximum ignore length with an ignore query
- h = (h * 11) ^ maximumIgnoreLength.hashCode();
- }
- return h;
- }
- /**
- * Creates the weight for this sequence query.
- *
- * <p>One sub-weight is created per item and, when present, one for the
- * ignore query; these are always requested without scores. Term contexts are
- * collected from the item weights only when {@code needsScores} is set.
- *
- * @param searcher the index searcher
- * @param needsScores whether scores are needed
- * @param boost the boost handed to all created weights
- * @return the weight of this sequence query
- * @throws IOException if creating one of the weights signals an I/O problem
- */
- @Override
- public MtasSpanWeight createWeight(IndexSearcher searcher,
- boolean needsScores, float boost) throws IOException {
- List<MtasSpanSequenceQueryWeight> itemWeights = new ArrayList<>();
- for (MtasSpanSequenceItem item : items) {
- SpanWeight itemWeight = item.getQuery().createWeight(searcher, false,
- boost);
- itemWeights.add(
- new MtasSpanSequenceQueryWeight(itemWeight, item.isOptional()));
- }
- SpanWeight ignoreWeight = null;
- if (ignoreQuery != null) {
- ignoreWeight = ignoreQuery.createWeight(searcher, false, boost);
- }
- Map<Term, TermContext> termContexts = null;
- if (needsScores) {
- termContexts = getTermContexts(itemWeights);
- }
- return new SpanSequenceWeight(itemWeights, ignoreWeight,
- maximumIgnoreLength, searcher, termContexts, boost);
- }
- /**
- * Collects the term contexts of the given item weights.
- *
- * <p>The span weight of every entry is gathered into a list, which is then
- * passed to the {@code getTermContexts} variant that accepts span weights.
- *
- * @param items the item weights
- * @return the term contexts
- */
- protected Map<Term, TermContext> getTermContexts(
- List<MtasSpanSequenceQueryWeight> items) {
- List<SpanWeight> spanWeights = new ArrayList<>();
- Iterator<MtasSpanSequenceQueryWeight> cursor = items.iterator();
- while (cursor.hasNext()) {
- spanWeights.add(cursor.next().spanWeight);
- }
- return getTermContexts(spanWeights);
- }
- /*
- * (non-Javadoc)
- *
- * @see mtas.search.spans.util.MtasSpanQuery#disableTwoPhaseIterator()
- */
- @Override
- public void disableTwoPhaseIterator() {
- super.disableTwoPhaseIterator();
- for (MtasSpanSequenceItem item : items) {
- item.getQuery().disableTwoPhaseIterator();
- }
- if (ignoreQuery != null) {
- ignoreQuery.disableTwoPhaseIterator();
- }
- }
- /**
- * Weight of a {@link MtasSpanSequenceQuery}: holds one sub-weight per item
- * and an optional weight for the ignore query.
- */
- protected class SpanSequenceWeight extends MtasSpanWeight {
- /** The weights of the items, each with its optional flag. */
- final List<MtasSpanSequenceQueryWeight> subWeights;
- /** The weight of the ignore query, or null. */
- final SpanWeight ignoreWeight;
- /** The maximum ignore length handed to the created spans. */
- final Integer maximumIgnoreLength;
- /**
- * Creates the weight for the enclosing sequence query.
- *
- * @param subWeights the weights of the items
- * @param ignoreWeight the weight of the ignore query, may be {@code null}
- * @param maximumIgnoreLength the maximum ignore length
- * @param searcher the searcher
- * @param terms the term contexts, may be {@code null}
- * @param boost the boost
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public SpanSequenceWeight(List<MtasSpanSequenceQueryWeight> subWeights,
- SpanWeight ignoreWeight, Integer maximumIgnoreLength,
- IndexSearcher searcher, Map<Term, TermContext> terms, float boost)
- throws IOException {
- super(MtasSpanSequenceQuery.this, searcher, terms, boost);
- this.subWeights = subWeights;
- this.ignoreWeight = ignoreWeight;
- this.maximumIgnoreLength = maximumIgnoreLength;
- }
- /**
- * Lets every item weight and, when present, the ignore weight add their
- * term contexts to the given map.
- *
- * @param contexts the map receiving the term contexts
- */
- @Override
- public void extractTermContexts(Map<Term, TermContext> contexts) {
- for (MtasSpanSequenceQueryWeight itemWeight : subWeights) {
- itemWeight.spanWeight.extractTermContexts(contexts);
- }
- if (ignoreWeight != null) {
- ignoreWeight.extractTermContexts(contexts);
- }
- }
- /**
- * Creates the spans of the sequence for one index segment.
- *
- * @param context the leaf reader context
- * @param requiredPostings the required postings
- * @return the sequence spans, or {@code null} when the query has no field,
- * the field is absent from the segment, a required item has no spans, or
- * none of the items has spans
- * @throws IOException Signals that an I/O exception has occurred.
- */
- @Override
- public MtasSpans getSpans(LeafReaderContext context,
- Postings requiredPostings) throws IOException {
- if (field == null) {
- return null;
- }
- Terms terms = context.reader().terms(field);
- if (terms == null) {
- return null; // field does not exist
- }
- List<MtasSpanSequenceQuerySpans> itemSpans = new ArrayList<>(
- items.size());
- boolean anySpansFound = false;
- for (MtasSpanSequenceQueryWeight itemWeight : subWeights) {
- Spans spans = itemWeight.spanWeight.getSpans(context, requiredPostings);
- if (spans == null && !itemWeight.optional) {
- return null; // a required item cannot match in this segment
- }
- if (spans != null) {
- anySpansFound = true;
- }
- // a missing optional item is passed on as null spans
- itemSpans.add(new MtasSpanSequenceQuerySpans(
- MtasSpanSequenceQuery.this, spans, itemWeight.optional));
- }
- if (!anySpansFound) {
- return null; // at least one required
- }
- Spans ignoreSpans = null;
- if (ignoreWeight != null) {
- ignoreSpans = ignoreWeight.getSpans(context, requiredPostings);
- }
- return new MtasSpanSequenceSpans(MtasSpanSequenceQuery.this, itemSpans,
- ignoreSpans, maximumIgnoreLength);
- }
- /**
- * Lets every item weight and, when present, the ignore weight add their
- * terms to the given set.
- *
- * @param terms the set receiving the terms
- */
- @Override
- public void extractTerms(Set<Term> terms) {
- for (MtasSpanSequenceQueryWeight itemWeight : subWeights) {
- itemWeight.spanWeight.extractTerms(terms);
- }
- if (ignoreWeight != null) {
- ignoreWeight.extractTerms(terms);
- }
- }
- 
- // @Override
- // public boolean isCacheable(LeafReaderContext arg0) {
- // for(MtasSpanSequenceQueryWeight sqw : subWeights) {
- // if(!sqw.spanWeight.isCacheable(arg0)) {
- // return false;
- // }
- // }
- // if(ignoreWeight!=null) {
- // return ignoreWeight.isCacheable(arg0);
- // }
- // return true;
- // }
- }
- /**
- * Pairs the spans of one sequence item with its optional flag.
- */
- protected static class MtasSpanSequenceQuerySpans {
- /** The spans of the item; never null after construction. */
- public Spans spans;
- /** Whether the item is optional. */
- public boolean optional;
- /**
- * Creates the pair; missing spans are replaced by match-none spans.
- *
- * @param query the query handed to the match-none spans when {@code spans}
- * is {@code null}
- * @param spans the spans of the item, may be {@code null}
- * @param optional whether the item is optional
- */
- public MtasSpanSequenceQuerySpans(MtasSpanSequenceQuery query, Spans spans,
- boolean optional) {
- if (spans == null) {
- this.spans = new MtasSpanMatchNoneSpans(query);
- } else {
- this.spans = spans;
- }
- this.optional = optional;
- }
- }
- /**
- * Pairs the span weight of one sequence item with its optional flag.
- */
- private static class MtasSpanSequenceQueryWeight {
- /** The span weight of the item. */
- public SpanWeight spanWeight;
- /** Whether the item is optional. */
- public boolean optional;
- /**
- * Creates the pair from the given values.
- *
- * @param spanWeight the span weight of the item
- * @param optional whether the item is optional
- */
- public MtasSpanSequenceQueryWeight(SpanWeight spanWeight,
- boolean optional) {
- this.optional = optional;
- this.spanWeight = spanWeight;
- }
- }
-
- /**
- * Reports whether this query is a match-all-positions query.
- *
- * @return always {@code false} for a sequence query
- */
- @Override
- public boolean isMatchAllPositionsQuery() {
- return false;
- }
- }