MtasSpanSequenceQuery.java

package mtas.search.spans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;

import mtas.search.spans.util.MtasExpandSpanQuery;
import mtas.search.spans.util.MtasIgnoreItem;
import mtas.search.spans.util.MtasSpanQuery;
import mtas.search.spans.util.MtasSpanWeight;
import mtas.search.spans.util.MtasSpans;

/**
 * Span query built from an ordered list of {@link MtasSpanSequenceItem}s,
 * each of which wraps a sub-query and an "optional" flag.
 *
 * <p>
 * Besides the items, the query carries a left and a right expansion range
 * (minimum/maximum); during {@link #rewrite(IndexReader)} these are turned
 * into a wrapping {@link MtasExpandSpanQuery}. An optional ignore query with
 * a maximum ignore length is passed on to the spans implementation.
 * </p>
 *
 * <p>
 * All clauses (and the ignore query, when it reports a field) must refer to
 * the same field.
 * </p>
 */
public class MtasSpanSequenceQuery extends MtasSpanQuery {

  /** The sequence items, in order. */
  private List<MtasSpanSequenceItem> items;

  /** Minimum expansion to the left of the sequence. */
  private int leftMinimum;

  /** Maximum expansion to the left of the sequence. */
  private int leftMaximum;

  /** Minimum expansion to the right of the sequence. */
  private int rightMinimum;

  /** Maximum expansion to the right of the sequence. */
  private int rightMaximum;

  /** The ignore query, or {@code null} when nothing is ignored. */
  private MtasSpanQuery ignoreQuery;

  /**
   * The maximum ignore length; {@code null} exactly when {@link #ignoreQuery}
   * is {@code null}.
   */
  private Integer maximumIgnoreLength;

  /**
   * The field shared by all clauses; {@code null} when there are no items or
   * no clause reports a field.
   */
  private String field;

  /**
   * Instantiates a new mtas span sequence query without left or right
   * expansion.
   *
   * @param items the items
   * @param ignoreQuery the ignore query, may be {@code null}
   * @param maximumIgnoreLength the maximum ignore length, may be {@code null}
   * @throws IllegalArgumentException if the clauses or the ignore query refer
   *         to different fields
   */
  public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items,
      MtasSpanQuery ignoreQuery, Integer maximumIgnoreLength) {
    this(items, 0, 0, 0, 0, ignoreQuery, maximumIgnoreLength);
  }

  /**
   * Instantiates a new mtas span sequence query.
   *
   * <p>
   * The constructor determines the common field of the clauses, validates the
   * ignore query against that field and registers the minimum and maximum
   * width of the sequence through {@code setWidth}. A width bound becomes
   * {@code null} as soon as a contributing query does not report that bound.
   * </p>
   *
   * @param items the items
   * @param leftMinimum the left minimum
   * @param leftMaximum the left maximum
   * @param rightMinimum the right minimum
   * @param rightMaximum the right maximum
   * @param ignoreQuery the ignore query, may be {@code null}; it is dropped
   *        when no field could be determined from the items
   * @param maximumIgnoreLength the maximum ignore length; when {@code null}
   *        and an ignore query is used,
   *        {@link MtasIgnoreItem#DEFAULT_MAXIMUM_IGNORE_LENGTH} is applied
   * @throws IllegalArgumentException if the clauses or the ignore query refer
   *         to different fields
   */
  public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items,
      int leftMinimum, int leftMaximum, int rightMinimum, int rightMaximum,
      MtasSpanQuery ignoreQuery, Integer maximumIgnoreLength) {
    super(null, null);
    this.items = items;
    this.leftMinimum = leftMinimum;
    this.leftMaximum = leftMaximum;
    this.rightMinimum = rightMinimum;
    this.rightMaximum = rightMaximum;
    // get field and do checks
    Integer minimum = leftMinimum + rightMinimum;
    Integer maximum = leftMaximum + rightMaximum;
    for (MtasSpanSequenceItem item : items) {
      if (field == null) {
        field = item.getQuery().getField();
      } else if (item.getQuery().getField() != null
          && !item.getQuery().getField().equals(field)) {
        throw new IllegalArgumentException("Clauses must have same field.");
      }
      // optional items do not contribute to the minimum width
      if (minimum != null && !item.isOptional()) {
        minimum = item.getQuery().getMinimumWidth() != null
            ? minimum + item.getQuery().getMinimumWidth() : null;
      }
      if (maximum != null) {
        maximum = item.getQuery().getMaximumWidth() != null
            ? maximum + item.getQuery().getMaximumWidth() : null;
      }
    }
    // check ignore
    if (field != null && ignoreQuery != null) {
      if (ignoreQuery.getField() == null
          || field.equals(ignoreQuery.getField())) {
        this.ignoreQuery = ignoreQuery;
        if (maximumIgnoreLength == null) {
          this.maximumIgnoreLength = MtasIgnoreItem.DEFAULT_MAXIMUM_IGNORE_LENGTH;
        } else {
          this.maximumIgnoreLength = maximumIgnoreLength;
        }
      } else {
        throw new IllegalArgumentException(
            "ignore must have same field as clauses");
      }
      // between each pair of adjacent items up to maximumIgnoreLength ignored
      // spans are accounted for in the maximum width
      if (maximum != null && items.size() > 1) {
        if (this.ignoreQuery.getMaximumWidth() != null) {
          maximum += (items.size() - 1) * this.maximumIgnoreLength
              * this.ignoreQuery.getMaximumWidth();
        } else {
          maximum = null;
        }
      }
    } else {
      this.ignoreQuery = null;
      this.maximumIgnoreLength = null;
    }
    setWidth(minimum, maximum);
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.SpanQuery#getField()
   */
  @Override
  public String getField() {
    return field;
  }

  /**
   * Gets the items.
   *
   * @return the items (the internal list, not a copy)
   */
  public List<MtasSpanSequenceItem> getItems() {
    return items;
  }

  /**
   * Gets the ignore query.
   *
   * @return the ignore query, or {@code null} if none is used
   */
  public MtasSpanQuery getIgnoreQuery() {
    return ignoreQuery;
  }

  /**
   * Gets the maximum ignore length.
   *
   * @return the maximum ignore length, or {@code null} if no ignore query is
   *         used
   */
  public Integer getMaximumIgnoreLength() {
    return maximumIgnoreLength;
  }

  /**
   * Rewrites this query.
   *
   * <p>
   * A single-item sequence is replaced by its (possibly expanded) sub-query.
   * Otherwise every item is rewritten, adjacent items are merged where
   * {@link MtasSpanSequenceItem#merge} produces a result, optional items that
   * rewrite to {@link MtasSpanMatchNoneQuery} are dropped, and - when no
   * ignore query is used - leading/trailing match-all items are folded into
   * the left/right expansion range.
   * </p>
   *
   * @param reader the index reader
   * @return the rewritten query
   * @throws IOException Signals that an I/O exception has occurred.
   * @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader)
   */
  @Override
  public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
    if (items.size() == 1) {
      MtasSpanQuery singleQuery = items.get(0).getQuery();
      if (leftMaximum != 0 || rightMaximum != 0) {
        singleQuery = new MtasExpandSpanQuery(singleQuery, leftMinimum,
            leftMaximum, rightMinimum, rightMaximum);
      }
      return singleQuery.rewrite(reader);
    } else {
      MtasSpanSequenceItem newItem;
      MtasSpanSequenceItem previousNewItem = null;
      List<MtasSpanSequenceItem> newItems = new ArrayList<>(items.size());
      int newLeftMinimum = leftMinimum;
      int newLeftMaximum = leftMaximum;
      int newRightMinimum = rightMinimum;
      int newRightMaximum = rightMaximum;
      MtasSpanQuery newIgnoreClause = ignoreQuery != null
          ? ignoreQuery.rewrite(reader) : null;
      boolean actuallyRewritten = ignoreQuery != null
          && !newIgnoreClause.equals(ignoreQuery);
      for (int i = 0; i < items.size(); i++) {
        newItem = items.get(i).rewrite(reader);
        if (newItem.getQuery() instanceof MtasSpanMatchNoneQuery) {
          if (!newItem.isOptional()) {
            // a required item that can never match makes the sequence empty
            return new MtasSpanMatchNoneQuery(field);
          } else {
            // an optional item that can never match is simply dropped
            actuallyRewritten = true;
          }
        } else {
          actuallyRewritten |= !items.get(i).equals(newItem);
          MtasSpanSequenceItem previousMergedItem = MtasSpanSequenceItem.merge(
              previousNewItem, newItem, ignoreQuery, maximumIgnoreLength);
          if (previousMergedItem != null) {
            newItems.set((newItems.size() - 1), previousMergedItem);
            actuallyRewritten = true;
          } else {
            newItems.add(newItem);
          }
          previousNewItem = newItem;
        }
      }
      // check first and last; skipped when no items are left (all of them
      // were optional and could never match), so that the empty case is
      // handled by the match-none fallback below instead of failing here
      if (ignoreQuery == null && !newItems.isEmpty()) {
        List<MtasSpanSequenceItem> possibleTrimmedItems = new ArrayList<>(
            newItems.size());
        MtasSpanSequenceItem firstItem = newItems.get(0);
        MtasSpanQuery firstQuery = firstItem.getQuery();
        if (firstQuery instanceof MtasSpanMatchAllQuery) {
          // leading match-all: drop the item and widen the left expansion
          newLeftMaximum++;
          if (!firstItem.isOptional()) {
            newLeftMinimum++;
          }
        } else if (firstQuery instanceof MtasSpanRecurrenceQuery) {
          MtasSpanRecurrenceQuery firstRecurrenceQuery = (MtasSpanRecurrenceQuery) firstQuery;
          if (firstRecurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery
              && firstRecurrenceQuery.getIgnoreQuery() == null) {
            if (!firstItem.isOptional()) {
              newLeftMinimum += firstRecurrenceQuery.getMinimumRecurrence();
              newLeftMaximum += firstRecurrenceQuery.getMaximumRecurrence();
            } else {
              if (firstRecurrenceQuery.getMinimumRecurrence() == 1
                  || firstRecurrenceQuery
                      .getMinimumRecurrence() <= newLeftMinimum) {
                // optional: only the maximum grows, the minimum is unchanged
                newLeftMaximum += firstRecurrenceQuery.getMaximumRecurrence();
              } else {
                possibleTrimmedItems.add(firstItem);
              }
            }
          } else {
            possibleTrimmedItems.add(firstItem);
          }
        } else {
          possibleTrimmedItems.add(firstItem);
        }
        // items strictly between first and last are always kept
        for (int i = 1; i < (newItems.size() - 1); i++) {
          possibleTrimmedItems.add(newItems.get(i));
        }
        if (newItems.size() > 1) {
          MtasSpanSequenceItem lastItem = newItems.get((newItems.size() - 1));
          MtasSpanQuery lastQuery = lastItem.getQuery();
          if (lastQuery instanceof MtasSpanMatchAllQuery) {
            // trailing match-all: drop the item and widen the right expansion
            newRightMaximum++;
            if (!lastItem.isOptional()) {
              newRightMinimum++;
            }
          } else if (lastQuery instanceof MtasSpanRecurrenceQuery) {
            MtasSpanRecurrenceQuery lastRecurrenceQuery = (MtasSpanRecurrenceQuery) lastQuery;
            if (lastRecurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery
                && lastRecurrenceQuery.getIgnoreQuery() == null) {
              if (!lastItem.isOptional()) {
                newRightMinimum += lastRecurrenceQuery.getMinimumRecurrence();
                newRightMaximum += lastRecurrenceQuery.getMaximumRecurrence();
              } else if (lastRecurrenceQuery.getMinimumRecurrence() == 1
                  || lastRecurrenceQuery
                      .getMinimumRecurrence() <= newRightMinimum) {
                // optional: only the maximum grows, the minimum is unchanged
                newRightMaximum += lastRecurrenceQuery.getMaximumRecurrence();
              } else {
                possibleTrimmedItems.add(lastItem);
              }
            } else {
              possibleTrimmedItems.add(lastItem);
            }
          } else {
            possibleTrimmedItems.add(lastItem);
          }
        }
        if (possibleTrimmedItems.size() < newItems.size()) {
          actuallyRewritten = true;
          newItems = possibleTrimmedItems;
        }
      }
      if (!actuallyRewritten) {
        if (leftMaximum != 0 || rightMaximum != 0) {
          newLeftMinimum = leftMinimum;
          newLeftMaximum = leftMaximum;
          newRightMinimum = rightMinimum;
          newRightMaximum = rightMaximum;
          // move the expansion out of this query into a wrapping expand query
          // NOTE(review): this resets fields of this instance that take part
          // in equals()/hashCode(); confirm no caller relies on the query
          // being unchanged by rewrite
          leftMinimum = 0;
          leftMaximum = 0;
          rightMinimum = 0;
          rightMaximum = 0;
          MtasSpanQuery finalQuery = new MtasExpandSpanQuery(this,
              newLeftMinimum, newLeftMaximum, newRightMinimum, newRightMaximum);
          return finalQuery.rewrite(reader);
        } else {
          return super.rewrite(reader);
        }
      } else {
        if (!newItems.isEmpty()) {
          return new MtasSpanSequenceQuery(newItems, newLeftMinimum,
              newLeftMaximum, newRightMinimum, newRightMaximum, newIgnoreClause,
              maximumIgnoreLength).rewrite(reader);
        } else {
          return new MtasSpanMatchNoneQuery(field);
        }
      }
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.Query#toString(java.lang.String)
   */
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    buffer.append(this.getClass().getSimpleName() + "([");
    Iterator<MtasSpanSequenceItem> i = items.iterator();
    while (i.hasNext()) {
      MtasSpanSequenceItem item = i.next();
      MtasSpanQuery clause = item.getQuery();
      buffer.append(clause.toString(field));
      if (item.isOptional()) {
        buffer.append("{OPTIONAL}");
      }
      if (i.hasNext()) {
        buffer.append(", ");
      }
    }
    buffer.append("[" + leftMinimum + "," + leftMaximum + "]");
    buffer.append("[" + rightMinimum + "," + rightMaximum + "]");
    buffer.append("]");
    buffer.append(", ");
    buffer.append(ignoreQuery);
    buffer.append(")");
    return buffer.toString();
  }

  /**
   * Compares this query with another object.
   *
   * <p>
   * Two sequence queries are equal when field, items, expansion ranges,
   * ignore query and maximum ignore length are all equal. The comparison is
   * null-safe for the field and the ignore settings, and it includes the
   * maximum ignore length so that it stays consistent with
   * {@link #hashCode()}.
   * </p>
   *
   * @param obj the object to compare with
   * @return true, if both objects represent the same query
   * @see org.apache.lucene.search.Query#equals(java.lang.Object)
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    MtasSpanSequenceQuery other = (MtasSpanSequenceQuery) obj;
    return Objects.equals(field, other.field)
        && items.equals(other.items)
        && leftMinimum == other.leftMinimum
        && leftMaximum == other.leftMaximum
        && rightMinimum == other.rightMinimum
        && rightMaximum == other.rightMaximum
        && Objects.equals(ignoreQuery, other.ignoreQuery)
        && Objects.equals(maximumIgnoreLength, other.maximumIgnoreLength);
  }

  /**
   * Computes a hash code from the same properties that are used by
   * {@link #equals(Object)}; a {@code null} field contributes 0.
   *
   * @return the hash code
   * @see org.apache.lucene.search.Query#hashCode()
   */
  @Override
  public int hashCode() {
    int h = this.getClass().getSimpleName().hashCode();
    h = (h * 3) ^ Objects.hashCode(field);
    h = (h * 5) ^ items.hashCode();
    h = Integer.rotateLeft(h, leftMinimum) + leftMinimum;
    h ^= 11;
    h = Integer.rotateLeft(h, leftMaximum) + leftMaximum;
    h ^= 13;
    h = Integer.rotateLeft(h, rightMinimum) + rightMinimum;
    h ^= 17;
    h = Integer.rotateLeft(h, rightMaximum) + rightMaximum;
    if (ignoreQuery != null) {
      h = (h * 7) ^ ignoreQuery.hashCode();
      // the constructor always sets maximumIgnoreLength with an ignore query
      h = (h * 11) ^ maximumIgnoreLength.hashCode();
    }
    return h;
  }

  /**
   * Creates the weight for this query from the weights of all items and, if
   * present, the ignore query. Sub-weights are always created without
   * scores; term contexts are only collected when scores are needed.
   *
   * @param searcher the searcher
   * @param needsScores whether scores are needed
   * @param boost the boost
   * @return the weight
   * @throws IOException Signals that an I/O exception has occurred.
   */
  @Override
  public MtasSpanWeight createWeight(IndexSearcher searcher,
      boolean needsScores, float boost) throws IOException {
    List<MtasSpanSequenceQueryWeight> subWeights = new ArrayList<>();
    SpanWeight ignoreWeight = null;
    for (MtasSpanSequenceItem item : items) {
      subWeights.add(new MtasSpanSequenceQueryWeight(
          item.getQuery().createWeight(searcher, false, boost), item.isOptional()));
    }
    if (ignoreQuery != null) {
      ignoreWeight = ignoreQuery.createWeight(searcher, false, boost);
    }
    return new SpanSequenceWeight(subWeights, ignoreWeight, maximumIgnoreLength,
        searcher, needsScores ? getTermContexts(subWeights) : null, boost);
  }

  /**
   * Gets the term contexts of the span weights wrapped by the given items.
   *
   * @param items the items
   * @return the term contexts
   */
  protected Map<Term, TermContext> getTermContexts(
      List<MtasSpanSequenceQueryWeight> items) {
    List<SpanWeight> weights = new ArrayList<>();
    for (MtasSpanSequenceQueryWeight item : items) {
      weights.add(item.spanWeight);
    }
    return getTermContexts(weights);
  }

  /**
   * Disables the two phase iterator for this query, for the query of every
   * item and for the ignore query.
   *
   * @see mtas.search.spans.util.MtasSpanQuery#disableTwoPhaseIterator()
   */
  @Override
  public void disableTwoPhaseIterator() {
    super.disableTwoPhaseIterator();
    for (MtasSpanSequenceItem item : items) {
      item.getQuery().disableTwoPhaseIterator();
    }
    if (ignoreQuery != null) {
      ignoreQuery.disableTwoPhaseIterator();
    }
  }

  /**
   * Weight of a {@link MtasSpanSequenceQuery}, combining the weights of the
   * items and the optional ignore weight.
   */
  protected class SpanSequenceWeight extends MtasSpanWeight {

    /** The weights of the sequence items, in order. */
    final List<MtasSpanSequenceQueryWeight> subWeights;

    /** The ignore weight, or {@code null} when nothing is ignored. */
    final SpanWeight ignoreWeight;

    /** The maximum ignore length. */
    final Integer maximumIgnoreLength;

    /**
     * Instantiates a new span sequence weight.
     *
     * @param subWeights the sub weights
     * @param ignoreWeight the ignore weight
     * @param maximumIgnoreLength the maximum ignore length
     * @param searcher the searcher
     * @param terms the terms
     * @param boost the boost
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public SpanSequenceWeight(List<MtasSpanSequenceQueryWeight> subWeights,
        SpanWeight ignoreWeight, Integer maximumIgnoreLength,
        IndexSearcher searcher, Map<Term, TermContext> terms, float boost)
        throws IOException {
      super(MtasSpanSequenceQuery.this, searcher, terms, boost);
      this.subWeights = subWeights;
      this.ignoreWeight = ignoreWeight;
      this.maximumIgnoreLength = maximumIgnoreLength;
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * org.apache.lucene.search.spans.SpanWeight#extractTermContexts(java.util.
     * Map)
     */
    @Override
    public void extractTermContexts(Map<Term, TermContext> contexts) {
      for (MtasSpanSequenceQueryWeight w : subWeights) {
        w.spanWeight.extractTermContexts(contexts);
      }
      if (ignoreWeight != null) {
        ignoreWeight.extractTermContexts(contexts);
      }
    }

    /**
     * Gets the spans for a segment.
     *
     * <p>
     * Returns {@code null} when the query has no field, the field does not
     * exist in the segment, a required item has no spans, or none of the
     * items has spans. An optional item without spans is represented by
     * match-none spans.
     * </p>
     *
     * @param context the leaf reader context
     * @param requiredPostings the required postings
     * @return the spans, or {@code null} if nothing can match in this segment
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Override
    public MtasSpans getSpans(LeafReaderContext context,
        Postings requiredPostings) throws IOException {
      if (field == null) {
        return null;
      } else {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
          return null; // field does not exist
        }
        List<MtasSpanSequenceQuerySpans> setSequenceSpans = new ArrayList<>(
            items.size());
        Spans ignoreSpans = null;
        boolean allSpansEmpty = true;
        for (MtasSpanSequenceQueryWeight w : subWeights) {
          Spans sequenceSpans = w.spanWeight.getSpans(context,
              requiredPostings);
          if (sequenceSpans != null) {
            setSequenceSpans.add(new MtasSpanSequenceQuerySpans(
                MtasSpanSequenceQuery.this, sequenceSpans, w.optional));
            allSpansEmpty = false;
          } else {
            if (w.optional) {
              setSequenceSpans.add(new MtasSpanSequenceQuerySpans(
                  MtasSpanSequenceQuery.this, null, w.optional));
            } else {
              // a required item without spans: the sequence cannot match
              return null;
            }
          }
        }
        if (allSpansEmpty) {
          return null; // at least one required
        } else if (ignoreWeight != null) {
          ignoreSpans = ignoreWeight.getSpans(context, requiredPostings);
        }
        return new MtasSpanSequenceSpans(MtasSpanSequenceQuery.this,
            setSequenceSpans, ignoreSpans, maximumIgnoreLength);
      }
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.lucene.search.Weight#extractTerms(java.util.Set)
     */
    @Override
    public void extractTerms(Set<Term> terms) {
      for (MtasSpanSequenceQueryWeight w : subWeights) {
        w.spanWeight.extractTerms(terms);
      }
      if (ignoreWeight != null) {
        ignoreWeight.extractTerms(terms);
      }
    }
    
    // NOTE(review): disabled override, presumably intended for a Lucene
    // version whose Weight declares isCacheable - confirm before enabling.
//    @Override
//    public boolean isCacheable(LeafReaderContext arg0) {      
//      for(MtasSpanSequenceQueryWeight sqw : subWeights) {
//        if(!sqw.spanWeight.isCacheable(arg0)) {
//          return false;
//        }
//      }
//      if(ignoreWeight!=null) {
//        return ignoreWeight.isCacheable(arg0);
//      }
//      return true;            
//    }

  }

  /**
   * Pairs the spans of a sequence item with its optional flag.
   */
  protected static class MtasSpanSequenceQuerySpans {

    /** The spans; never {@code null} after construction. */
    public Spans spans;

    /** Whether the item is optional. */
    public boolean optional;

    /**
     * Instantiates a new mtas span sequence query spans.
     *
     * @param query the query, used to create match-none spans when
     *        {@code spans} is {@code null}
     * @param spans the spans, may be {@code null}
     * @param optional the optional
     */
    public MtasSpanSequenceQuerySpans(MtasSpanSequenceQuery query, Spans spans,
        boolean optional) {
      this.spans = spans != null ? spans : new MtasSpanMatchNoneSpans(query);
      this.optional = optional;
    }
  }

  /**
   * Pairs the span weight of a sequence item with its optional flag.
   */
  private static class MtasSpanSequenceQueryWeight {

    /** The span weight. */
    public SpanWeight spanWeight;

    /** Whether the item is optional. */
    public boolean optional;

    /**
     * Instantiates a new mtas span sequence query weight.
     *
     * @param spanWeight the span weight
     * @param optional the optional
     */
    public MtasSpanSequenceQueryWeight(SpanWeight spanWeight,
        boolean optional) {
      this.spanWeight = spanWeight;
      this.optional = optional;
    }
  }
  
  /**
   * Reports whether this query matches all positions; always {@code false}
   * for this class.
   *
   * @return false
   */
  @Override
  public boolean isMatchAllPositionsQuery() {
    return false;
  }

}