MtasMaximumExpandSpans.java

package mtas.search.spans.util;

import java.io.IOException;

import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.Spans;

import mtas.codec.util.CodecInfo;
import mtas.codec.util.CodecInfo.IndexDoc;

/**
 * Spans that widen every match of a wrapped {@link Spans} as far as the query
 * allows.
 *
 * <p>
 * Each match of the sub spans is extended to the left by
 * {@code query.maximumLeft} and to the right by {@code query.maximumRight}
 * positions, clipped to the position range of the current document as
 * reported by {@link CodecInfo#getDoc(String, int)}. A match is only kept when
 * the clipped expansion still covers at least {@code query.minimumLeft}
 * positions on the left and {@code query.minimumRight} positions on the right.
 *
 * <p>
 * The first match of a document is located while positioning on that document
 * ({@link #nextDoc()}, {@link #advance(int)} or the two-phase
 * {@code matches()}); the first call to {@link #nextStartPosition()} merely
 * exposes it.
 */
public class MtasMaximumExpandSpans extends MtasSpans {

  /** The wrapped spans whose matches are expanded. */
  Spans subSpans;

  /** The query providing the minimum/maximum expansion for both sides. */
  MtasMaximumExpandSpanQuery query;

  /**
   * The lowest position of the current document, or NO_MORE_POSITIONS when no
   * document information is available.
   */
  int minPosition;

  /**
   * The highest position of the current document, or NO_MORE_POSITIONS when
   * no document information is available.
   */
  int maxPosition;

  /** The field used to look up document information. */
  String field;

  /** The mtas codec info used to look up the position range of a document. */
  CodecInfo mtasCodecInfo;

  /** The start position of the most recently located expanded match. */
  int startPosition;

  /** The end position of the most recently located expanded match. */
  int endPosition;

  /**
   * Whether {@link #nextStartPosition()} has been called on the current
   * document.
   */
  private boolean calledNextStartPosition;

  /** Whether all matches of the current document have been consumed. */
  private boolean exhausted;

  /** The current doc id; -1 before the first document. */
  int docId;

  /**
   * Instantiates a new mtas maximum expand spans.
   *
   * @param query the query providing the expansion limits
   * @param mtasCodecInfo the mtas codec info
   * @param field the field
   * @param subSpans the spans to expand
   */
  public MtasMaximumExpandSpans(MtasMaximumExpandSpanQuery query,
      CodecInfo mtasCodecInfo, String field, Spans subSpans) {
    super();
    this.subSpans = subSpans;
    this.field = field;
    this.mtasCodecInfo = mtasCodecInfo;
    this.query = query;
    docId = -1;
    reset();
  }

  /**
   * Moves to the next expanded match in the current document.
   *
   * @return the start position of the match, or NO_MORE_POSITIONS when the
   *         document has no further matches
   * @throws IOException if not positioned on a document, or if the sub spans
   *           fail
   * @see org.apache.lucene.search.spans.Spans#nextStartPosition()
   */
  @Override
  public int nextStartPosition() throws IOException {
    if (docId == -1 || docId == NO_MORE_DOCS) {
      throw new IOException("no document");
    }
    if (!calledNextStartPosition) {
      // the first match was already located while positioning on this doc
      calledNextStartPosition = true;
      return startPosition;
    }
    // never advance the sub spans again once they reported exhaustion
    if (!exhausted && goToNextStartPosition()) {
      return startPosition;
    }
    // no more matches: document finished
    exhausted = true;
    startPosition = NO_MORE_POSITIONS;
    endPosition = NO_MORE_POSITIONS;
    return NO_MORE_POSITIONS;
  }

  /**
   * Returns the start position of the current match.
   *
   * @return -1 before {@link #nextStartPosition()} was called on the current
   *         document, NO_MORE_POSITIONS after the last match, otherwise the
   *         start of the current expanded match
   * @see org.apache.lucene.search.spans.Spans#startPosition()
   */
  @Override
  public int startPosition() {
    // hide the pre-fetched first match until the caller asks for it
    return calledNextStartPosition ? startPosition : -1;
  }

  /**
   * Returns the end position of the current match.
   *
   * @return -1 before {@link #nextStartPosition()} was called on the current
   *         document, NO_MORE_POSITIONS after the last match, otherwise the
   *         end of the current expanded match
   * @see org.apache.lucene.search.spans.Spans#endPosition()
   */
  @Override
  public int endPosition() {
    // hide the pre-fetched first match until the caller asks for it
    return calledNextStartPosition ? endPosition : -1;
  }

  /**
   * Returns the difference between the reported end and start position.
   *
   * @return the width of the current match
   * @see org.apache.lucene.search.spans.Spans#width()
   */
  @Override
  public int width() {
    return endPosition() - startPosition();
  }

  /**
   * Delegates collection to the wrapped spans.
   *
   * @param collector the collector
   * @throws IOException Signals that an I/O exception has occurred.
   * @see org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans.SpanCollector)
   */
  @Override
  public void collect(SpanCollector collector) throws IOException {
    subSpans.collect(collector);
  }

  /**
   * Always reports a cost of 0; the cost of the wrapped spans is not
   * consulted.
   *
   * @return 0
   * @see org.apache.lucene.search.spans.Spans#positionsCost()
   */
  @Override
  public float positionsCost() {
    // NOTE(review): presumably subSpans.positionsCost() is avoided because it
    // may be unsupported when the sub spans offer a two-phase iterator --
    // confirm before changing.
    return 0;
  }

  /**
   * Returns the current doc id.
   *
   * @return the doc id, -1 before the first document
   * @see org.apache.lucene.search.DocIdSetIterator#docID()
   */
  @Override
  public int docID() {
    return docId;
  }

  /**
   * Moves to the next document that contains at least one expanded match.
   *
   * @return the doc id, or NO_MORE_DOCS
   * @throws IOException Signals that an I/O exception has occurred.
   * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
   */
  @Override
  public int nextDoc() throws IOException {
    reset();
    while (!goToNextDoc()) {
      // keep skipping documents without any valid expanded match
    }
    return docId;
  }

  /**
   * Moves to the first document at or beyond target that contains at least
   * one expanded match.
   *
   * @param target the target doc id
   * @return the doc id, or NO_MORE_DOCS
   * @throws IOException Signals that an I/O exception has occurred.
   * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
   */
  @Override
  public int advance(int target) throws IOException {
    reset();
    if (docId == NO_MORE_DOCS) {
      return docId;
    }
    if (target <= docId) {
      // should not happen
      docId = NO_MORE_DOCS;
      return docId;
    }
    docId = subSpans.advance(target);
    if (docId == NO_MORE_DOCS) {
      return docId;
    }
    loadDocumentBounds();
    // fall through to the following documents when this one has no match
    return goToNextStartPosition() ? docId : nextDoc();
  }

  /**
   * Wraps the two-phase iterator of the sub spans (or the sub spans
   * themselves as approximation) and additionally requires a valid expanded
   * match in the candidate document.
   *
   * @return the two-phase iterator, or null when the query does not allow
   *         two-phase iteration
   * @see org.apache.lucene.search.spans.Spans#asTwoPhaseIterator()
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    if (!query.twoPhaseIteratorAllowed()) {
      return null;
    }
    final TwoPhaseIterator originalTwoPhaseIterator = subSpans
        .asTwoPhaseIterator();
    if (originalTwoPhaseIterator != null) {
      return new TwoPhaseIterator(originalTwoPhaseIterator.approximation()) {
        @Override
        public boolean matches() throws IOException {
          return originalTwoPhaseIterator.matches()
              && twoPhaseCurrentDocMatches();
        }

        @Override
        public float matchCost() {
          return originalTwoPhaseIterator.matchCost();
        }
      };
    }
    return new TwoPhaseIterator(subSpans) {
      @Override
      public boolean matches() throws IOException {
        return twoPhaseCurrentDocMatches();
      }

      @Override
      public float matchCost() {
        return subSpans.positionsCost();
      }
    };
  }

  /**
   * Synchronizes with the document the sub spans are positioned on and looks
   * for its first expanded match.
   *
   * @return true, if the document contains an expanded match
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private boolean twoPhaseCurrentDocMatches() throws IOException {
    int subDocId = subSpans.docID();
    if (docId != subDocId) {
      reset();
      docId = subDocId;
      if (docId == NO_MORE_DOCS) {
        // nothing to look up for the end-of-iteration sentinel
        minPosition = NO_MORE_POSITIONS;
        maxPosition = NO_MORE_POSITIONS;
      } else {
        loadDocumentBounds();
      }
    }
    return docId != NO_MORE_DOCS && goToNextStartPosition();
  }

  /**
   * Advances the sub spans by one document and looks for its first expanded
   * match.
   *
   * @return true, if iteration can stop: either the new document contains an
   *         expanded match or there are no more documents
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private boolean goToNextDoc() throws IOException {
    reset();
    if (docId != NO_MORE_DOCS) {
      docId = subSpans.nextDoc();
    }
    if (docId == NO_MORE_DOCS) {
      minPosition = NO_MORE_POSITIONS;
      maxPosition = NO_MORE_POSITIONS;
      return true;
    }
    loadDocumentBounds();
    return goToNextStartPosition();
  }

  /**
   * Reads the position range of the current document into minPosition and
   * maxPosition; both become NO_MORE_POSITIONS when the codec has no
   * information on the document.
   */
  private void loadDocumentBounds() {
    IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
    if (doc != null) {
      minPosition = doc.minPosition;
      maxPosition = doc.maxPosition;
    } else {
      minPosition = NO_MORE_POSITIONS;
      maxPosition = NO_MORE_POSITIONS;
    }
  }

  /**
   * Advances the sub spans to their next match whose clipped expansion
   * satisfies the minimum requirements, storing the expanded range in
   * startPosition and endPosition.
   *
   * @return true, if such a match was found
   * @throws IOException if not positioned on a document, or if the sub spans
   *           fail
   */
  private boolean goToNextStartPosition() throws IOException {
    if (docId == -1 || docId == NO_MORE_DOCS) {
      throw new IOException("no document");
    }
    int basicStartPosition;
    while ((basicStartPosition = subSpans
        .nextStartPosition()) != NO_MORE_POSITIONS) {
      int basicEndPosition = subSpans.endPosition();
      // long arithmetic: maxPosition may be NO_MORE_POSITIONS and the
      // expansion limits may be large, so int sums could wrap around
      long expandedStart = Math.max((long) minPosition,
          (long) basicStartPosition - query.maximumLeft);
      long expandedEnd = Math.min((long) maxPosition + 1,
          (long) basicEndPosition + query.maximumRight);
      if (expandedStart <= (long) basicStartPosition - query.minimumLeft
          && expandedEnd >= (long) basicEndPosition + query.minimumRight) {
        startPosition = (int) expandedStart;
        endPosition = (int) expandedEnd;
        return true;
      }
    }
    return false;
  }

  /**
   * Clears all per-document state; the doc id itself is left untouched.
   */
  private void reset() {
    calledNextStartPosition = false;
    exhausted = false;
    minPosition = 0;
    maxPosition = 0;
    startPosition = -1;
    endPosition = -1;
  }

  /**
   * Returns the cost of the wrapped spans.
   *
   * @return the cost of the sub spans, or 0 when there are none
   * @see org.apache.lucene.search.DocIdSetIterator#cost()
   */
  @Override
  public long cost() {
    return subSpans != null ? subSpans.cost() : 0;
  }
}