MtasSpanUniquePositionSpans.java

package mtas.search.spans.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.Spans;

/**
 * The Class MtasSpanUniquePositionSpans.
 */
public class MtasSpanUniquePositionSpans extends MtasSpans {

  /** The query. */
  private MtasSpanUniquePositionQuery query;

  /** The spans. */
  private Spans spans;

  /** The queue spans. */
  private List<Match> queueSpans;

  /** The queue matches. */
  private List<Match> queueMatches;

  /** The current match. */
  private Match currentMatch;

  /** The last start position. */
  private int lastStartPosition; // startPosition of last retrieved span

  /** The last span. */
  private boolean lastSpan; // last span for this document added to queue

  /** The no more positions. */
  private boolean noMorePositions;

  /**
   * Instantiates a new mtas span unique position spans.
   *
   * @param query the query
   * @param spans the spans
   */
  public MtasSpanUniquePositionSpans(MtasSpanUniquePositionQuery query,
      Spans spans) {
    super();
    this.query = query;
    this.spans = spans;
    queueSpans = new ArrayList<>();
    queueMatches = new ArrayList<>();
    resetQueue();
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.Spans#nextStartPosition()
   */
  @Override
  public int nextStartPosition() throws IOException {
    if (findMatches()) {
      currentMatch = queueMatches.get(0);
      queueMatches.remove(0);
      noMorePositions = false;
      return currentMatch.startPosition();
    } else {
      currentMatch = null;
      noMorePositions = true;
      return NO_MORE_POSITIONS;
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.Spans#startPosition()
   */
  @Override
  public int startPosition() {
    if (currentMatch == null) {
      if (noMorePositions) {
        return NO_MORE_POSITIONS;
      } else {
        return -1;
      }
    } else {
      return currentMatch.startPosition();
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.Spans#endPosition()
   */
  @Override
  public int endPosition() {
    if (currentMatch == null) {
      if (noMorePositions) {
        return NO_MORE_POSITIONS;
      } else {
        return -1;
      }
    } else {
      return currentMatch.endPosition();
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.Spans#width()
   */
  @Override
  public int width() {
    // return (currentMatch.endPosition() - currentMatch.startPosition());
    return 1;
  }

  /*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans
   * .SpanCollector)
   */
  @Override
  public void collect(SpanCollector collector) throws IOException {
    spans.collect(collector);
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.DocIdSetIterator#docID()
   */
  @Override
  public int docID() {
    return spans.docID();
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
   */
  @Override
  public int nextDoc() throws IOException {
    resetQueue();
    noMorePositions = false;
    return (spans.nextDoc() == NO_MORE_DOCS) ? NO_MORE_DOCS : toMatchDoc();
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
   */
  @Override
  public int advance(int target) throws IOException {
    resetQueue();
    noMorePositions = false;
    return (spans.advance(target) == NO_MORE_DOCS) ? NO_MORE_DOCS
        : toMatchDoc();
  }

  /*
   * (non-Javadoc)
   * 
   * @see mtas.search.spans.util.MtasSpans#asTwoPhaseIterator()
   */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    if (spans == null || !query.twoPhaseIteratorAllowed()) {
      return null;
    } else {
      return spans.asTwoPhaseIterator();
    }
  }

  /**
   * Reset queue.
   */
  void resetQueue() {
    queueSpans.clear();
    queueMatches.clear();
    lastStartPosition = 0;
    lastSpan = false;
    currentMatch = null;
  }

  /**
   * To match doc.
   *
   * @return the int
   * @throws IOException Signals that an I/O exception has occurred.
   */
  int toMatchDoc() throws IOException {
    while (true) {
      if (findMatches()) {
        return docID();
      }
      if (spans.nextDoc() == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      }
    }
  }

  /**
   * Collect span.
   *
   * @return true, if successful
   * @throws IOException Signals that an I/O exception has occurred.
   */
  // try to get something in the queue of spans
  private boolean collectSpan() throws IOException {
    if (lastSpan) {
      return false;
    } else if (spans.nextStartPosition() == NO_MORE_POSITIONS) {
      lastSpan = true;
      return false;
    } else {
      queueSpans.add(new Match(spans.startPosition(), spans.endPosition()));
      lastStartPosition = spans.startPosition();
      return true;
    }
  }

  /**
   * Find matches.
   *
   * @return true, if successful
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private boolean findMatches() throws IOException {
    // check for something in queue of matches
    if (!queueMatches.isEmpty()) {
      return true;
    } else {
      while (true) {
        // try to get something in queue of spans
        if (queueSpans.isEmpty() && !collectSpan()) {
          return false;
        }
        // try to get matches with first span in queue
        Match firstMatch = queueSpans.get(0);
        queueSpans.remove(0);
        // create a list of matches with same startposition as firstMatch
        List<Match> matches = new ArrayList<>();
        matches.add(firstMatch);
        // try to collect spans until lastStartPosition not equal to
        // startposition of firstMatch
        while (!lastSpan && (lastStartPosition == firstMatch.startPosition())) {
          collectSpan();
        }
        while (!queueSpans.isEmpty() && (queueSpans.get(0)
            .startPosition() == firstMatch.startPosition())) {
          matches.add(queueSpans.get(0));
          queueSpans.remove(0);
        }
        // construct all matches for this startposition
        for (Match match : matches) {
          // only unique spans
          if (!queueMatches.contains(match)) {
            queueMatches.add(match);
          }
        }
        // check for something in queue of matches
        if (!queueMatches.isEmpty()) {
          return true;
        }
      }
    }
  }

  /**
   * The Class Match.
   */
  private static class Match {

    /** The start position. */
    private int startPosition;

    /** The end position. */
    private int endPosition;

    /**
     * Instantiates a new match.
     *
     * @param startPosition the start position
     * @param endPosition the end position
     */
    Match(int startPosition, int endPosition) {
      this.startPosition = startPosition;
      this.endPosition = endPosition;
    }

    /**
     * Start position.
     *
     * @return the int
     */
    public int startPosition() {
      return startPosition;
    }

    /**
     * End position.
     *
     * @return the int
     */
    public int endPosition() {
      return endPosition;
    }

    /*
     * (non-Javadoc)
     * 
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
      if (this == obj)
        return true;
      if (obj == null)
        return false;
      if (getClass() != obj.getClass())
        return false;
      final Match that = (Match) obj;
      return startPosition == that.startPosition
          && endPosition == that.endPosition;
    }

    /*
     * (non-Javadoc)
     * 
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
      int h = this.getClass().getSimpleName().hashCode();
      h = (h * 5) ^ startPosition;
      h = (h * 7) ^ endPosition;
      return h;
    }

  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.DocIdSetIterator#cost()
   */
  @Override
  public long cost() {
    return (spans == null) ? 0 : spans.cost();
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.search.spans.Spans#positionsCost()
   */
  @Override
  public float positionsCost() {
    return (spans == null) ? 0 : spans.positionsCost();
  }

}