MtasSpanFollowedBySpans.java

  1. package mtas.search.spans;

  2. import java.io.IOException;
  3. import java.util.HashSet;

  4. import org.apache.lucene.search.TwoPhaseIterator;
  5. import org.apache.lucene.search.spans.SpanCollector;
  6. import mtas.search.spans.MtasSpanFollowedByQuery.MtasSpanFollowedByQuerySpans;
  7. import mtas.search.spans.util.MtasSpans;

  8. /**
  9.  * The Class MtasSpanFollowedBySpans.
  10.  */
  11. public class MtasSpanFollowedBySpans extends MtasSpans {

  12.   /** The query. */
  13.   private MtasSpanFollowedByQuery query;

  14.   /** The spans 1. */
  15.   private MtasSpanFollowedByQuerySpans spans1;

  16.   /** The spans 2. */
  17.   private MtasSpanFollowedByQuerySpans spans2;

  18.   /** The last spans 2 start position. */
  19.   private int lastSpans2StartPosition;

  20.   /** The previous spans 2 start positions. */
  21.   private HashSet<Integer> previousSpans2StartPositions;

  22.   /** The called next start position. */
  23.   private boolean calledNextStartPosition;

  24.   /** The no more positions. */
  25.   private boolean noMorePositions;

  26.   /** The doc id. */
  27.   private int docId;

  28.   /**
  29.    * Instantiates a new mtas span followed by spans.
  30.    *
  31.    * @param query the query
  32.    * @param spans1 the spans 1
  33.    * @param spans2 the spans 2
  34.    */
  35.   public MtasSpanFollowedBySpans(MtasSpanFollowedByQuery query,
  36.       MtasSpanFollowedByQuerySpans spans1,
  37.       MtasSpanFollowedByQuerySpans spans2) {
  38.     super();
  39.     docId = -1;
  40.     this.query = query;
  41.     this.spans1 = spans1;
  42.     this.spans2 = spans2;
  43.     previousSpans2StartPositions = new HashSet<>();
  44.   }

  45.   /*
  46.    * (non-Javadoc)
  47.    *
  48.    * @see
  49.    * org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans
  50.    * .SpanCollector)
  51.    */
  52.   @Override
  53.   public void collect(SpanCollector collector) throws IOException {
  54.     spans1.spans.collect(collector);
  55.     spans2.spans.collect(collector);
  56.   }

  57.   /*
  58.    * (non-Javadoc)
  59.    *
  60.    * @see org.apache.lucene.search.spans.Spans#endPosition()
  61.    */
  62.   @Override
  63.   public int endPosition() {
  64.     return calledNextStartPosition
  65.         ? (noMorePositions ? NO_MORE_POSITIONS : spans1.spans.endPosition())
  66.         : -1;
  67.   }

  68.   /*
  69.    * (non-Javadoc)
  70.    *
  71.    * @see org.apache.lucene.search.spans.Spans#nextStartPosition()
  72.    */
  73.   @Override
  74.   public int nextStartPosition() throws IOException {
  75.     // no document
  76.     if (docId == -1 || docId == NO_MORE_DOCS) {
  77.       throw new IOException("no document");
  78.       // finished
  79.     } else if (noMorePositions) {
  80.       return NO_MORE_POSITIONS;
  81.       // littleSpans already at start match, because of check for matching
  82.       // document
  83.     } else if (!calledNextStartPosition) {
  84.       calledNextStartPosition = true;
  85.       return spans1.spans.startPosition();
  86.       // compute next match
  87.     } else {
  88.       if (goToNextStartPosition()) {
  89.         // match found
  90.         return spans1.spans.startPosition();
  91.       } else {
  92.         // no more matches: document finished
  93.         return NO_MORE_POSITIONS;
  94.       }
  95.     }
  96.   }

  97.   /*
  98.    * (non-Javadoc)
  99.    *
  100.    * @see org.apache.lucene.search.spans.Spans#positionsCost()
  101.    */
  102.   @Override
  103.   public float positionsCost() {
  104.     return 0;
  105.   }

  106.   /*
  107.    * (non-Javadoc)
  108.    *
  109.    * @see org.apache.lucene.search.spans.Spans#startPosition()
  110.    */
  111.   @Override
  112.   public int startPosition() {
  113.     return calledNextStartPosition
  114.         ? (noMorePositions ? NO_MORE_POSITIONS : spans1.spans.startPosition())
  115.         : -1;
  116.   }

  117.   /*
  118.    * (non-Javadoc)
  119.    *
  120.    * @see org.apache.lucene.search.spans.Spans#width()
  121.    */
  122.   @Override
  123.   public int width() {
  124.     return calledNextStartPosition ? (noMorePositions ? 0
  125.         : spans1.spans.endPosition() - spans1.spans.startPosition()) : 0;
  126.   }

  127.   /*
  128.    * (non-Javadoc)
  129.    *
  130.    * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
  131.    */
  132.   @Override
  133.   public int advance(int target) throws IOException {
  134.     reset();
  135.     if (docId == NO_MORE_DOCS) {
  136.       return docId;
  137.     } else if (target < docId) {
  138.       // should not happen
  139.       docId = NO_MORE_DOCS;
  140.       return docId;
  141.     } else {
  142.       // advance 1
  143.       int spans1DocId = spans1.spans.docID();
  144.       int newTarget = target;
  145.       if (spans1DocId < newTarget) {
  146.         spans1DocId = spans1.spans.advance(newTarget);
  147.         if (spans1DocId == NO_MORE_DOCS) {
  148.           docId = NO_MORE_DOCS;
  149.           return docId;
  150.         }
  151.         newTarget = Math.max(newTarget, spans1DocId);
  152.       }
  153.       int spans2DocId = spans2.spans.docID();
  154.       // advance 2
  155.       if (spans2DocId < newTarget) {
  156.         spans2DocId = spans2.spans.advance(newTarget);
  157.         if (spans2DocId == NO_MORE_DOCS) {
  158.           docId = NO_MORE_DOCS;
  159.           return docId;
  160.         }
  161.       }
  162.       // check equal docId, otherwise next
  163.       if (spans1DocId == spans2DocId) {
  164.         docId = spans1DocId;
  165.         // check match
  166.         if (goToNextStartPosition()) {
  167.           return docId;
  168.         } else {
  169.           return nextDoc();
  170.         }
  171.       } else {
  172.         return nextDoc();
  173.       }
  174.     }
  175.   }

  176.   /*
  177.    * (non-Javadoc)
  178.    *
  179.    * @see org.apache.lucene.search.DocIdSetIterator#cost()
  180.    */
  181.   @Override
  182.   public long cost() {
  183.     return 0;
  184.   }

  185.   /*
  186.    * (non-Javadoc)
  187.    *
  188.    * @see org.apache.lucene.search.DocIdSetIterator#docID()
  189.    */
  190.   @Override
  191.   public int docID() {
  192.     return docId;
  193.   }

  194.   /*
  195.    * (non-Javadoc)
  196.    *
  197.    * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
  198.    */
  199.   @Override
  200.   public int nextDoc() throws IOException {
  201.     reset();
  202.     while (!goToNextDoc())
  203.       ;
  204.     return docId;
  205.   }

  206.   /**
  207.    * Go to next doc.
  208.    *
  209.    * @return true, if successful
  210.    * @throws IOException Signals that an I/O exception has occurred.
  211.    */
  212.   private boolean goToNextDoc() throws IOException {
  213.     if (docId == NO_MORE_DOCS) {
  214.       return true;
  215.     } else {
  216.       int spans1DocId = spans1.spans.nextDoc();
  217.       int spans2DocId = spans2.spans.docID();
  218.       docId = Math.max(spans1DocId, spans2DocId);
  219.       while (spans1DocId != spans2DocId && docId != NO_MORE_DOCS) {
  220.         if (spans1DocId < spans2DocId) {
  221.           spans1DocId = spans1.spans.advance(spans2DocId);
  222.           docId = spans1DocId;
  223.         } else {
  224.           spans2DocId = spans2.spans.advance(spans1DocId);
  225.           docId = spans2DocId;
  226.         }
  227.       }
  228.       if (docId != NO_MORE_DOCS) {
  229.         if (!goToNextStartPosition()) {
  230.           reset();
  231.           return false;
  232.         }
  233.       }
  234.       return true;
  235.     }
  236.   }

  237.   /**
  238.    * Go to next start position.
  239.    *
  240.    * @return true, if successful
  241.    * @throws IOException Signals that an I/O exception has occurred.
  242.    */
  243.   private boolean goToNextStartPosition() throws IOException {
  244.     int nextSpans1StartPosition;
  245.     int nextSpans1EndPosition;
  246.     while ((nextSpans1StartPosition = spans1.spans
  247.         .nextStartPosition()) != NO_MORE_POSITIONS) {
  248.       nextSpans1EndPosition = spans1.spans.endPosition();
  249.       if (nextSpans1EndPosition == lastSpans2StartPosition) {
  250.         return true;
  251.       } else {
  252.         // clean up
  253.         if (lastSpans2StartPosition < nextSpans1StartPosition) {
  254.           previousSpans2StartPositions.clear();
  255.         } else if (previousSpans2StartPositions
  256.             .contains(nextSpans1EndPosition)) {
  257.           return true;
  258.         }
  259.         // try to find match
  260.         while (lastSpans2StartPosition < nextSpans1EndPosition) {
  261.           if (lastSpans2StartPosition != NO_MORE_POSITIONS) {
  262.             lastSpans2StartPosition = spans2.spans.nextStartPosition();
  263.           }
  264.           if (lastSpans2StartPosition == NO_MORE_POSITIONS) {
  265.             if (previousSpans2StartPositions.isEmpty()) {
  266.               noMorePositions = true;
  267.               return false;
  268.             }
  269.           } else {
  270.             if (lastSpans2StartPosition >= nextSpans1StartPosition) {
  271.               previousSpans2StartPositions.add(lastSpans2StartPosition);
  272.             }
  273.             if (nextSpans1EndPosition == lastSpans2StartPosition) {
  274.               return true;
  275.             }
  276.           }
  277.         }
  278.       }
  279.     }
  280.     return false;
  281.   }

  282.   /**
  283.    * Reset.
  284.    */
  285.   private void reset() {
  286.     calledNextStartPosition = false;
  287.     noMorePositions = false;
  288.     lastSpans2StartPosition = -1;
  289.     previousSpans2StartPositions.clear();
  290.   }

  291.   /*
  292.    * (non-Javadoc)
  293.    *
  294.    * @see mtas.search.spans.util.MtasSpans#asTwoPhaseIterator()
  295.    */
  296.   @Override
  297.   public TwoPhaseIterator asTwoPhaseIterator() {
  298.     if (spans1 == null || spans2 == null || !query.twoPhaseIteratorAllowed()) {
  299.       return null;
  300.     } else {
  301.       // TODO
  302.       return null;
  303.     }
  304.   }

  305. }