MtasMaximumExpandSpans.java

  1. package mtas.search.spans.util;

  2. import java.io.IOException;

  3. import org.apache.lucene.search.TwoPhaseIterator;
  4. import org.apache.lucene.search.spans.SpanCollector;
  5. import org.apache.lucene.search.spans.Spans;

  6. import mtas.codec.util.CodecInfo;
  7. import mtas.codec.util.CodecInfo.IndexDoc;

  8. /**
  9.  * The Class MtasMaximumExpandSpans.
  10.  */
  11. public class MtasMaximumExpandSpans extends MtasSpans {

  12.   /** The sub spans. */
  13.   Spans subSpans;

  14.   /** The query. */
  15.   MtasMaximumExpandSpanQuery query;

  16.   /** The min position. */
  17.   int minPosition;

  18.   /** The max position. */
  19.   int maxPosition;

  20.   /** The field. */
  21.   String field;

  22.   /** The mtas codec info. */
  23.   CodecInfo mtasCodecInfo;

  24.   /** The start position. */
  25.   int startPosition;

  26.   /** The end position. */
  27.   int endPosition;

  28.   /** The called next start position. */
  29.   private boolean calledNextStartPosition;

  30.   /** The doc id. */
  31.   int docId;

  32.   /**
  33.    * Instantiates a new mtas maximum expand spans.
  34.    *
  35.    * @param query the query
  36.    * @param mtasCodecInfo the mtas codec info
  37.    * @param field the field
  38.    * @param subSpans the sub spans
  39.    */
  40.   public MtasMaximumExpandSpans(MtasMaximumExpandSpanQuery query,
  41.       CodecInfo mtasCodecInfo, String field, Spans subSpans) {
  42.     super();
  43.     this.subSpans = subSpans;
  44.     this.field = field;
  45.     this.mtasCodecInfo = mtasCodecInfo;
  46.     this.query = query;
  47.     docId = -1;
  48.     reset();
  49.   }

  50.   /*
  51.    * (non-Javadoc)
  52.    *
  53.    * @see org.apache.lucene.search.spans.Spans#nextStartPosition()
  54.    */
  55.   @Override
  56.   public int nextStartPosition() throws IOException {
  57.     if (docId == -1 || docId == NO_MORE_DOCS) {
  58.       throw new IOException("no document");
  59.     } else if (!calledNextStartPosition) {
  60.       calledNextStartPosition = true;
  61.       return startPosition;
  62.       // compute next match
  63.     } else {
  64.       if (goToNextStartPosition()) {
  65.         // match found
  66.         return startPosition;
  67.       } else {
  68.         // no more matches: document finished
  69.         return NO_MORE_POSITIONS;
  70.       }
  71.     }
  72.   }

  73.   /*
  74.    * (non-Javadoc)
  75.    *
  76.    * @see org.apache.lucene.search.spans.Spans#startPosition()
  77.    */
  78.   @Override
  79.   public int startPosition() {
  80.     return startPosition;
  81.   }

  82.   /*
  83.    * (non-Javadoc)
  84.    *
  85.    * @see org.apache.lucene.search.spans.Spans#endPosition()
  86.    */
  87.   @Override
  88.   public int endPosition() {
  89.     return endPosition;
  90.   }

  91.   /*
  92.    * (non-Javadoc)
  93.    *
  94.    * @see org.apache.lucene.search.spans.Spans#width()
  95.    */
  96.   @Override
  97.   public int width() {
  98.     return endPosition - startPosition;
  99.   }

  100.   /*
  101.    * (non-Javadoc)
  102.    *
  103.    * @see org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.
  104.    * spans.SpanCollector)
  105.    */
  106.   @Override
  107.   public void collect(SpanCollector collector) throws IOException {
  108.     subSpans.collect(collector);
  109.   }

  110.   /*
  111.    * (non-Javadoc)
  112.    *
  113.    * @see org.apache.lucene.search.spans.Spans#positionsCost()
  114.    */
  115.   @Override
  116.   public float positionsCost() {
  117.     // return subSpans.positionsCost();
  118.     return 0;
  119.   }

  120.   /*
  121.    * (non-Javadoc)
  122.    *
  123.    * @see org.apache.lucene.search.DocIdSetIterator#docID()
  124.    */
  125.   @Override
  126.   public int docID() {
  127.     return docId;
  128.   }

  129.   /*
  130.    * (non-Javadoc)
  131.    *
  132.    * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
  133.    */
  134.   @Override
  135.   public int nextDoc() throws IOException {
  136.     reset();
  137.     while (!goToNextDoc())
  138.       ;
  139.     return docId;
  140.   }

  141.   /*
  142.    * (non-Javadoc)
  143.    *
  144.    * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
  145.    */
  146.   @Override
  147.   public int advance(int target) throws IOException {
  148.     reset();
  149.     if (docId == NO_MORE_DOCS) {
  150.       return docId;
  151.     } else if (target <= docId) {
  152.       // should not happen
  153.       docId = NO_MORE_DOCS;
  154.       return docId;
  155.     } else {
  156.       docId = subSpans.advance(target);
  157.       if (docId == NO_MORE_DOCS) {
  158.         return docId;
  159.       } else {
  160.         IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
  161.         if (doc != null) {
  162.           minPosition = doc.minPosition;
  163.           maxPosition = doc.maxPosition;
  164.         } else {
  165.           minPosition = NO_MORE_POSITIONS;
  166.           maxPosition = NO_MORE_POSITIONS;
  167.         }
  168.         if (goToNextStartPosition()) {
  169.           return docId;
  170.         } else {
  171.           return nextDoc();
  172.         }
  173.       }
  174.     }
  175.   }

  176.   /*
  177.    * (non-Javadoc)
  178.    *
  179.    * @see org.apache.lucene.search.spans.Spans#asTwoPhaseIterator()
  180.    */
  181.   @Override
  182.   public TwoPhaseIterator asTwoPhaseIterator() {
  183.     if (!query.twoPhaseIteratorAllowed()) {
  184.       return null;
  185.     } else {
  186.       TwoPhaseIterator originalTwoPhaseIterator = subSpans.asTwoPhaseIterator();
  187.       if (originalTwoPhaseIterator != null) {
  188.         return new TwoPhaseIterator(originalTwoPhaseIterator.approximation()) {
  189.           @Override
  190.           public boolean matches() throws IOException {
  191.             return originalTwoPhaseIterator.matches()
  192.                 && twoPhaseCurrentDocMatches();
  193.           }

  194.           @Override
  195.           public float matchCost() {
  196.             return originalTwoPhaseIterator.matchCost();
  197.           }
  198.         };
  199.       } else {
  200.         return new TwoPhaseIterator(subSpans) {

  201.           @Override
  202.           public boolean matches() throws IOException {
  203.             return twoPhaseCurrentDocMatches();
  204.           }

  205.           @Override
  206.           public float matchCost() {
  207.             return subSpans.positionsCost();
  208.           }
  209.         };
  210.       }
  211.     }
  212.   }

  213.   /**
  214.    * Two phase current doc matches.
  215.    *
  216.    * @return true, if successful
  217.    * @throws IOException Signals that an I/O exception has occurred.
  218.    */
  219.   private boolean twoPhaseCurrentDocMatches() throws IOException {
  220.     if (docId != subSpans.docID()) {
  221.       reset();
  222.       docId = subSpans.docID();
  223.       IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
  224.       if (doc != null) {
  225.         minPosition = doc.minPosition;
  226.         maxPosition = doc.maxPosition;
  227.       } else {
  228.         minPosition = NO_MORE_POSITIONS;
  229.         maxPosition = NO_MORE_POSITIONS;
  230.       }
  231.     }
  232.     if (docId == NO_MORE_DOCS) {
  233.       return false;
  234.     } else {
  235.       return goToNextStartPosition();
  236.     }
  237.   }

  238.   /**
  239.    * Go to next doc.
  240.    *
  241.    * @return true, if successful
  242.    * @throws IOException Signals that an I/O exception has occurred.
  243.    */
  244.   private boolean goToNextDoc() throws IOException {
  245.     reset();
  246.     if (docId == NO_MORE_DOCS) {
  247.       minPosition = NO_MORE_POSITIONS;
  248.       maxPosition = NO_MORE_POSITIONS;
  249.       return true;
  250.     } else {
  251.       docId = subSpans.nextDoc();
  252.       if (docId == NO_MORE_DOCS) {
  253.         minPosition = NO_MORE_POSITIONS;
  254.         maxPosition = NO_MORE_POSITIONS;
  255.         return true;
  256.       } else {
  257.         IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
  258.         if (doc != null) {
  259.           minPosition = doc.minPosition;
  260.           maxPosition = doc.maxPosition;
  261.         } else {
  262.           minPosition = NO_MORE_POSITIONS;
  263.           maxPosition = NO_MORE_POSITIONS;
  264.         }
  265.         if (goToNextStartPosition()) {
  266.           return true;
  267.         } else {
  268.           return false;
  269.         }
  270.       }
  271.     }
  272.   }

  273.   /**
  274.    * Go to next start position.
  275.    *
  276.    * @return true, if successful
  277.    * @throws IOException Signals that an I/O exception has occurred.
  278.    */
  279.   private boolean goToNextStartPosition() throws IOException {
  280.     int basicStartPosition;
  281.     int basicEndPosition;
  282.     if (docId == -1 || docId == NO_MORE_DOCS) {
  283.       throw new IOException("no document");
  284.     } else {
  285.       while ((basicStartPosition = subSpans
  286.           .nextStartPosition()) != NO_MORE_POSITIONS) {
  287.         basicEndPosition = subSpans.endPosition();
  288.         startPosition = Math.max(minPosition,
  289.             (basicStartPosition - query.maximumLeft));
  290.         endPosition = Math.min(maxPosition + 1,
  291.             (basicEndPosition + query.maximumRight));
  292.         if (startPosition <= (basicStartPosition - query.minimumLeft)
  293.             && endPosition >= (basicEndPosition + query.minimumRight)) {
  294.           return true;
  295.         }
  296.       }
  297.       return false;
  298.     }
  299.   }

  300.   /**
  301.    * Reset.
  302.    */
  303.   private void reset() {
  304.     calledNextStartPosition = false;
  305.     minPosition = 0;
  306.     maxPosition = 0;
  307.     startPosition = -1;
  308.     endPosition = -1;
  309.   }

  310.   /*
  311.    * (non-Javadoc)
  312.    *
  313.    * @see org.apache.lucene.search.DocIdSetIterator#cost()
  314.    */
  315.   @Override
  316.   public long cost() {
  317.     return subSpans != null ? subSpans.cost() : 0;
  318.   }
  319. }