MtasMaximumExpandSpans.java
- package mtas.search.spans.util;
- import java.io.IOException;
- import org.apache.lucene.search.TwoPhaseIterator;
- import org.apache.lucene.search.spans.SpanCollector;
- import org.apache.lucene.search.spans.Spans;
- import mtas.codec.util.CodecInfo;
- import mtas.codec.util.CodecInfo.IndexDoc;
- /**
- * The Class MtasMaximumExpandSpans.
- */
- public class MtasMaximumExpandSpans extends MtasSpans {
- /** The sub spans. */
- Spans subSpans;
- /** The query. */
- MtasMaximumExpandSpanQuery query;
- /** The min position. */
- int minPosition;
- /** The max position. */
- int maxPosition;
- /** The field. */
- String field;
- /** The mtas codec info. */
- CodecInfo mtasCodecInfo;
- /** The start position. */
- int startPosition;
- /** The end position. */
- int endPosition;
- /** The called next start position. */
- private boolean calledNextStartPosition;
- /** The doc id. */
- int docId;
- /**
- * Instantiates a new mtas maximum expand spans.
- *
- * @param query the query
- * @param mtasCodecInfo the mtas codec info
- * @param field the field
- * @param subSpans the sub spans
- */
- public MtasMaximumExpandSpans(MtasMaximumExpandSpanQuery query,
- CodecInfo mtasCodecInfo, String field, Spans subSpans) {
- super();
- this.subSpans = subSpans;
- this.field = field;
- this.mtasCodecInfo = mtasCodecInfo;
- this.query = query;
- docId = -1;
- reset();
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#nextStartPosition()
- */
- @Override
- public int nextStartPosition() throws IOException {
- if (docId == -1 || docId == NO_MORE_DOCS) {
- throw new IOException("no document");
- } else if (!calledNextStartPosition) {
- calledNextStartPosition = true;
- return startPosition;
- // compute next match
- } else {
- if (goToNextStartPosition()) {
- // match found
- return startPosition;
- } else {
- // no more matches: document finished
- return NO_MORE_POSITIONS;
- }
- }
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#startPosition()
- */
- @Override
- public int startPosition() {
- return startPosition;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#endPosition()
- */
- @Override
- public int endPosition() {
- return endPosition;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#width()
- */
- @Override
- public int width() {
- return endPosition - startPosition;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.
- * spans.SpanCollector)
- */
- @Override
- public void collect(SpanCollector collector) throws IOException {
- subSpans.collect(collector);
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#positionsCost()
- */
- @Override
- public float positionsCost() {
- // return subSpans.positionsCost();
- return 0;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.DocIdSetIterator#docID()
- */
- @Override
- public int docID() {
- return docId;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
- */
- @Override
- public int nextDoc() throws IOException {
- reset();
- while (!goToNextDoc())
- ;
- return docId;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
- */
- @Override
- public int advance(int target) throws IOException {
- reset();
- if (docId == NO_MORE_DOCS) {
- return docId;
- } else if (target <= docId) {
- // should not happen
- docId = NO_MORE_DOCS;
- return docId;
- } else {
- docId = subSpans.advance(target);
- if (docId == NO_MORE_DOCS) {
- return docId;
- } else {
- IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
- if (doc != null) {
- minPosition = doc.minPosition;
- maxPosition = doc.maxPosition;
- } else {
- minPosition = NO_MORE_POSITIONS;
- maxPosition = NO_MORE_POSITIONS;
- }
- if (goToNextStartPosition()) {
- return docId;
- } else {
- return nextDoc();
- }
- }
- }
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.spans.Spans#asTwoPhaseIterator()
- */
- @Override
- public TwoPhaseIterator asTwoPhaseIterator() {
- if (!query.twoPhaseIteratorAllowed()) {
- return null;
- } else {
- TwoPhaseIterator originalTwoPhaseIterator = subSpans.asTwoPhaseIterator();
- if (originalTwoPhaseIterator != null) {
- return new TwoPhaseIterator(originalTwoPhaseIterator.approximation()) {
- @Override
- public boolean matches() throws IOException {
- return originalTwoPhaseIterator.matches()
- && twoPhaseCurrentDocMatches();
- }
- @Override
- public float matchCost() {
- return originalTwoPhaseIterator.matchCost();
- }
- };
- } else {
- return new TwoPhaseIterator(subSpans) {
- @Override
- public boolean matches() throws IOException {
- return twoPhaseCurrentDocMatches();
- }
- @Override
- public float matchCost() {
- return subSpans.positionsCost();
- }
- };
- }
- }
- }
- /**
- * Two phase current doc matches.
- *
- * @return true, if successful
- * @throws IOException Signals that an I/O exception has occurred.
- */
- private boolean twoPhaseCurrentDocMatches() throws IOException {
- if (docId != subSpans.docID()) {
- reset();
- docId = subSpans.docID();
- IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
- if (doc != null) {
- minPosition = doc.minPosition;
- maxPosition = doc.maxPosition;
- } else {
- minPosition = NO_MORE_POSITIONS;
- maxPosition = NO_MORE_POSITIONS;
- }
- }
- if (docId == NO_MORE_DOCS) {
- return false;
- } else {
- return goToNextStartPosition();
- }
- }
- /**
- * Go to next doc.
- *
- * @return true, if successful
- * @throws IOException Signals that an I/O exception has occurred.
- */
- private boolean goToNextDoc() throws IOException {
- reset();
- if (docId == NO_MORE_DOCS) {
- minPosition = NO_MORE_POSITIONS;
- maxPosition = NO_MORE_POSITIONS;
- return true;
- } else {
- docId = subSpans.nextDoc();
- if (docId == NO_MORE_DOCS) {
- minPosition = NO_MORE_POSITIONS;
- maxPosition = NO_MORE_POSITIONS;
- return true;
- } else {
- IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
- if (doc != null) {
- minPosition = doc.minPosition;
- maxPosition = doc.maxPosition;
- } else {
- minPosition = NO_MORE_POSITIONS;
- maxPosition = NO_MORE_POSITIONS;
- }
- if (goToNextStartPosition()) {
- return true;
- } else {
- return false;
- }
- }
- }
- }
- /**
- * Go to next start position.
- *
- * @return true, if successful
- * @throws IOException Signals that an I/O exception has occurred.
- */
- private boolean goToNextStartPosition() throws IOException {
- int basicStartPosition;
- int basicEndPosition;
- if (docId == -1 || docId == NO_MORE_DOCS) {
- throw new IOException("no document");
- } else {
- while ((basicStartPosition = subSpans
- .nextStartPosition()) != NO_MORE_POSITIONS) {
- basicEndPosition = subSpans.endPosition();
- startPosition = Math.max(minPosition,
- (basicStartPosition - query.maximumLeft));
- endPosition = Math.min(maxPosition + 1,
- (basicEndPosition + query.maximumRight));
- if (startPosition <= (basicStartPosition - query.minimumLeft)
- && endPosition >= (basicEndPosition + query.minimumRight)) {
- return true;
- }
- }
- return false;
- }
- }
- /**
- * Reset.
- */
- private void reset() {
- calledNextStartPosition = false;
- minPosition = 0;
- maxPosition = 0;
- startPosition = -1;
- endPosition = -1;
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.DocIdSetIterator#cost()
- */
- @Override
- public long cost() {
- return subSpans != null ? subSpans.cost() : 0;
- }
- }