MtasMaximumExpandSpans.java
package mtas.search.spans.util;
import java.io.IOException;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.Spans;
import mtas.codec.util.CodecInfo;
import mtas.codec.util.CodecInfo.IndexDoc;
/**
* The Class MtasMaximumExpandSpans.
*/
public class MtasMaximumExpandSpans extends MtasSpans {
/** The sub spans. */
Spans subSpans;
/** The query. */
MtasMaximumExpandSpanQuery query;
/** The min position. */
int minPosition;
/** The max position. */
int maxPosition;
/** The field. */
String field;
/** The mtas codec info. */
CodecInfo mtasCodecInfo;
/** The start position. */
int startPosition;
/** The end position. */
int endPosition;
/** The called next start position. */
private boolean calledNextStartPosition;
/** The doc id. */
int docId;
/**
* Instantiates a new mtas maximum expand spans.
*
* @param query the query
* @param mtasCodecInfo the mtas codec info
* @param field the field
* @param subSpans the sub spans
*/
public MtasMaximumExpandSpans(MtasMaximumExpandSpanQuery query,
CodecInfo mtasCodecInfo, String field, Spans subSpans) {
super();
this.subSpans = subSpans;
this.field = field;
this.mtasCodecInfo = mtasCodecInfo;
this.query = query;
docId = -1;
reset();
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#nextStartPosition()
*/
@Override
public int nextStartPosition() throws IOException {
if (docId == -1 || docId == NO_MORE_DOCS) {
throw new IOException("no document");
} else if (!calledNextStartPosition) {
calledNextStartPosition = true;
return startPosition;
// compute next match
} else {
if (goToNextStartPosition()) {
// match found
return startPosition;
} else {
// no more matches: document finished
return NO_MORE_POSITIONS;
}
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#startPosition()
*/
@Override
public int startPosition() {
return startPosition;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#endPosition()
*/
@Override
public int endPosition() {
return endPosition;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#width()
*/
@Override
public int width() {
return endPosition - startPosition;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.
* spans.SpanCollector)
*/
@Override
public void collect(SpanCollector collector) throws IOException {
subSpans.collect(collector);
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#positionsCost()
*/
@Override
public float positionsCost() {
// return subSpans.positionsCost();
return 0;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#docID()
*/
@Override
public int docID() {
return docId;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
*/
@Override
public int nextDoc() throws IOException {
reset();
while (!goToNextDoc())
;
return docId;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#advance(int)
*/
@Override
public int advance(int target) throws IOException {
reset();
if (docId == NO_MORE_DOCS) {
return docId;
} else if (target <= docId) {
// should not happen
docId = NO_MORE_DOCS;
return docId;
} else {
docId = subSpans.advance(target);
if (docId == NO_MORE_DOCS) {
return docId;
} else {
IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
if (doc != null) {
minPosition = doc.minPosition;
maxPosition = doc.maxPosition;
} else {
minPosition = NO_MORE_POSITIONS;
maxPosition = NO_MORE_POSITIONS;
}
if (goToNextStartPosition()) {
return docId;
} else {
return nextDoc();
}
}
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#asTwoPhaseIterator()
*/
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
if (!query.twoPhaseIteratorAllowed()) {
return null;
} else {
TwoPhaseIterator originalTwoPhaseIterator = subSpans.asTwoPhaseIterator();
if (originalTwoPhaseIterator != null) {
return new TwoPhaseIterator(originalTwoPhaseIterator.approximation()) {
@Override
public boolean matches() throws IOException {
return originalTwoPhaseIterator.matches()
&& twoPhaseCurrentDocMatches();
}
@Override
public float matchCost() {
return originalTwoPhaseIterator.matchCost();
}
};
} else {
return new TwoPhaseIterator(subSpans) {
@Override
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
@Override
public float matchCost() {
return subSpans.positionsCost();
}
};
}
}
}
/**
* Two phase current doc matches.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean twoPhaseCurrentDocMatches() throws IOException {
if (docId != subSpans.docID()) {
reset();
docId = subSpans.docID();
IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
if (doc != null) {
minPosition = doc.minPosition;
maxPosition = doc.maxPosition;
} else {
minPosition = NO_MORE_POSITIONS;
maxPosition = NO_MORE_POSITIONS;
}
}
if (docId == NO_MORE_DOCS) {
return false;
} else {
return goToNextStartPosition();
}
}
/**
* Go to next doc.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean goToNextDoc() throws IOException {
reset();
if (docId == NO_MORE_DOCS) {
minPosition = NO_MORE_POSITIONS;
maxPosition = NO_MORE_POSITIONS;
return true;
} else {
docId = subSpans.nextDoc();
if (docId == NO_MORE_DOCS) {
minPosition = NO_MORE_POSITIONS;
maxPosition = NO_MORE_POSITIONS;
return true;
} else {
IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
if (doc != null) {
minPosition = doc.minPosition;
maxPosition = doc.maxPosition;
} else {
minPosition = NO_MORE_POSITIONS;
maxPosition = NO_MORE_POSITIONS;
}
if (goToNextStartPosition()) {
return true;
} else {
return false;
}
}
}
}
/**
* Go to next start position.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean goToNextStartPosition() throws IOException {
int basicStartPosition;
int basicEndPosition;
if (docId == -1 || docId == NO_MORE_DOCS) {
throw new IOException("no document");
} else {
while ((basicStartPosition = subSpans
.nextStartPosition()) != NO_MORE_POSITIONS) {
basicEndPosition = subSpans.endPosition();
startPosition = Math.max(minPosition,
(basicStartPosition - query.maximumLeft));
endPosition = Math.min(maxPosition + 1,
(basicEndPosition + query.maximumRight));
if (startPosition <= (basicStartPosition - query.minimumLeft)
&& endPosition >= (basicEndPosition + query.minimumRight)) {
return true;
}
}
return false;
}
}
/**
* Reset.
*/
private void reset() {
calledNextStartPosition = false;
minPosition = 0;
maxPosition = 0;
startPosition = -1;
endPosition = -1;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#cost()
*/
@Override
public long cost() {
return subSpans != null ? subSpans.cost() : 0;
}
}