MtasSpanNotSpans.java
package mtas.search.spans;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.SpanCollector;
import mtas.search.spans.MtasSpanNotQuery.MtasSpanNotQuerySpans;
import mtas.search.spans.util.MtasSpans;
/**
* The Class MtasSpanNotSpans.
*/
public class MtasSpanNotSpans extends MtasSpans {
/** The query. */
private MtasSpanNotQuery query;
/** The spans 1. */
private MtasSpanNotQuerySpans spans1;
/** The spans 2. */
private MtasSpanNotQuerySpans spans2;
/** The called next start position. */
private boolean calledNextStartPosition;
/** The last spans 2 start position. */
private int lastSpans2StartPosition;
/** The last spans 2 end position. */
private int lastSpans2EndPosition;
/** The last spans 2 end positions. */
private Set<Integer> lastSpans2EndPositions;
/** The next spans 2 start position. */
private int nextSpans2StartPosition;
/** The next spans 2 end position. */
private int nextSpans2EndPosition;
/** The doc id. */
private int docId;
/**
* Instantiates a new mtas span not spans.
*
* @param query the query
* @param spans1 the spans 1
* @param spans2 the spans 2
*/
public MtasSpanNotSpans(MtasSpanNotQuery query, MtasSpanNotQuerySpans spans1,
MtasSpanNotQuerySpans spans2) {
super();
docId = -1;
this.query = query;
this.spans1 = spans1;
this.spans2 = spans2;
this.lastSpans2EndPositions = new HashSet<>();
reset();
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#nextStartPosition()
*/
@Override
public int nextStartPosition() throws IOException {
// no document
if (docId == -1 || docId == NO_MORE_DOCS) {
throw new IOException("no document");
// littleSpans already at start match, because of check for matching
// document
} else if (!calledNextStartPosition) {
calledNextStartPosition = true;
return spans1.spans.startPosition();
// compute next match
} else {
if (goToNextStartPosition()) {
// match found
return spans1.spans.startPosition();
} else {
// no more matches: document finished
return NO_MORE_POSITIONS;
}
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#startPosition()
*/
@Override
public int startPosition() {
if (calledNextStartPosition) {
return spans1.spans.startPosition();
} else {
return -1;
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#endPosition()
*/
@Override
public int endPosition() {
if (calledNextStartPosition) {
return spans1.spans.endPosition();
} else {
return -1;
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#width()
*/
@Override
public int width() {
if (calledNextStartPosition) {
return spans1.spans.endPosition() - spans1.spans.startPosition();
} else {
return 0;
}
}
/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans
* .SpanCollector)
*/
@Override
public void collect(SpanCollector collector) throws IOException {
spans1.spans.collect(collector);
spans2.spans.collect(collector);
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#positionsCost()
*/
@Override
public float positionsCost() {
return 0;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#docID()
*/
@Override
public int docID() {
return docId;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
*/
@Override
public int nextDoc() throws IOException {
reset();
while (!goToNextDoc())
;
return docId;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#advance(int)
*/
@Override
public int advance(int target) throws IOException {
reset();
if (docId == NO_MORE_DOCS) {
return docId;
} else if (target <= docId) {
// should not happen
docId = NO_MORE_DOCS;
return docId;
} else {
docId = spans1.spans.advance(target);
if (docId == NO_MORE_DOCS) {
return docId;
} else {
int spans2DocId = spans2.spans.docID();
if (spans2DocId < docId) {
spans2DocId = spans2.spans.advance(docId);
}
if (docId != spans2DocId) {
return spans1.spans.nextStartPosition() != NO_MORE_POSITIONS ? docId
: NO_MORE_DOCS;
} else if (goToNextStartPosition()) {
return docId;
} else {
return nextDoc();
}
}
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.spans.Spans#asTwoPhaseIterator()
*/
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
if (spans1 == null || spans2 == null || !query.twoPhaseIteratorAllowed()) {
return null;
} else {
TwoPhaseIterator twoPhaseIterator1 = spans1.spans.asTwoPhaseIterator();
if (twoPhaseIterator1 != null) {
return new TwoPhaseIterator(twoPhaseIterator1.approximation()) {
@Override
public boolean matches() throws IOException {
return twoPhaseIterator1.matches() && twoPhaseCurrentDocMatches();
}
@Override
public float matchCost() {
return twoPhaseIterator1.matchCost();
}
};
} else {
return new TwoPhaseIterator(spans1.spans) {
@Override
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
@Override
public float matchCost() {
return spans1.spans.positionsCost();
}
};
}
}
}
/**
* Two phase current doc matches.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean twoPhaseCurrentDocMatches() throws IOException {
if (docId != spans1.spans.docID()) {
reset();
docId = spans1.spans.docID();
}
if (docId == NO_MORE_DOCS) {
return false;
} else {
int spans2DocId = spans2.spans.docID();
if (spans2DocId < docId) {
spans2DocId = spans2.spans.advance(docId);
}
if (docId != spans2DocId) {
return spans1.spans.nextStartPosition() != NO_MORE_POSITIONS;
} else {
return goToNextStartPosition();
}
}
}
/**
* Go to next doc.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean goToNextDoc() throws IOException {
if (docId == NO_MORE_DOCS) {
return true;
} else {
docId = spans1.spans.nextDoc();
if (docId == NO_MORE_DOCS) {
return true;
} else {
int spans2DocId = spans2.spans.docID();
if (spans2DocId < docId) {
spans2DocId = spans2.spans.advance(docId);
}
if (docId != spans2DocId) {
return spans1.spans.nextStartPosition() != NO_MORE_POSITIONS;
} else if (goToNextStartPosition()) {
return true;
} else {
reset();
return false;
}
}
}
}
/**
* Go to next start position.
*
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean goToNextStartPosition() throws IOException {
int nextSpans1StartPosition;
int nextSpans1EndPosition;
while ((nextSpans1StartPosition = spans1.spans
.nextStartPosition()) != NO_MORE_POSITIONS) {
if (spans1.spans.docID() == spans2.spans.docID()) {
// clean up
if (nextSpans1StartPosition > lastSpans2StartPosition) {
lastSpans2StartPosition = -1;
}
// fast check
if (lastSpans2StartPosition == -1
&& nextSpans1StartPosition < nextSpans2StartPosition) {
return true;
}
nextSpans1EndPosition = spans1.spans.endPosition();
if (nextSpans1StartPosition == lastSpans2StartPosition) {
// try to collect all lastSpans2Endpositions, and return true if not
// contained
if (collectAndCheckLastSpans(nextSpans1StartPosition,
nextSpans1EndPosition)) {
return true;
} else {
// continue
}
} else {
// reset, assume lastSpans2StartPosition<nextSpans1StartPosition
lastSpans2StartPosition = -1;
// go to correct next
while (nextSpans2StartPosition < nextSpans1StartPosition) {
nextSpans2StartPosition = spans2.spans.nextStartPosition();
}
nextSpans2EndPosition = spans2.spans.endPosition();
if (nextSpans1StartPosition == nextSpans2StartPosition) {
// try to collect all lastSpans2Endpositions, and return true if not
// contained
if (collectAndCheckLastSpans(nextSpans1StartPosition,
nextSpans1EndPosition)) {
return true;
} else {
// continue
}
} else {
return true;
}
}
} else {
return true;
}
}
// no more positions
return false;
}
/**
* Collect and check last spans.
*
* @param nextSpans1StartPosition the next spans 1 start position
* @param nextSpans1EndPosition the next spans 1 end position
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean collectAndCheckLastSpans(int nextSpans1StartPosition,
int nextSpans1EndPosition) throws IOException {
// check next
if (nextSpans1StartPosition == nextSpans2StartPosition
&& nextSpans1EndPosition == nextSpans2EndPosition) {
return false;
}
// check last
if (nextSpans1StartPosition == lastSpans2StartPosition
&& (nextSpans1EndPosition == lastSpans2EndPosition
|| lastSpans2EndPositions.contains(nextSpans1EndPosition))) {
return false;
}
// collect
if (nextSpans1StartPosition == nextSpans2StartPosition) {
// reset
if (nextSpans2StartPosition != lastSpans2StartPosition) {
lastSpans2StartPosition = nextSpans2StartPosition;
lastSpans2EndPosition = -1;
lastSpans2EndPositions.clear();
}
while (nextSpans1StartPosition == nextSpans2StartPosition) {
if (lastSpans2EndPosition > -1) {
lastSpans2EndPositions.add(lastSpans2EndPosition);
}
lastSpans2EndPosition = nextSpans2EndPosition;
nextSpans2StartPosition = spans2.spans.nextStartPosition();
nextSpans2EndPosition = spans2.spans.endPosition();
if (nextSpans1StartPosition == nextSpans2StartPosition
&& nextSpans1EndPosition == nextSpans2EndPosition) {
return false;
}
}
}
return true;
}
/**
* Reset.
*/
private void reset() {
calledNextStartPosition = false;
lastSpans2StartPosition = -1;
lastSpans2EndPosition = -1;
lastSpans2EndPositions.clear();
nextSpans2StartPosition = -1;
nextSpans2EndPosition = -1;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.DocIdSetIterator#cost()
*/
@Override
public long cost() {
return 0;
}
}