MtasIgnoreItem.java
package mtas.search.spans.util;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.spans.Spans;
/**
* The Class MtasIgnoreItem.
*/
public class MtasIgnoreItem {
/** The Constant log. */
private static final Log log = LogFactory.getLog(MtasIgnoreItem.class);
/** The Constant DEFAULT_MAXIMUM_IGNORE_LENGTH. */
public static final int DEFAULT_MAXIMUM_IGNORE_LENGTH = 10;
/** The ignore spans. */
Spans ignoreSpans;
/** The current doc id. */
int currentDocId;
/** The current position. */
int currentPosition;
/** The minimum position. */
int minimumPosition;
/** The maximum ignore length. */
int maximumIgnoreLength;
/** The base start position list. */
HashMap<Integer, HashSet<Integer>> baseStartPositionList;
/** The base end position list. */
HashMap<Integer, HashSet<Integer>> baseEndPositionList;
/** The full end position list. */
HashMap<Integer, HashSet<Integer>> fullEndPositionList;
/** The min base start position. */
HashMap<Integer, Integer> minBaseStartPosition;
/** The max base end position. */
HashMap<Integer, Integer> maxBaseEndPosition;
/** The min full start position. */
HashMap<Integer, Integer> minFullStartPosition;
/** The max full end position. */
HashMap<Integer, Integer> maxFullEndPosition;
/**
* Instantiates a new mtas ignore item.
*
* @param ignoreSpans the ignore spans
* @param maximumIgnoreLength the maximum ignore length
*/
public MtasIgnoreItem(Spans ignoreSpans, Integer maximumIgnoreLength) {
this.ignoreSpans = ignoreSpans;
currentDocId = -1;
currentPosition = -1;
minimumPosition = -1;
baseStartPositionList = new HashMap<>();
baseEndPositionList = new HashMap<>();
fullEndPositionList = new HashMap<>();
minBaseStartPosition = new HashMap<>();
maxBaseEndPosition = new HashMap<>();
minFullStartPosition = new HashMap<>();
maxFullEndPosition = new HashMap<>();
if (maximumIgnoreLength == null) {
this.maximumIgnoreLength = DEFAULT_MAXIMUM_IGNORE_LENGTH;
} else {
this.maximumIgnoreLength = maximumIgnoreLength;
}
}
/**
* Advance to doc.
*
* @param docId the doc id
* @return true, if successful
* @throws IOException Signals that an I/O exception has occurred.
*/
public boolean advanceToDoc(int docId) throws IOException {
if (ignoreSpans == null || currentDocId == Spans.NO_MORE_DOCS) {
return false;
} else if (currentDocId == docId) {
return true;
} else {
baseEndPositionList.clear();
fullEndPositionList.clear();
maxBaseEndPosition.clear();
minFullStartPosition.clear();
if (currentDocId < docId) {
currentDocId = ignoreSpans.advance(docId);
currentPosition = -1;
minimumPosition = -1;
}
return currentDocId == docId;
}
}
/**
* Gets the min start position.
*
* @param docId the doc id
* @param position the position
* @return the min start position
* @throws IOException Signals that an I/O exception has occurred.
*/
public int getMinStartPosition(int docId, int position) throws IOException {
if (ignoreSpans != null && docId == currentDocId) {
if (position < minimumPosition) {
throw new IOException(
"Unexpected position, should be >= " + minimumPosition + "!");
} else {
computeFullStartPositionMinimum(position);
if (minFullStartPosition.containsKey(position)) {
return minFullStartPosition.get(position);
} else {
return 0;
}
}
} else {
return 0;
}
}
/**
* Gets the max end position.
*
* @param docId the doc id
* @param position the position
* @return the max end position
* @throws IOException Signals that an I/O exception has occurred.
*/
public int getMaxEndPosition(int docId, int position) throws IOException {
if (ignoreSpans != null && docId == currentDocId) {
if (position < minimumPosition) {
throw new IOException(
"Unexpected position, should be >= " + minimumPosition + "!");
}
computeFullEndPositionList(position);
if (maxFullEndPosition.containsKey(position)) {
return maxFullEndPosition.get(position);
} else {
return 0;
}
} else {
return 0;
}
}
/**
* Gets the full end position list.
*
* @param docId the doc id
* @param position the position
* @return the full end position list
* @throws IOException Signals that an I/O exception has occurred.
*/
public Set<Integer> getFullEndPositionList(int docId, int position)
throws IOException {
if (ignoreSpans != null && docId == currentDocId) {
if (position < minimumPosition) {
throw new IOException(
"Unexpected startPosition, should be >= " + minimumPosition + "!");
} else {
computeFullEndPositionList(position);
return fullEndPositionList.get(position);
}
} else {
return null;
}
}
/**
* Compute full start position minimum.
*
* @param position the position
* @throws IOException Signals that an I/O exception has occurred.
*/
private void computeFullStartPositionMinimum(int position)
throws IOException {
if (ignoreSpans != null && !minFullStartPosition.containsKey(position)) {
HashSet<Integer> list = baseStartPositionList.get(position);
HashSet<Integer> newList = new HashSet<>();
int minimumStartPosition = position;
while (list != null && !list.isEmpty()) {
newList.clear();
for (int startPosition : list) {
if (minFullStartPosition.containsKey(startPosition)) {
minimumStartPosition = Math.min(minimumStartPosition,
minFullStartPosition.get(startPosition));
} else if (baseStartPositionList.containsKey(startPosition)) {
newList.addAll(baseStartPositionList.get(startPosition));
} else {
if (startPosition < minimumStartPosition) {
minimumStartPosition = startPosition;
}
}
}
list.clear();
list.addAll(newList);
}
minFullStartPosition.put(position, minimumStartPosition);
}
}
/**
* Compute full end position list.
*
* @param position the position
* @throws IOException Signals that an I/O exception has occurred.
*/
private void computeFullEndPositionList(int position) throws IOException {
if (ignoreSpans != null && !fullEndPositionList.containsKey(position)) {
// initial fill
moveTo(position);
HashSet<Integer> list = baseEndPositionList.get(position);
if (list != null && !list.isEmpty()) {
int maxEndPosition = maxBaseEndPosition.get(position);
HashSet<Integer> checkList = new HashSet<>();
HashSet<Integer> subCheckList = new HashSet<>();
checkList.addAll(list);
int depth = 1;
while (!checkList.isEmpty()) {
if (depth > maximumIgnoreLength) {
checkList.clear();
subCheckList.clear();
throw new IOException("too many successive ignores, maximum is "
+ maximumIgnoreLength);
} else {
for (Integer checkItem : checkList) {
if (fullEndPositionList.get(checkItem) != null) {
list.addAll(fullEndPositionList.get(checkItem));
maxEndPosition = Math.max(maxEndPosition,
maxFullEndPosition.get(checkItem));
} else {
moveTo(checkItem);
if (baseEndPositionList.containsKey(checkItem)) {
list.addAll(baseEndPositionList.get(checkItem));
maxEndPosition = Math.max(maxEndPosition,
maxBaseEndPosition.get(checkItem));
subCheckList.addAll(baseEndPositionList.get(checkItem));
} else {
// ready for checkItem
}
}
}
checkList.clear();
checkList.addAll(subCheckList);
subCheckList.clear();
depth++;
}
}
fullEndPositionList.put(position, list);
maxFullEndPosition.put(position, (maxEndPosition - position));
} else {
fullEndPositionList.put(position, null);
maxFullEndPosition.put(position, 0);
}
}
}
/**
* Move to.
*
* @param position the position
*/
private void moveTo(int position) {
while (position >= currentPosition) {
try {
currentPosition = ignoreSpans.nextStartPosition();
if (currentPosition != Spans.NO_MORE_POSITIONS
&& currentPosition >= minimumPosition) {
if (!baseEndPositionList.containsKey(currentPosition)) {
baseEndPositionList.put(currentPosition, new HashSet<Integer>());
maxBaseEndPosition.put(currentPosition, currentPosition);
} else {
maxBaseEndPosition.put(currentPosition,
Math.max(maxBaseEndPosition.get(currentPosition),
ignoreSpans.endPosition()));
}
if (!baseStartPositionList.containsKey(ignoreSpans.endPosition())) {
baseStartPositionList.put(ignoreSpans.endPosition(),
new HashSet<Integer>());
minBaseStartPosition.put(ignoreSpans.endPosition(),
ignoreSpans.endPosition());
} else {
minBaseStartPosition.put(ignoreSpans.endPosition(),
Math.min(minBaseStartPosition.get(ignoreSpans.endPosition()),
currentPosition));
}
baseStartPositionList.get(ignoreSpans.endPosition())
.add(currentPosition);
baseEndPositionList.get(currentPosition)
.add(ignoreSpans.endPosition());
}
} catch (IOException e) {
log.debug(e);
currentPosition = Spans.NO_MORE_POSITIONS;
break;
}
}
}
/**
* Removes the before.
*
* @param docId the doc id
* @param position the position
*/
public void removeBefore(int docId, int position) {
if (ignoreSpans != null && docId == currentDocId) {
baseStartPositionList.entrySet()
.removeIf(entry -> entry.getKey() < position);
baseEndPositionList.entrySet()
.removeIf(entry -> entry.getKey() < position);
fullEndPositionList.entrySet()
.removeIf(entry -> entry.getKey() < position);
minBaseStartPosition.entrySet()
.removeIf(entry -> entry.getKey() < position);
maxBaseEndPosition.entrySet()
.removeIf(entry -> entry.getKey() < position);
minFullStartPosition.entrySet()
.removeIf(entry -> entry.getKey() < position);
maxFullEndPosition.entrySet()
.removeIf(entry -> entry.getKey() < position);
if (minimumPosition < position) {
minimumPosition = position;
}
if (currentPosition < position) {
currentPosition = position;
}
}
}
}