MtasFieldsConsumer.java
- package mtas.codec;
- import java.io.Closeable;
- import java.io.EOFException;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map.Entry;
- import java.util.SortedMap;
- import java.util.SortedSet;
- import java.util.TreeMap;
- import java.util.TreeSet;
- import mtas.analysis.token.MtasOffset;
- import mtas.analysis.token.MtasPosition;
- import mtas.analysis.token.MtasToken;
- import mtas.analysis.token.MtasTokenString;
- import mtas.codec.payload.MtasPayloadDecoder;
- import mtas.codec.tree.MtasRBTree;
- import mtas.codec.tree.MtasTree;
- import mtas.codec.tree.MtasTreeNode;
- import mtas.codec.tree.MtasTreeNodeId;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import org.apache.lucene.codecs.CodecUtil;
- import org.apache.lucene.codecs.FieldsConsumer;
- import org.apache.lucene.codecs.FieldsProducer;
- import org.apache.lucene.index.FieldInfo;
- import org.apache.lucene.index.FieldInfos;
- import org.apache.lucene.index.Fields;
- import org.apache.lucene.index.IndexFileNames;
- import org.apache.lucene.index.MappedMultiFields;
- import org.apache.lucene.index.MergeState;
- import org.apache.lucene.index.MultiFields;
- import org.apache.lucene.index.PostingsEnum;
- import org.apache.lucene.index.ReaderSlice;
- import org.apache.lucene.index.SegmentWriteState;
- import org.apache.lucene.index.Terms;
- import org.apache.lucene.index.TermsEnum;
- import org.apache.lucene.search.DocIdSetIterator;
- import org.apache.lucene.store.IndexInput;
- import org.apache.lucene.store.IndexOutput;
- import org.apache.lucene.util.BytesRef;
- import org.apache.lucene.util.IOUtils;
- /**
- * The Class MtasFieldsConsumer.
- */
- /**
- * The Class MtasFieldsConsumer constructs several temporary and permanent
- * files to provide a forward index.
- *
- * <ul>
- * <li><b>Temporary files</b><br>
- * <ul>
- * <li><b>Temporary file {@link #mtasTmpFieldFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION} </b><br>
- * Contains for each field a reference to the list of documents. Structure of
- * content:
- * <ul>
- * <li><b>String</b>: field</li>
- * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
- * <li><b>VInt</b>: number of documents</li>
- * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
- * <li><b>VInt</b>: number of terms</li>
- * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
- * <li><b>VInt</b>: number of prefixes</li>
- * </ul>
- * </li>
- * <li><b>Temporary file {@link #mtasTmpObjectFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}</b><br>
- * Contains for a specific field all objects constructed by
- * {@link #createObjectAndRegisterPrefix}. For all fields, the objects are
- * later on copied to {@link #mtasObjectFileName} while statistics are
- * collected. Structure of content identical to {@link #mtasObjectFileName}.</li>
- * <li><b>Temporary file {@link #mtasTmpDocsFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}</b> <br>
- * Contains for a specific field for each doc multiple fragments. Each occurring
- * term results in a fragment. Structure of content:
- * <ul>
- * <li><b>VInt</b>: docId</li>
- * <li><b>VInt</b>: number of objects in this fragment</li>
- * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
- * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
- * {@link #mtasTmpObjectFileName} minus offset</li>
- * <li><b>VInt</b>,<b>VLong</b>: ...</li>
- * </ul>
- * </li>
- * <li><b>Temporary file {@link #mtasTmpDocsChainedFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION}
- * </b><br>
- * Contains for a specific field for each doc multiple chained fragments.
- * Structure of content:
- * <ul>
- * <li><b>VInt</b>: docId</li>
- * <li><b>VInt</b>: number of objects in this fragment</li>
- * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
- * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
- * {@link #mtasTmpObjectFileName} minus offset</li>
- * <li><b>VInt</b>,<b>VLong</b>: ...</li>
- * <li><b>VLong</b>: reference to next fragment in
- * {@link #mtasTmpDocsChainedFileName}, self reference indicates end of chain
- * </ul>
- * </li>
- * <li><b>Temporary file {@link #mtasTmpDocFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}</b><br>
- * For each document
- * <ul>
- * <li><b>VInt</b>: docId</li>
- * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
- * <li><b>VLong</b>: reference first object, used as offset for tree index
- * <li><b>VInt</b>: slope used in approximation reference objects index on id
- * </li>
- * <li><b>ZLong</b>: offset used in approximation reference objects index on id
- * </li>
- * <li><b>Byte</b>: flag indicating how corrections on the approximation
- * references objects for the index on id are stored:
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
- * <li><b>VInt</b>: number of objects in this document</li>
- * <li><b>VInt</b>: first position</li>
- * <li><b>VInt</b>: last position</li>
- * </ul>
- * </li>
- * </ul>
- * </li>
- * <li><b>Final files</b><br>
- * <ul>
- * <li><b>File {@link #mtasIndexFieldFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}</b><br>
- * Contains for each field a reference to the list of documents and the
- * prefixes. Structure of content:
- * <ul>
- * <li><b>String</b>: field</li>
- * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
- * <li><b>VLong</b>: reference to {@link #mtasIndexDocIdFileName}</li>
- * <li><b>VInt</b>: number of documents</li>
- * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
- * <li><b>VInt</b>: number of terms</li>
- * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
- * <li><b>VInt</b>: number of prefixes</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasTermFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}</b><br>
- * For each field, all unique terms are stored here. Structure of content:
- * <ul>
- * <li><b>String</b>: term</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasPrefixFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}</b><br>
- * For each field, all unique prefixes are stored here. Structure of content:
- * <ul>
- * <li><b>String</b>: prefix</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasObjectFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}</b><br>
- * Contains all objects for all fields. Structure of content:
- * <ul>
- * <li><b>VInt</b>: mtasId</li>
- * <li><b>VInt</b>: objectFlags
- * <ul>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}</li>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}</li>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}</li>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}</li>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}</li>
- * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}</li>
- * </ul>
- * </li>
- * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}<br>
- * <b>VInt</b>: parentId
- * <li>Only if
- * {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}<br>
- * <b>VInt</b>,<b>VInt</b>: startPosition and (endPosition-startPosition)
- * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
- * <b>VInt</b>,<b>VInt</b>,<b>VInt</b>,...: number of positions, firstPosition,
- * (position-previousPosition),...
- * <li>Only if no {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}
- * or {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
- * <b>VInt</b>: position
- * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}<br>
- * <b>VInt</b>,<b>VInt</b>: startOffset, (endOffset-startOffset)
- * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}<br>
- * <b>VInt</b>,<b>VInt</b>: startRealOffset, (endRealOffset-startRealOffset)
- * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}<br>
- * <b>VInt</b>,<b>Bytes</b>: number of bytes, payload
- * <li><b>VLong</b>: reference to Term in {@link #mtasTermFileName}</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasIndexDocIdFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION}
- * </b><br>
- * Contains for each field a tree structure {@link MtasTree} to search reference
- * to {@link #mtasDocFileName} by id. Structure of content for each node:
- * <ul>
- * <li><b>VLong</b>: offset references to {@link #mtasIndexDocIdFileName}, only
- * available in root node</li>
- * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
- * node</li>
- * <li><b>VInt</b>: left</li>
- * <li><b>VInt</b>: right</li>
- * <li><b>VInt</b>: max</li>
- * <li><b>VLong</b>: left reference to {@link #mtasIndexDocIdFileName} minus the
- * offset stored in the root node</li>
- * <li><b>VLong</b>: right reference to {@link #mtasIndexDocIdFileName} minus
- * the offset stored in the root node</li>
- * <li><b>VInt</b>: number of objects on this node (always 1 for this tree)</li>
- * <li><b>VLong</b>: reference to {@link #mtasDocFileName} minus offset</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasDocFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}</b><br>
- * For each document
- * <ul>
- * <li><b>VInt</b>: docId</li>
- * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
- * <li><b>VLong</b>: reference to {@link #mtasIndexObjectPositionFileName}</li>
- * <li><b>VLong</b>: reference to {@link #mtasIndexObjectParentFileName}</li>
- * <li><b>VLong</b>: reference first object, used as offset for tree index
- * <li><b>VInt</b>: slope used in approximation reference objects index on id
- * </li>
- * <li><b>ZLong</b>: offset used in approximation reference objects index on id
- * </li>
- * <li><b>Byte</b>: flag indicating how corrections on the approximation
- * references objects for the index on id are stored:
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
- * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
- * <li><b>VInt</b>: number of objects</li>
- * <li><b>VInt</b>: first position</li>
- * <li><b>VInt</b>: last position</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasIndexObjectIdFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION}
- * </b><br>
- * Provides for each mtasId the reference to {@link #mtasObjectFileName}. These
- * references are grouped by document, sorted by mtasId, and because the
- * mtasId's for each document will always start with 0 and are sequential
- * without gaps, a reference can be computed if the position of the first
- * reference for a document is known from {@link #mtasDocFileName}. The
- * reference is approximated by the reference to the first object plus the
- * mtasId times a slope. Only a correction to this approximation is stored.
- * Structure of content:
- * <ul>
- * <li><b>Byte</b>/<b>Short</b>/<b>Int</b>/<b>Long</b>: correction reference to
- * {@link #mtasObjectFileName}</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasIndexObjectPositionFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION}
- * </b><br>
- * Contains for each document a tree structure {@link MtasTree} to search
- * objects by position. Structure of content for each node:
- * <ul>
- * <li><b>VLong</b>: offset references to
- * {@link #mtasIndexObjectPositionFileName}, only available in root node</li>
- * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
- * node</li>
- * <li><b>VInt</b>: left</li>
- * <li><b>VInt</b>: right</li>
- * <li><b>VInt</b>: max</li>
- * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectPositionFileName}
- * minus the offset stored in the root node</li>
- * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectPositionFileName}
- * minus the offset stored in the root node</li>
- * <li><b>VInt</b>: number of objects on this node</li>
- * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
- * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
- * position of the prefix in {@link #mtasPrefixFileName} and the reference to
- * {@link #mtasTermFileName} minus offset</li>
- * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
- * reference to {@link #mtasObjectFileName}, position of the prefix in
- * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
- * for the first item the difference between this reference minus the previous
- * reference is stored</li>
- * </ul>
- * </li>
- * <li><b>File {@link #mtasIndexObjectParentFileName} with extension
- * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION}
- * </b><br>
- * Contains for each document a tree structure {@link MtasTree} to search
- * objects by parent. Structure of content for each node:
- * <ul>
- * <li><b>VLong</b>: offset references to {@link #mtasIndexObjectParentFileName}
- * , only available in root node</li>
- * <li><b>Byte</b>: flag, for this tree equal to
- * {@link mtas.codec.tree.MtasTree#SINGLE_POSITION_TREE} indicating a tree with
- * exactly one point at each node, only available in root node</li>
- * <li><b>VInt</b>: left</li>
- * <li><b>VInt</b>: right</li>
- * <li><b>VInt</b>: max</li>
- * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectParentFileName}
- * minus the offset stored in the root node</li>
- * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectParentFileName}
- * minus the offset stored in the root node</li>
- * <li><b>VInt</b>: number of objects on this node</li>
- * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
- * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
- * position of the prefix in {@link #mtasPrefixFileName} and the reference to
- * {@link #mtasTermFileName} minus offset</li>
- * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
- * reference to {@link #mtasObjectFileName}, position of the prefix in
- * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
- * for the first item the difference between this reference minus the previous
- * reference is stored</li>
- * </ul>
- * </li>
- * </ul>
- * </li>
- * </ul>
- *
- */
- public class MtasFieldsConsumer extends FieldsConsumer {
- /** Logger of this class. */
- private static final Log log = LogFactory.getLog(MtasFieldsConsumer.class);
- /** Consumer the fields are handed to before the Mtas files are written. */
- private FieldsConsumer delegateFieldsConsumer;
- /** Segment write state: supplies directory, context, segment info and field infos. */
- private SegmentWriteState state;
- /** Per field: prefixes for which overlapping positions were registered. */
- private HashMap<String, HashSet<String>> intersectingPrefixes;
- /** Per field: prefixes registered with single position values only. */
- private HashMap<String, HashSet<String>> singlePositionPrefix;
- /** Per field: prefixes registered with a range or a set of positions. */
- private HashMap<String, HashSet<String>> multiplePositionPrefix;
- /** Per field: prefixes registered with a set of positions. */
- private HashMap<String, HashSet<String>> setPositionPrefix;
- /** Per field: prefix mapped to its file pointer in the prefix output. */
- private HashMap<String, HashMap<String, Long>> prefixReferenceIndex;
- /** Per field: prefix mapped to its id, numbered from 1 in order of registration. */
- private HashMap<String, HashMap<String, Integer>> prefixIdIndex;
- /** Token statistics: minimum position. */
- Integer tokenStatsMinPos;
- /** Token statistics: maximum position. */
- Integer tokenStatsMaxPos;
- /** Token statistics: number of tokens. */
- Integer tokenStatsNumber;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION}. */
- private String mtasTmpFieldFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}. */
- private String mtasTmpObjectFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}. */
- private String mtasTmpDocsFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}. */
- private String mtasTmpDocFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION}. */
- private String mtasTmpDocsChainedFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}. */
- private String mtasObjectFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}. */
- private String mtasTermFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}. */
- private String mtasIndexFieldFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}. */
- private String mtasPrefixFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}. */
- private String mtasDocFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION}. */
- private String mtasIndexDocIdFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION}. */
- private String mtasIndexObjectIdFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION}. */
- private String mtasIndexObjectPositionFileName;
- /** Segment file name using {@link MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION}. */
- private String mtasIndexObjectParentFileName;
- /** Codec name written into the index header of the Mtas files. */
- private String name;
- /** Name of the delegate postings format, written after each index header. */
- private String delegatePostingsFormatName;
- /**
- * Instantiates a new mtas fields consumer.
- *
- * @param fieldsConsumer
- * the fields consumer
- * @param state
- * the state
- * @param name
- * the name
- * @param delegatePostingsFormatName
- * the delegate postings format name
- */
- public MtasFieldsConsumer(FieldsConsumer fieldsConsumer,
- SegmentWriteState state, String name, String delegatePostingsFormatName) {
- this.delegateFieldsConsumer = fieldsConsumer;
- this.state = state;
- this.name = name;
- this.delegatePostingsFormatName = delegatePostingsFormatName;
- // temporary fileNames
- mtasTmpFieldFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_TMP_FIELD_EXTENSION);
- mtasTmpObjectFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_TMP_OBJECT_EXTENSION);
- mtasTmpDocsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TMP_DOCS_EXTENSION);
- mtasTmpDocFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TMP_DOC_EXTENSION);
- mtasTmpDocsChainedFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_TMP_DOCS_CHAINED_EXTENSION);
- // fileNames
- mtasObjectFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_OBJECT_EXTENSION);
- mtasTermFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TERM_EXTENSION);
- mtasIndexFieldFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_FIELD_EXTENSION);
- mtasPrefixFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_PREFIX_EXTENSION);
- mtasDocFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
- state.segmentSuffix, MtasCodecPostingsFormat.MTAS_DOC_EXTENSION);
- mtasIndexDocIdFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_INDEX_DOC_ID_EXTENSION);
- mtasIndexObjectIdFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_ID_EXTENSION);
- mtasIndexObjectPositionFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_POSITION_EXTENSION);
- mtasIndexObjectParentFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix,
- MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_PARENT_EXTENSION);
- }
- /**
- * Register prefix.
- *
- * @param field
- * the field
- * @param prefix
- * the prefix
- * @param outPrefix
- * the out prefix
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private void registerPrefix(String field, String prefix,
- IndexOutput outPrefix) throws IOException {
- if (!prefixReferenceIndex.containsKey(field)) {
- prefixReferenceIndex.put(field, new HashMap<String, Long>());
- prefixIdIndex.put(field, new HashMap<String, Integer>());
- }
- if (!prefixReferenceIndex.get(field).containsKey(prefix)) {
- int id = 1 + prefixReferenceIndex.get(field).size();
- prefixReferenceIndex.get(field).put(prefix, outPrefix.getFilePointer());
- prefixIdIndex.get(field).put(prefix, id);
- outPrefix.writeString(prefix);
- }
- }
- /**
- * Register prefix intersection.
- *
- * @param field
- * the field
- * @param prefix
- * the prefix
- * @param start
- * the start
- * @param end
- * the end
- * @param docFieldAdministration
- * the doc field administration
- */
- private void registerPrefixIntersection(String field, String prefix,
- int start, int end,
- HashMap<String, HashSet<Integer>> docFieldAdministration) {
- if (!intersectingPrefixes.containsKey(field)) {
- intersectingPrefixes.put(field, new HashSet<String>());
- } else if (intersectingPrefixes.get(field).contains(prefix)) {
- return;
- }
- HashSet<Integer> docFieldPrefixAdministration;
- if (!docFieldAdministration.containsKey(prefix)) {
- docFieldPrefixAdministration = new HashSet<>();
- docFieldAdministration.put(prefix, docFieldPrefixAdministration);
- } else {
- docFieldPrefixAdministration = docFieldAdministration.get(prefix);
- // check
- for (int p = start; p <= end; p++) {
- if (docFieldPrefixAdministration.contains(p)) {
- intersectingPrefixes.get(field).add(prefix);
- docFieldAdministration.remove(prefix);
- return;
- }
- }
- }
- // update
- for (int p = start; p <= end; p++) {
- docFieldPrefixAdministration.add(p);
- }
- }
- /**
- * Registers a prefix that occurred with a single position. The prefix is
- * recorded as single-position for the field unless it has already been
- * recorded as a multiple-position prefix.
- *
- * @param field
- * the field
- * @param prefix
- * the prefix
- * @param outPrefix
- * the output a new prefix is written to
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- public void registerPrefixStatsSinglePositionValue(String field,
- String prefix, IndexOutput outPrefix) throws IOException {
- initPrefixStatsField(field);
- registerPrefix(field, prefix, outPrefix);
- final boolean knownAsMultiple = multiplePositionPrefix.get(field)
- .contains(prefix);
- if (knownAsMultiple) {
- // multiple-position status takes precedence
- return;
- }
- singlePositionPrefix.get(field).add(prefix);
- }
- /**
- * Registers a prefix that occurred with a range of positions. The prefix is
- * recorded as multiple-position for the field and removed from the
- * single-position prefixes.
- *
- * @param field
- * the field
- * @param prefix
- * the prefix
- * @param outPrefix
- * the output a new prefix is written to
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- public void registerPrefixStatsRangePositionValue(String field, String prefix,
- IndexOutput outPrefix) throws IOException {
- initPrefixStatsField(field);
- registerPrefix(field, prefix, outPrefix);
- // move the prefix from the single-position to the multiple-position set
- multiplePositionPrefix.get(field).add(prefix);
- singlePositionPrefix.get(field).remove(prefix);
- }
- /**
- * Registers a prefix that occurred with a set of positions. The prefix is
- * recorded both as set-position and as multiple-position for the field and
- * removed from the single-position prefixes.
- *
- * @param field
- * the field
- * @param prefix
- * the prefix
- * @param outPrefix
- * the output a new prefix is written to
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- public void registerPrefixStatsSetPositionValue(String field, String prefix,
- IndexOutput outPrefix) throws IOException {
- initPrefixStatsField(field);
- registerPrefix(field, prefix, outPrefix);
- // a set of positions also counts as multiple positions
- setPositionPrefix.get(field).add(prefix);
- multiplePositionPrefix.get(field).add(prefix);
- singlePositionPrefix.get(field).remove(prefix);
- }
- /**
- * Makes sure the single, multiple and set position statistics each contain a
- * (possibly empty) set of prefixes for the field.
- *
- * @param field
- * the field
- */
- private void initPrefixStatsField(String field) {
- singlePositionPrefix.computeIfAbsent(field, key -> new HashSet<String>());
- multiplePositionPrefix.computeIfAbsent(field, key -> new HashSet<String>());
- setPositionPrefix.computeIfAbsent(field, key -> new HashSet<String>());
- }
- /**
- * Gets the single-position prefixes of a field, joined by
- * {@link MtasToken#DELIMITER}.
- *
- * @param field
- * the field
- * @return the joined single-position prefixes, or an empty string if no
- * prefix statistics were registered for the field
- */
- public String getPrefixStatsSinglePositionPrefixAttribute(String field) {
- HashSet<String> prefixes = singlePositionPrefix.get(field);
- if (prefixes == null) {
- // unknown field: String.join would throw a NullPointerException; return
- // "" like getPrefixStatsIntersectionPrefixAttribute does
- return "";
- }
- return String.join(MtasToken.DELIMITER, prefixes);
- }
- /**
- * Gets the multiple-position prefixes of a field, joined by
- * {@link MtasToken#DELIMITER}.
- *
- * @param field
- * the field
- * @return the joined multiple-position prefixes, or an empty string if no
- * prefix statistics were registered for the field
- */
- public String getPrefixStatsMultiplePositionPrefixAttribute(String field) {
- HashSet<String> prefixes = multiplePositionPrefix.get(field);
- if (prefixes == null) {
- // unknown field: String.join would throw a NullPointerException; return
- // "" like getPrefixStatsIntersectionPrefixAttribute does
- return "";
- }
- return String.join(MtasToken.DELIMITER, prefixes);
- }
- /**
- * Gets the set-position prefixes of a field, joined by
- * {@link MtasToken#DELIMITER}.
- *
- * @param field
- * the field
- * @return the joined set-position prefixes, or an empty string if no prefix
- * statistics were registered for the field
- */
- public String getPrefixStatsSetPositionPrefixAttribute(String field) {
- HashSet<String> prefixes = setPositionPrefix.get(field);
- if (prefixes == null) {
- // unknown field: String.join would throw a NullPointerException; return
- // "" like getPrefixStatsIntersectionPrefixAttribute does
- return "";
- }
- return String.join(MtasToken.DELIMITER, prefixes);
- }
- /**
- * Gets the intersecting prefixes of a field, joined by
- * {@link MtasToken#DELIMITER}.
- *
- * @param field
- * the field
- * @return the joined intersecting prefixes, or an empty string if none were
- * registered for the field
- */
- public String getPrefixStatsIntersectionPrefixAttribute(String field) {
- final HashSet<String> prefixes = intersectingPrefixes.get(field);
- return prefixes == null ? ""
- : String.join(MtasToken.DELIMITER, prefixes);
- }
- /**
- * Merges the segments of {@code mergeState}. The integrity of every fields
- * producer is checked, each producer gets a slice starting at the summed
- * maxDoc of the producers before it, and the combined, doc-id mapped view of
- * all producers is passed to {@link #write(Fields)}.
- *
- * @see org.apache.lucene.codecs.FieldsConsumer#merge(org.apache.lucene.index.
- * MergeState)
- */
- @Override
- public void merge(MergeState mergeState) throws IOException {
- final FieldsProducer[] producers = mergeState.fieldsProducers;
- final List<Fields> fieldsPerReader = new ArrayList<>();
- final List<ReaderSlice> slicePerReader = new ArrayList<>();
- int nextDocBase = 0;
- for (int reader = 0; reader < producers.length; reader++) {
- final FieldsProducer producer = producers[reader];
- final int readerMaxDoc = mergeState.maxDocs[reader];
- producer.checkIntegrity();
- slicePerReader.add(new ReaderSlice(nextDocBase, readerMaxDoc, reader));
- fieldsPerReader.add(producer);
- // the next reader starts where this one ends
- nextDocBase += readerMaxDoc;
- }
- final Fields[] fieldsArray = fieldsPerReader.toArray(Fields.EMPTY_ARRAY);
- final ReaderSlice[] sliceArray = slicePerReader
- .toArray(ReaderSlice.EMPTY_ARRAY);
- final MultiFields combined = new MultiFields(fieldsArray, sliceArray);
- write(new MappedMultiFields(mergeState, combined));
- }
- /**
- * Writes the fields through the delegate consumer first and then into the
- * Mtas specific files, using the field infos of the segment write state.
- *
- * @see org.apache.lucene.codecs.FieldsConsumer#write(org.apache.lucene.index.
- * Fields )
- */
- @Override
- public void write(Fields fields) throws IOException {
- // regular postings go to the delegate
- this.delegateFieldsConsumer.write(fields);
- // followed by the additional Mtas structures
- final FieldInfos segmentFieldInfos = this.state.fieldInfos;
- write(segmentFieldInfos, fields);
- }
- /**
- * Write.
- *
- * @param fieldInfos
- * the field infos
- * @param fields
- * the fields
- */
- private void write(FieldInfos fieldInfos, Fields fields) {
- IndexOutput outField;
- IndexOutput outDoc;
- IndexOutput outIndexDocId;
- IndexOutput outIndexObjectId;
- IndexOutput outIndexObjectPosition;
- IndexOutput outIndexObjectParent;
- IndexOutput outTerm;
- IndexOutput outObject;
- IndexOutput outPrefix;
- IndexOutput outTmpDoc;
- IndexOutput outTmpField;
- HashSet<Closeable> closeables = new HashSet<>();
- // prefix stats
- intersectingPrefixes = new HashMap<>();
- singlePositionPrefix = new HashMap<>();
- multiplePositionPrefix = new HashMap<>();
- setPositionPrefix = new HashMap<>();
- prefixReferenceIndex = new HashMap<>();
- prefixIdIndex = new HashMap<>();
- // temporary index in memory for doc
- SortedMap<Integer, Long> memoryIndexTemporaryObject = new TreeMap<>();
- // create (backwards) chained new temporary index docs
- SortedMap<Integer, Long> memoryTmpDocChainList = new TreeMap<>();
- // list of objectIds and references to objects
- SortedMap<Integer, Long> memoryIndexDocList = new TreeMap<>();
- try {
- // create file tmpDoc
- closeables.add(outTmpDoc = state.directory
- .createOutput(mtasTmpDocFileName, state.context));
- // create file tmpField
- closeables.add(outTmpField = state.directory
- .createOutput(mtasTmpFieldFileName, state.context));
- // create file indexDoc
- closeables.add(outDoc = state.directory.createOutput(mtasDocFileName,
- state.context));
- CodecUtil.writeIndexHeader(outDoc, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outDoc.writeString(delegatePostingsFormatName);
- // create file indexDocId
- closeables.add(outIndexDocId = state.directory
- .createOutput(mtasIndexDocIdFileName, state.context));
- CodecUtil.writeIndexHeader(outIndexDocId, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outIndexDocId.writeString(delegatePostingsFormatName);
- // create file indexObjectId
- closeables.add(outIndexObjectId = state.directory
- .createOutput(mtasIndexObjectIdFileName, state.context));
- CodecUtil.writeIndexHeader(outIndexObjectId, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outIndexObjectId.writeString(delegatePostingsFormatName);
- // create file indexObjectPosition
- closeables.add(outIndexObjectPosition = state.directory
- .createOutput(mtasIndexObjectPositionFileName, state.context));
- CodecUtil.writeIndexHeader(outIndexObjectPosition, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outIndexObjectPosition.writeString(delegatePostingsFormatName);
- // create file indexObjectParent
- closeables.add(outIndexObjectParent = state.directory
- .createOutput(mtasIndexObjectParentFileName, state.context));
- CodecUtil.writeIndexHeader(outIndexObjectParent, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outIndexObjectParent.writeString(delegatePostingsFormatName);
- // create file term
- closeables.add(outTerm = state.directory.createOutput(mtasTermFileName,
- state.context));
- CodecUtil.writeIndexHeader(outTerm, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outTerm.writeString(delegatePostingsFormatName);
- // create file prefix
- closeables.add(outPrefix = state.directory
- .createOutput(mtasPrefixFileName, state.context));
- CodecUtil.writeIndexHeader(outPrefix, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outPrefix.writeString(delegatePostingsFormatName);
- // create file object
- closeables.add(outObject = state.directory
- .createOutput(mtasObjectFileName, state.context));
- CodecUtil.writeIndexHeader(outObject, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outObject.writeString(delegatePostingsFormatName);
- // For each field
- for (String field : fields) {
- Terms terms = fields.terms(field);
- if (terms == null) {
- continue;
- } else {
- // new temporary object storage for this field
- IndexOutput outTmpObject = state.directory
- .createOutput(mtasTmpObjectFileName, state.context);
- closeables.add(outTmpObject);
- // new temporary index docs for this field
- IndexOutput outTmpDocs = state.directory
- .createOutput(mtasTmpDocsFileName, state.context);
- closeables.add(outTmpDocs);
- // get fieldInfo
- FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
- // get properties terms
- boolean hasPositions = terms.hasPositions();
- boolean hasFreqs = terms.hasFreqs();
- boolean hasPayloads = fieldInfo.hasPayloads();
- boolean hasOffsets = terms.hasOffsets();
- // register references
- Long smallestTermFilepointer = outTerm.getFilePointer();
- Long smallestPrefixFilepointer = outPrefix.getFilePointer();
- int termCounter = 0;
- // only if freqs, positions and payload available
- if (hasFreqs && hasPositions && hasPayloads) {
- // compute flags
- int flags = PostingsEnum.POSITIONS | PostingsEnum.PAYLOADS;
- if (hasOffsets) {
- flags = flags | PostingsEnum.OFFSETS;
- }
- // get terms
- TermsEnum termsEnum = terms.iterator();
- PostingsEnum postingsEnum = null;
- // for each term in field
- while (true) {
- BytesRef term = termsEnum.next();
- if (term == null) {
- break;
- }
- // store term and get ref
- Long termRef = outTerm.getFilePointer();
- outTerm.writeString(term.utf8ToString());
- termCounter++;
- // get postings
- postingsEnum = termsEnum.postings(postingsEnum, flags);
- // for each doc in field+term
- while (true) {
- Integer docId = postingsEnum.nextDoc();
- if (docId.equals(DocIdSetIterator.NO_MORE_DOCS)) {
- break;
- }
- int freq = postingsEnum.freq();
- // temporary storage objects and temporary index in memory for
- // doc
- memoryIndexTemporaryObject.clear();
- Long offsetFilePointerTmpObject = outTmpObject.getFilePointer();
- for (int i = 0; i < freq; i++) {
- Long currentFilePointerTmpObject = outTmpObject
- .getFilePointer();
- Integer mtasId;
- int position = postingsEnum.nextPosition();
- BytesRef payload = postingsEnum.getPayload();
- if (hasOffsets) {
- mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
- term, termRef, position, payload,
- postingsEnum.startOffset(), postingsEnum.endOffset(),
- outPrefix);
- } else {
- mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
- term, termRef, position, payload, outPrefix);
- }
- if (mtasId != null) {
- assert !memoryIndexTemporaryObject.containsKey(
- mtasId) : "mtasId should be unique in this selection";
- memoryIndexTemporaryObject.put(mtasId,
- currentFilePointerTmpObject);
- }
- } // end loop positions
- // store temporary index for this doc
- if (memoryIndexTemporaryObject.size() > 0) {
- // docId for this part
- outTmpDocs.writeVInt(docId);
- // number of objects/tokens in this part
- outTmpDocs.writeVInt(memoryIndexTemporaryObject.size());
- // offset to be used for references
- outTmpDocs.writeVLong(offsetFilePointerTmpObject);
- // loop over tokens
- for (Entry<Integer, Long> entry : memoryIndexTemporaryObject
- .entrySet()) {
- // mtasId object
- outTmpDocs.writeVInt(entry.getKey());
- // reference object
- outTmpDocs.writeVLong(
- (entry.getValue() - offsetFilePointerTmpObject));
- }
- }
- // clean up
- memoryIndexTemporaryObject.clear();
- } // end loop docs
- } // end loop terms
- // set fieldInfo
- fieldInfos.fieldInfo(field).putAttribute(
- MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION,
- getPrefixStatsSinglePositionPrefixAttribute(field));
- fieldInfos.fieldInfo(field).putAttribute(
- MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION,
- getPrefixStatsMultiplePositionPrefixAttribute(field));
- fieldInfos.fieldInfo(field).putAttribute(
- MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION,
- getPrefixStatsSetPositionPrefixAttribute(field));
- } // end processing field with freqs, positions and payload
- // close temporary object storage and index docs
- outTmpObject.close();
- closeables.remove(outTmpObject);
- outTmpDocs.close();
- closeables.remove(outTmpDocs);
- // create (backwards) chained new temporary index docs
- IndexInput inTmpDocs = state.directory.openInput(mtasTmpDocsFileName,
- state.context);
- closeables.add(inTmpDocs);
- IndexOutput outTmpDocsChained = state.directory
- .createOutput(mtasTmpDocsChainedFileName, state.context);
- closeables.add(outTmpDocsChained);
- memoryTmpDocChainList.clear();
- while (true) {
- try {
- Long currentFilepointer = outTmpDocsChained.getFilePointer();
- // copy docId
- int docId = inTmpDocs.readVInt();
- outTmpDocsChained.writeVInt(docId);
- // copy size
- int size = inTmpDocs.readVInt();
- outTmpDocsChained.writeVInt(size);
- // offset references
- outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
- for (int t = 0; t < size; t++) {
- outTmpDocsChained.writeVInt(inTmpDocs.readVInt());
- outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
- }
- // set back reference to part with same docId
- if (memoryTmpDocChainList.containsKey(docId)) {
- // reference to previous
- outTmpDocsChained.writeVLong(memoryTmpDocChainList.get(docId));
- } else {
- // self reference indicates end of chain
- outTmpDocsChained.writeVLong(currentFilepointer);
- }
- // update temporary index in memory
- memoryTmpDocChainList.put(docId, currentFilepointer);
- } catch (IOException ex) {
- log.debug(ex);
- break;
- }
- }
- outTmpDocsChained.close();
- closeables.remove(outTmpDocsChained);
- inTmpDocs.close();
- closeables.remove(inTmpDocs);
- state.directory.deleteFile(mtasTmpDocsFileName);
- // set reference to tmpDoc in Field
- if (memoryTmpDocChainList.size() > 0) {
- outTmpField.writeString(field);
- outTmpField.writeVLong(outTmpDoc.getFilePointer());
- outTmpField.writeVInt(memoryTmpDocChainList.size());
- outTmpField.writeVLong(smallestTermFilepointer);
- outTmpField.writeVInt(termCounter);
- outTmpField.writeVLong(smallestPrefixFilepointer);
- outTmpField.writeVInt(prefixReferenceIndex.get(field).size());
- // fill indexDoc
- IndexInput inTmpDocsChained = state.directory
- .openInput(mtasTmpDocsChainedFileName, state.context);
- closeables.add(inTmpDocsChained);
- IndexInput inTmpObject = state.directory
- .openInput(mtasTmpObjectFileName, state.context);
- closeables.add(inTmpObject);
- for (Entry<Integer, Long> entry : memoryTmpDocChainList
- .entrySet()) {
- Integer docId = entry.getKey();
- Long currentFilePointer;
- Long newFilePointer;
- // list of objectIds and references to objects
- memoryIndexDocList.clear();
- // construct final object + indexObjectId for docId
- currentFilePointer = entry.getValue();
- // collect objects for document
- tokenStatsMinPos = null;
- tokenStatsMaxPos = null;
- tokenStatsNumber = 0;
- while (true) {
- inTmpDocsChained.seek(currentFilePointer);
- Integer docIdPart = inTmpDocsChained.readVInt();
- assert docIdPart.equals(
- docId) : "conflicting docId in reference to temporaryIndexDocsChained";
- // number of objects/tokens in part
- int size = inTmpDocsChained.readVInt();
- long offsetFilePointerTmpObject = inTmpDocsChained.readVLong();
- assert size > 0 : "number of objects/tokens in part cannot be "
- + size;
- for (int t = 0; t < size; t++) {
- int mtasId = inTmpDocsChained.readVInt();
- Long tmpObjectRef = inTmpDocsChained.readVLong()
- + offsetFilePointerTmpObject;
- assert !memoryIndexDocList.containsKey(
- mtasId) : "mtasId should be unique in this selection";
- // initially, store ref to tmpObject
- memoryIndexDocList.put(mtasId, tmpObjectRef);
- }
- // reference to next part
- newFilePointer = inTmpDocsChained.readVLong();
- if (newFilePointer.equals(currentFilePointer)) {
- break; // end of chained parts
- } else {
- currentFilePointer = newFilePointer;
- }
- }
- // now create new objects, sorted by mtasId
- Long smallestObjectFilepointer = outObject.getFilePointer();
- for (Entry<Integer, Long> objectEntry : memoryIndexDocList
- .entrySet()) {
- int mtasId = objectEntry.getKey();
- Long tmpObjectRef = objectEntry.getValue();
- Long objectRef = outObject.getFilePointer();
- copyObjectAndUpdateStats(mtasId, inTmpObject, tmpObjectRef,
- outObject);
- // update with new ref
- memoryIndexDocList.put(mtasId, objectRef);
- }
- // check mtasIds properties
- assert memoryIndexDocList.firstKey()
- .equals(0) : "first mtasId should not be "
- + memoryIndexDocList.firstKey();
- assert (1 + memoryIndexDocList.lastKey()
- - memoryIndexDocList.firstKey()) == memoryIndexDocList
- .size() : "missing mtasId";
- assert tokenStatsNumber.equals(memoryIndexDocList
- .size()) : "incorrect number of items in tokenStats";
- // store item in tmpDoc
- outTmpDoc.writeVInt(docId);
- outTmpDoc.writeVLong(outIndexObjectId.getFilePointer());
- int mtasId = 0;
- // compute linear approximation (least squares method, integer
- // constants)
- long tmpN = memoryIndexDocList.size();
- long tmpSumY = 0;
- long tmpSumXY = 0;
- long tmpSumX = 0;
- long tmpSumXX = 0;
- for (Entry<Integer, Long> objectEntry : memoryIndexDocList
- .entrySet()) {
- assert objectEntry.getKey()
- .equals(mtasId) : "unexpected mtasId";
- tmpSumY += objectEntry.getValue();
- tmpSumX += mtasId;
- tmpSumXY += mtasId * objectEntry.getValue();
- tmpSumXX += mtasId * mtasId;
- mtasId++;
- }
- int objectRefApproxQuotient;
- if(tmpN>1) {
- objectRefApproxQuotient= (int) (((tmpN * tmpSumXY)
- - (tmpSumX * tmpSumY))
- / ((tmpN * tmpSumXX) - (tmpSumX * tmpSumX)));
- } else {
- objectRefApproxQuotient = 0;
- }
- long objectRefApproxOffset = (tmpSumY
- - objectRefApproxQuotient * tmpSumX) / tmpN;
- Long objectRefApproxCorrection;
- long maxAbsObjectRefApproxCorrection = 0;
- // compute maximum correction
- mtasId = 0;
- for (Entry<Integer, Long> objectEntry : memoryIndexDocList
- .entrySet()) {
- objectRefApproxCorrection = (objectEntry.getValue()
- - (objectRefApproxOffset
- + (mtasId * objectRefApproxQuotient)));
- maxAbsObjectRefApproxCorrection = Math.max(
- maxAbsObjectRefApproxCorrection,
- Math.abs(objectRefApproxCorrection));
- mtasId++;
- }
- byte storageFlags;
- if (maxAbsObjectRefApproxCorrection <= Long
- .valueOf(Byte.MAX_VALUE)) {
- storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_BYTE;
- } else if (maxAbsObjectRefApproxCorrection <= Long
- .valueOf(Short.MAX_VALUE)) {
- storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_SHORT;
- } else if (maxAbsObjectRefApproxCorrection <= Long
- .valueOf(Integer.MAX_VALUE)) {
- storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER;
- } else {
- storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_LONG;
- }
- // update indexObjectId with correction on approximated ref
- // (assume
- // can be stored as int)
- mtasId = 0;
- for (Entry<Integer, Long> objectEntry : memoryIndexDocList
- .entrySet()) {
- objectRefApproxCorrection = (objectEntry.getValue()
- - (objectRefApproxOffset
- + (mtasId * objectRefApproxQuotient)));
- if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
- outIndexObjectId
- .writeByte(objectRefApproxCorrection.byteValue());
- } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
- outIndexObjectId
- .writeShort(objectRefApproxCorrection.shortValue());
- } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
- outIndexObjectId
- .writeInt(objectRefApproxCorrection.intValue());
- } else {
- outIndexObjectId.writeLong(objectRefApproxCorrection);
- }
- mtasId++;
- }
- outTmpDoc.writeVLong(smallestObjectFilepointer);
- outTmpDoc.writeVInt(objectRefApproxQuotient);
- outTmpDoc.writeZLong(objectRefApproxOffset);
- outTmpDoc.writeByte(storageFlags);
- outTmpDoc.writeVInt(tokenStatsNumber);
- outTmpDoc.writeVInt(tokenStatsMinPos);
- outTmpDoc.writeVInt(tokenStatsMaxPos);
- // clean up
- memoryIndexDocList.clear();
- } // end loop over docs
- inTmpDocsChained.close();
- closeables.remove(inTmpDocsChained);
- inTmpObject.close();
- closeables.remove(inTmpObject);
- }
- // clean up
- memoryTmpDocChainList.clear();
- // remove temporary files
- state.directory.deleteFile(mtasTmpObjectFileName);
- state.directory.deleteFile(mtasTmpDocsChainedFileName);
- // store references for field
- } // end processing field
- } // end loop fields
- // close temporary index doc
- outTmpDoc.close();
- closeables.remove(outTmpDoc);
- // close indexField, indexObjectId and object
- CodecUtil.writeFooter(outTmpField);
- outTmpField.close();
- closeables.remove(outTmpField);
- CodecUtil.writeFooter(outIndexObjectId);
- outIndexObjectId.close();
- closeables.remove(outIndexObjectId);
- CodecUtil.writeFooter(outObject);
- outObject.close();
- closeables.remove(outObject);
- CodecUtil.writeFooter(outTerm);
- outTerm.close();
- closeables.remove(outTerm);
- CodecUtil.writeFooter(outPrefix);
- outPrefix.close();
- closeables.remove(outPrefix);
- // create final doc, fill indexObjectPosition, indexObjectParent and
- // indexTermPrefixPosition, create final field
- IndexInput inTmpField = state.directory.openInput(mtasTmpFieldFileName,
- state.context);
- closeables.add(inTmpField);
- IndexInput inTmpDoc = state.directory.openInput(mtasTmpDocFileName,
- state.context);
- closeables.add(inTmpDoc);
- IndexInput inObjectId = state.directory
- .openInput(mtasIndexObjectIdFileName, state.context);
- closeables.add(inObjectId);
- IndexInput inObject = state.directory.openInput(mtasObjectFileName,
- state.context);
- closeables.add(inObject);
- IndexInput inTerm = state.directory.openInput(mtasTermFileName,
- state.context);
- closeables.add(inTerm);
- closeables.add(outField = state.directory
- .createOutput(mtasIndexFieldFileName, state.context));
- CodecUtil.writeIndexHeader(outField, name,
- MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
- state.segmentSuffix);
- outField.writeString(delegatePostingsFormatName);
- boolean doWrite = true;
- do {
- try {
- // read from tmpField
- String field = inTmpField.readString();
- long fpTmpDoc = inTmpField.readVLong();
- int numberDocs = inTmpField.readVInt();
- long fpTerm = inTmpField.readVLong();
- int numberTerms = inTmpField.readVInt();
- long fpPrefix = inTmpField.readVLong();
- int numberPrefixes = inTmpField.readVInt();
- inTmpDoc.seek(fpTmpDoc);
- long fpFirstDoc = outDoc.getFilePointer();
- // get prefixId index
- HashMap<String, Integer> prefixIdIndexField = prefixIdIndex
- .get(field);
- // construct MtasRBTree for indexDocId
- MtasRBTree mtasDocIdTree = new MtasRBTree(true, false);
- for (int docCounter = 0; docCounter < numberDocs; docCounter++) {
- // get info from tmpDoc
- int docId = inTmpDoc.readVInt();
- // filePointer indexObjectId
- Long fpIndexObjectId = inTmpDoc.readVLong();
- // filePointer indexObjectPosition (unknown)
- Long fpIndexObjectPosition;
- // filePointer indexObjectParent (unknown)
- Long fpIndexObjectParent;
- // constants for approximation object references for this document
- long smallestObjectFilepointer = inTmpDoc.readVLong();
- int objectRefApproxQuotient = inTmpDoc.readVInt();
- long objectRefApproxOffset = inTmpDoc.readZLong();
- byte storageFlags = inTmpDoc.readByte();
- // number objects/tokens
- int size = inTmpDoc.readVInt();
- // construct MtasRBTree
- MtasRBTree mtasPositionTree = new MtasRBTree(false, true);
- MtasRBTree mtasParentTree = new MtasRBTree(false, true);
- inObjectId.seek(fpIndexObjectId);
- long refCorrection;
- long ref;
- HashMap<String, HashSet<Integer>> docFieldAdministration = new HashMap<>();
- for (int mtasId = 0; mtasId < size; mtasId++) {
- if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
- refCorrection = inObjectId.readByte();
- } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
- refCorrection = inObjectId.readShort();
- } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
- refCorrection = inObjectId.readInt();
- } else {
- refCorrection = inObjectId.readLong();
- }
- ref = objectRefApproxOffset + mtasId * objectRefApproxQuotient
- + refCorrection;
- MtasTokenString token = MtasCodecPostingsFormat.getToken(inObject,
- inTerm, ref);
- String prefix = token.getPrefix();
- registerPrefixIntersection(field, prefix,
- token.getPositionStart(), token.getPositionEnd(),
- docFieldAdministration);
- int prefixId = prefixIdIndexField.containsKey(prefix)
- ? prefixIdIndexField.get(prefix) : 0;
- token.setPrefixId(prefixId);
- assert token.getId().equals(mtasId) : "unexpected mtasId "
- + mtasId;
- mtasPositionTree.addPositionAndObjectFromToken(token);
- mtasParentTree.addParentFromToken(token);
- }
- // store mtasPositionTree and mtasParentTree
- fpIndexObjectPosition = storeTree(mtasPositionTree,
- outIndexObjectPosition, smallestObjectFilepointer);
- fpIndexObjectParent = storeTree(mtasParentTree,
- outIndexObjectParent, smallestObjectFilepointer);
- long fpDoc = outDoc.getFilePointer();
- // create indexDoc with updated fpIndexObjectPosition from tmpDoc
- outDoc.writeVInt(docId); // docId
- // reference indexObjectId
- outDoc.writeVLong(fpIndexObjectId);
- // reference indexObjectPosition
- outDoc.writeVLong(fpIndexObjectPosition);
- // reference indexObjectParent
- outDoc.writeVLong(fpIndexObjectParent);
- // variables approximation and storage references object
- outDoc.writeVLong(smallestObjectFilepointer);
- outDoc.writeVInt(objectRefApproxQuotient);
- outDoc.writeZLong(objectRefApproxOffset);
- outDoc.writeByte(storageFlags);
- // number of objects
- outDoc.writeVInt(size);
- // minPosition
- outDoc.writeVInt(inTmpDoc.readVInt());
- // maxPosition
- outDoc.writeVInt(inTmpDoc.readVInt());
- // add to tree for indexDocId
- mtasDocIdTree.addIdFromDoc(docId, fpDoc);
- }
- long fpIndexDocId = storeTree(mtasDocIdTree, outIndexDocId,
- fpFirstDoc);
- // store in indexField
- outField.writeString(field);
- outField.writeVLong(fpFirstDoc);
- outField.writeVLong(fpIndexDocId);
- outField.writeVInt(numberDocs);
- outField.writeVLong(fpTerm);
- outField.writeVInt(numberTerms);
- outField.writeVLong(fpPrefix);
- outField.writeVInt(numberPrefixes);
- // register intersection
- fieldInfos.fieldInfo(field).putAttribute(
- MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_INTERSECTION,
- getPrefixStatsIntersectionPrefixAttribute(field));
- } catch (EOFException e) {
- log.debug(e);
- doWrite = false;
- }
- // end loop over fields
- } while (doWrite);
- inTerm.close();
- closeables.remove(inTerm);
- inObject.close();
- closeables.remove(inObject);
- inObjectId.close();
- closeables.remove(inObjectId);
- inTmpDoc.close();
- closeables.remove(inTmpDoc);
- inTmpField.close();
- closeables.remove(inTmpField);
- // remove temporary files
- state.directory.deleteFile(mtasTmpDocFileName);
- state.directory.deleteFile(mtasTmpFieldFileName);
- // close indexDoc, indexObjectPosition and indexObjectParent
- CodecUtil.writeFooter(outDoc);
- outDoc.close();
- closeables.remove(outDoc);
- CodecUtil.writeFooter(outIndexObjectPosition);
- outIndexObjectPosition.close();
- closeables.remove(outIndexObjectPosition);
- CodecUtil.writeFooter(outIndexObjectParent);
- outIndexObjectParent.close();
- closeables.remove(outIndexObjectParent);
- CodecUtil.writeFooter(outIndexDocId);
- outIndexDocId.close();
- closeables.remove(outIndexDocId);
- CodecUtil.writeFooter(outField);
- outField.close();
- closeables.remove(outField);
- } catch (IOException e) {
- // ignore, can happen when merging segment already written by
- // delegateFieldsConsumer
- log.error(e);
- } finally {
- IOUtils.closeWhileHandlingException(closeables);
- try {
- state.directory.deleteFile(mtasTmpDocsFileName);
- } catch (IOException e) {
- log.debug(e);
- }
- try {
- state.directory.deleteFile(mtasTmpDocFileName);
- } catch (IOException e) {
- log.debug(e);
- }
- try {
- state.directory.deleteFile(mtasTmpFieldFileName);
- } catch (IOException e) {
- log.debug(e);
- }
- }
- }
- /**
- * Creates the object and register prefix.
- *
- * @param field
- * the field
- * @param out
- * the out
- * @param term
- * the term
- * @param termRef
- * the term ref
- * @param startPosition
- * the start position
- * @param payload
- * the payload
- * @param outPrefix
- * the out prefix
- * @return the integer
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private Integer createObjectAndRegisterPrefix(String field, IndexOutput out,
- BytesRef term, Long termRef, int startPosition, BytesRef payload,
- IndexOutput outPrefix) throws IOException {
- return createObjectAndRegisterPrefix(field, out, term, termRef,
- startPosition, payload, null, null, outPrefix);
- }
- /**
- * Creates the object and register prefix.
- *
- * @param field
- * the field
- * @param out
- * the out
- * @param term
- * the term
- * @param termRef
- * the term ref
- * @param startPosition
- * the start position
- * @param payload
- * the payload
- * @param startOffset
- * the start offset
- * @param endOffset
- * the end offset
- * @param outPrefix
- * the out prefix
- * @return the integer
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private Integer createObjectAndRegisterPrefix(String field, IndexOutput out,
- BytesRef term, Long termRef, int startPosition, BytesRef payload,
- Integer startOffset, Integer endOffset, IndexOutput outPrefix)
- throws IOException {
- try {
- Integer mtasId = null;
- String prefix = MtasToken.getPrefixFromValue(term.utf8ToString());
- if (payload != null) {
- MtasPayloadDecoder payloadDecoder = new MtasPayloadDecoder();
- payloadDecoder.init(startPosition, Arrays.copyOfRange(payload.bytes,
- payload.offset, (payload.offset + payload.length)));
- mtasId = payloadDecoder.getMtasId();
- Integer mtasParentId = payloadDecoder.getMtasParentId();
- byte[] mtasPayload = payloadDecoder.getMtasPayload();
- MtasPosition mtasPosition = payloadDecoder.getMtasPosition();
- MtasOffset mtasOffset = payloadDecoder.getMtasOffset();
- if (mtasOffset == null && startOffset != null) {
- mtasOffset = new MtasOffset(startOffset, endOffset);
- }
- MtasOffset mtasRealOffset = payloadDecoder.getMtasRealOffset();
- // only if really mtas object
- if (mtasId != null) {
- // compute flags
- int objectFlags = 0;
- if (mtasPosition != null) {
- if (mtasPosition.checkType(MtasPosition.POSITION_RANGE)) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE;
- registerPrefixStatsRangePositionValue(field, prefix, outPrefix);
- } else if (mtasPosition.checkType(MtasPosition.POSITION_SET)) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET;
- registerPrefixStatsSetPositionValue(field, prefix, outPrefix);
- } else {
- registerPrefixStatsSinglePositionValue(field, prefix, outPrefix);
- }
- } else {
- throw new IOException("no position");
- }
- if (mtasParentId != null) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT;
- }
- if (mtasOffset != null) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET;
- }
- if (mtasRealOffset != null) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET;
- }
- if (mtasPayload != null) {
- objectFlags = objectFlags
- | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD;
- }
- // create object
- out.writeVInt(mtasId);
- out.writeVInt(objectFlags);
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) {
- out.writeVInt(mtasParentId);
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) {
- int tmpStart = mtasPosition.getStart();
- out.writeVInt(tmpStart);
- out.writeVInt((mtasPosition.getEnd() - tmpStart));
- } else if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) {
- int[] positions = mtasPosition.getPositions();
- out.writeVInt(positions.length);
- int tmpPrevious = 0;
- for (int position : positions) {
- out.writeVInt((position - tmpPrevious));
- tmpPrevious = position;
- }
- } else {
- out.writeVInt(mtasPosition.getStart());
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) {
- int tmpStart = mtasOffset.getStart();
- out.writeVInt(mtasOffset.getStart());
- out.writeVInt((mtasOffset.getEnd() - tmpStart));
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) {
- int tmpStart = mtasRealOffset.getStart();
- out.writeVInt(mtasRealOffset.getStart());
- out.writeVInt((mtasRealOffset.getEnd() - tmpStart));
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) {
- if (mtasPayload != null) {
- out.writeVInt(mtasPayload.length);
- out.writeBytes(mtasPayload, mtasPayload.length);
- } else {
- out.writeVInt(0);
- }
- }
- out.writeVLong(termRef);
- } // storage token
- }
- return mtasId;
- } catch (Exception e) {
- log.error(e);
- throw new IOException(e);
- }
- }
- /**
- * Store tree.
- *
- * @param tree
- * the tree
- * @param out
- * the out
- * @param refApproxOffset
- * the ref approx offset
- * @return the long
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private Long storeTree(MtasTree<?> tree, IndexOutput out,
- long refApproxOffset) throws IOException {
- return storeTree(tree.close(), tree.isSinglePoint(),
- tree.isStorePrefixAndTermRef(), out, null, refApproxOffset);
- }
- /**
- * Store tree.
- *
- * @param node
- * the node
- * @param isSinglePoint
- * the is single point
- * @param storeAdditionalInformation
- * the store additional information
- * @param out
- * the out
- * @param nodeRefApproxOffset
- * the node ref approx offset
- * @param refApproxOffset
- * the ref approx offset
- * @return the long
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint,
- boolean storeAdditionalInformation, IndexOutput out,
- Long nodeRefApproxOffset, long refApproxOffset) throws IOException {
- Long localNodeRefApproxOffset = nodeRefApproxOffset;
- if (node != null) {
- Boolean isRoot = false;
- if (localNodeRefApproxOffset == null) {
- localNodeRefApproxOffset = out.getFilePointer();
- isRoot = true;
- }
- Long fpIndexObjectPositionLeftChild;
- Long fpIndexObjectPositionRightChild;
- if (node.leftChild != null) {
- fpIndexObjectPositionLeftChild = storeTree(node.leftChild,
- isSinglePoint, storeAdditionalInformation, out,
- localNodeRefApproxOffset, refApproxOffset);
- } else {
- fpIndexObjectPositionLeftChild = (long) 0; // tmp
- }
- if (node.rightChild != null) {
- fpIndexObjectPositionRightChild = storeTree(node.rightChild,
- isSinglePoint, storeAdditionalInformation, out,
- localNodeRefApproxOffset, refApproxOffset);
- } else {
- fpIndexObjectPositionRightChild = (long) 0; // tmp
- }
- Long fpIndexObjectPosition = out.getFilePointer();
- if (node.leftChild == null) {
- fpIndexObjectPositionLeftChild = fpIndexObjectPosition;
- }
- if (node.rightChild == null) {
- fpIndexObjectPositionRightChild = fpIndexObjectPosition;
- }
- if (isRoot) {
- assert localNodeRefApproxOffset >= 0 : "nodeRefApproxOffset < 0 : "
- + localNodeRefApproxOffset;
- out.writeVLong(localNodeRefApproxOffset);
- byte flag = 0;
- if (isSinglePoint) {
- flag |= MtasTree.SINGLE_POSITION_TREE;
- }
- if (storeAdditionalInformation) {
- flag |= MtasTree.STORE_ADDITIONAL_ID;
- }
- out.writeByte(flag);
- }
- assert node.left >= 0 : "node.left < 0 : " + node.left;
- out.writeVInt(node.left);
- assert node.right >= 0 : "node.right < 0 : " + node.right;
- out.writeVInt(node.right);
- assert node.max >= 0 : "node.max < 0 : " + node.max;
- out.writeVInt(node.max);
- assert fpIndexObjectPositionLeftChild >= localNodeRefApproxOffset : "fpIndexObjectPositionLeftChild<nodeRefApproxOffset : "
- + fpIndexObjectPositionLeftChild + " and " + localNodeRefApproxOffset;
- out.writeVLong(
- (fpIndexObjectPositionLeftChild - localNodeRefApproxOffset));
- assert fpIndexObjectPositionRightChild >= localNodeRefApproxOffset : "fpIndexObjectPositionRightChild<nodeRefApproxOffset"
- + fpIndexObjectPositionRightChild + " and "
- + localNodeRefApproxOffset;
- out.writeVLong(
- (fpIndexObjectPositionRightChild - localNodeRefApproxOffset));
- if (!isSinglePoint) {
- out.writeVInt(node.ids.size());
- }
- HashMap<Integer, MtasTreeNodeId> ids = node.ids;
- Long objectRefCorrected;
- long objectRefCorrectedPrevious = 0;
- // sort refs
- List<MtasTreeNodeId> nodeIds = new ArrayList<>(ids.values());
- Collections.sort(nodeIds);
- if (isSinglePoint && (nodeIds.size() != 1)) {
- throw new IOException("singlePoint tree, but missing single point...");
- }
- int counter = 0;
- for (MtasTreeNodeId nodeId : nodeIds) {
- counter++;
- objectRefCorrected = (nodeId.ref - refApproxOffset);
- assert objectRefCorrected >= objectRefCorrectedPrevious : "objectRefCorrected<objectRefCorrectedPrevious : "
- + objectRefCorrected + " and " + objectRefCorrectedPrevious;
- out.writeVLong((objectRefCorrected - objectRefCorrectedPrevious));
- objectRefCorrectedPrevious = objectRefCorrected;
- if (storeAdditionalInformation) {
- assert nodeId.additionalId >= 0 : "nodeId.additionalId < 0 for item "
- + counter + " : " + nodeId.additionalId;
- out.writeVInt(nodeId.additionalId);
- assert nodeId.additionalRef >= 0 : "nodeId.additionalRef < 0 for item "
- + counter + " : " + nodeId.additionalRef;
- out.writeVLong(nodeId.additionalRef);
- }
- }
- return fpIndexObjectPosition;
- } else {
- return null;
- }
- }
- /**
- * Token stats add.
- *
- * @param min
- * the min
- * @param max
- * the max
- */
- private void tokenStatsAdd(int min, int max) {
- tokenStatsNumber++;
- if (tokenStatsMinPos == null) {
- tokenStatsMinPos = min;
- } else {
- tokenStatsMinPos = Math.min(tokenStatsMinPos, min);
- }
- if (tokenStatsMaxPos == null) {
- tokenStatsMaxPos = max;
- } else {
- tokenStatsMaxPos = Math.max(tokenStatsMaxPos, max);
- }
- }
- /**
- * Copy object and update stats.
- *
- * @param id
- * the id
- * @param in
- * the in
- * @param inRef
- * the in ref
- * @param out
- * the out
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef,
- IndexOutput out) throws IOException {
- int mtasId;
- int objectFlags;
- // read
- in.seek(inRef);
- mtasId = in.readVInt();
- assert id == mtasId : "wrong id detected while copying object";
- objectFlags = in.readVInt();
- out.writeVInt(mtasId);
- out.writeVInt(objectFlags);
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) {
- out.writeVInt(in.readVInt());
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) {
- int minPos = in.readVInt();
- int maxPos = in.readVInt();
- out.writeVInt(minPos);
- out.writeVInt(maxPos);
- tokenStatsAdd(minPos, maxPos);
- } else if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) {
- int size = in.readVInt();
- out.writeVInt(size);
- SortedSet<Integer> list = new TreeSet<>();
- int previousPosition = 0;
- for (int t = 0; t < size; t++) {
- int pos = in.readVInt();
- out.writeVInt(pos);
- previousPosition = (pos + previousPosition);
- list.add(previousPosition);
- }
- assert list.size() == size : "duplicate positions in set are not allowed";
- tokenStatsAdd(list.first(), list.last());
- } else {
- int pos = in.readVInt();
- out.writeVInt(pos);
- tokenStatsAdd(pos, pos);
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) {
- out.writeVInt(in.readVInt());
- out.writeVInt(in.readVInt());
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) {
- out.writeVInt(in.readVInt());
- out.writeVInt(in.readVInt());
- }
- if ((objectFlags
- & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) {
- int length = in.readVInt();
- out.writeVInt(length);
- byte[] payload = new byte[length];
- in.readBytes(payload, 0, length);
- out.writeBytes(payload, payload.length);
- }
- out.writeVLong(in.readVLong());
- }
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.codecs.FieldsConsumer#close()
- */
- @Override
- public void close() throws IOException {
- delegateFieldsConsumer.close();
- }
- }