MtasFieldsConsumer.java

  1. package mtas.codec;

  2. import java.io.Closeable;
  3. import java.io.EOFException;
  4. import java.io.IOException;
  5. import java.util.ArrayList;
  6. import java.util.Arrays;
  7. import java.util.Collections;
  8. import java.util.HashMap;
  9. import java.util.HashSet;
  10. import java.util.List;
  11. import java.util.Map.Entry;
  12. import java.util.SortedMap;
  13. import java.util.SortedSet;
  14. import java.util.TreeMap;
  15. import java.util.TreeSet;

  16. import mtas.analysis.token.MtasOffset;
  17. import mtas.analysis.token.MtasPosition;
  18. import mtas.analysis.token.MtasToken;
  19. import mtas.analysis.token.MtasTokenString;
  20. import mtas.codec.payload.MtasPayloadDecoder;
  21. import mtas.codec.tree.MtasRBTree;
  22. import mtas.codec.tree.MtasTree;
  23. import mtas.codec.tree.MtasTreeNode;
  24. import mtas.codec.tree.MtasTreeNodeId;

  25. import org.apache.commons.logging.Log;
  26. import org.apache.commons.logging.LogFactory;
  27. import org.apache.lucene.codecs.CodecUtil;
  28. import org.apache.lucene.codecs.FieldsConsumer;
  29. import org.apache.lucene.codecs.FieldsProducer;
  30. import org.apache.lucene.index.FieldInfo;
  31. import org.apache.lucene.index.FieldInfos;
  32. import org.apache.lucene.index.Fields;
  33. import org.apache.lucene.index.IndexFileNames;
  34. import org.apache.lucene.index.MappedMultiFields;
  35. import org.apache.lucene.index.MergeState;
  36. import org.apache.lucene.index.MultiFields;
  37. import org.apache.lucene.index.PostingsEnum;
  38. import org.apache.lucene.index.ReaderSlice;
  39. import org.apache.lucene.index.SegmentWriteState;
  40. import org.apache.lucene.index.Terms;
  41. import org.apache.lucene.index.TermsEnum;
  42. import org.apache.lucene.search.DocIdSetIterator;
  43. import org.apache.lucene.store.IndexInput;
  44. import org.apache.lucene.store.IndexOutput;
  45. import org.apache.lucene.util.BytesRef;
  46. import org.apache.lucene.util.IOUtils;

  47. /**
  48.  * The Class MtasFieldsConsumer.
  49.  */

  50. /**
  51.  * The Class MtasFieldsConsumer constructs several temporary and permanent files
  52.  * to provide a forward index
  53.  *
  54.  * <ul>
  55.  * <li><b>Temporary files</b><br>
  56.  * <ul>
  57.  * <li><b>Temporary file {@link #mtasTmpFieldFileName} with extension
  58.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION} </b><br>
  59.  * Contains for each field a reference to the list of documents. Structure of
  60.  * content:
  61.  * <ul>
  62.  * <li><b>String</b>: field</li>
  63.  * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
  64.  * <li><b>VInt</b>: number of documents</li>
  65.  * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
  66.  * <li><b>VInt</b>: number of terms</li>
  67.  * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
  68.  * <li><b>VInt</b>: number of prefixes</li>
  69.  * </ul>
  70.  * </li>
  71.  * <li><b>Temporary file {@link #mtasTmpObjectFileName} with extension
  72.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}</b><br>
  73.  * Contains for a specific field all objects constructed by
  74.  * {@link #createObjectAndRegisterPrefix}. For all fields, the objects are later
  75.  * on copied to {@link #mtasObjectFileName} while statistics are collected.
  76.  * Structure of content identical to {@link #mtasObjectFileName}.</li>
  77.  * <li><b>Temporary file {@link #mtasTmpDocsFileName} with extension
  78.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}</b> <br>
  79.  * Contains for a specific field for each doc multiple fragments. Each occurring
  80.  * term results in a fragment. Structure of content:
  81.  * <ul>
  82.  * <li><b>VInt</b>: docId</li>
  83.  * <li><b>VInt</b>: number of objects in this fragment</li>
  84.  * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
  85.  * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
  86.  * {@link #mtasTmpObjectFileName} minus offset</li>
  87.  * <li><b>VInt</b>,<b>VLong</b>: ...</li>
  88.  * </ul>
  89.  * </li>
  90.  * <li><b>Temporary file {@link #mtasTmpDocsChainedFileName} with extension
  91.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION}
  92.  * </b><br>
  93.  * Contains for a specific field for each doc multiple chained fragments.
  94.  * Structure of content:
  95.  * <ul>
  96.  * <li><b>VInt</b>: docId</li>
  97.  * <li><b>VInt</b>: number of objects in this fragment</li>
  98.  * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
  99.  * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
  100.  * {@link #mtasTmpObjectFileName} minus offset</li>
  101.  * <li><b>VInt</b>,<b>VLong</b>: ...</li>
  102.  * <li><b>VLong</b>: reference to next fragment in
  103.  * {@link #mtasTmpDocsChainedFileName}, self reference indicates end of chain</li>
  104.  * </ul>
  105.  * </li>
  106.  * <li><b>Temporary file {@link #mtasTmpDocFileName} with extension
  107.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}</b><br>
  108.  * For each document
  109.  * <ul>
  110.  * <li><b>VInt</b>: docId</li>
  111.  * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
  112.  * <li><b>VLong</b>: reference first object, used as offset for tree index
  113.  * <li><b>VInt</b>: slope used in approximation reference objects index on id
  114.  * </li>
  115.  * <li><b>ZLong</b>: offset used in approximation reference objects index on id
  116.  * </li>
  117.  * <li><b>Byte</b>: flag indicating how corrections on the approximation
  118.  * references objects for the index on id are stored:
  119.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
  120.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
  121.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
  122.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
  123.  * <li><b>VInt</b>: number of objects in this document</li>
  124.  * <li><b>VInt</b>: first position</li>
  125.  * <li><b>VInt</b>: last position</li>
  126.  * </ul>
  127.  * </li>
  128.  * </ul>
  129.  * </li>
  130.  * <li><b>Final files</b><br>
  131.  * <ul>
  132.  * <li><b>File {@link #mtasIndexFieldFileName} with extension
  133.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}</b><br>
  134.  * Contains for each field a reference to the list of documents and the
  135.  * prefixes. Structure of content:
  136.  * <ul>
  137.  * <li><b>String</b>: field</li>
  138.  * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
  139.  * <li><b>VLong</b>: reference to {@link #mtasIndexDocIdFileName}</li>
  140.  * <li><b>VInt</b>: number of documents</li>
  141.  * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
  142.  * <li><b>VInt</b>: number of terms</li>
  143.  * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
  144.  * <li><b>VInt</b>: number of prefixes</li>
  145.  * </ul>
  146.  * </li>
  147.  * <li><b>File {@link #mtasTermFileName} with extension
  148.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}</b><br>
  149.  * For each field, all unique terms are stored here. Structure of content:
  150.  * <ul>
  151.  * <li><b>String</b>: term</li>
  152.  * </ul>
  153.  * </li>
  154.  * <li><b>File {@link #mtasPrefixFileName} with extension
  155.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}</b><br>
  156.  * For each field, all unique prefixes are stored here. Structure of content:
  157.  * <ul>
  158.  * <li><b>String</b>: prefix</li>
  159.  * </ul>
  160.  * </li>
  161.  * <li><b>File {@link #mtasObjectFileName} with extension
  162.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}</b><br>
  163.  * Contains all objects for all fields. Structure of content:
  164.  * <ul>
  165.  * <li><b>VInt</b>: mtasId</li>
  166.  * <li><b>VInt</b>: objectFlags
  167.  * <ul>
  168.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}</li>
  169.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}</li>
  170.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}</li>
  171.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}</li>
  172.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}</li>
  173.  * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}</li>
  174.  * </ul>
  175.  * </li>
  176.  * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}<br>
  177.  * <b>VInt</b>: parentId
  178.  * <li>Only if
  179.  * {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}<br>
  180.  * <b>VInt</b>,<b>VInt</b>: startPosition and (endPosition-startPosition)
  181.  * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
  182.  * <b>VInt</b>,<b>VInt</b>,<b>VInt</b>,...: number of positions, firstPosition,
  183.  * (position-previousPosition),...
  184.  * <li>Only if no {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}
  185.  * or {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
  186.  * <b>VInt</b>: position
  187.  * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}<br>
  188.  * <b>VInt</b>,<b>VInt</b>: startOffset, (endOffset-startOffset)
  189.  * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}<br>
  190.  * <b>VInt</b>,<b>VInt</b>: startRealOffset, (endRealOffset-startRealOffset)
  191.  * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}<br>
  192.  * <b>VInt</b>,<b>Bytes</b>: number of bytes, payload
  193.  * <li><b>VLong</b>: reference to Term in {@link #mtasTermFileName}</li>
  194.  * </ul>
  195.  * </li>
  196.  * <li><b>File {@link #mtasIndexDocIdFileName} with extension
  197.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION}
  198.  * </b><br>
  199.  * Contains for each field a tree structure {@link MtasTree} to search reference
  200.  * to {@link #mtasDocFileName} by id. Structure of content for each node:
  201.  * <ul>
  202.  * <li><b>VLong</b>: offset references to {@link #mtasIndexDocIdFileName}, only
  203.  * available in root node</li>
  204.  * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
  205.  * node</li>
  206.  * <li><b>VInt</b>: left</li>
  207.  * <li><b>VInt</b>: right</li>
  208.  * <li><b>VInt</b>: max</li>
  209.  * <li><b>VLong</b>: left reference to {@link #mtasIndexDocIdFileName} minus the
  210.  * offset stored in the root node</li>
  211.  * <li><b>VLong</b>: right reference to {@link #mtasIndexDocIdFileName} minus
  212.  * the offset stored in the root node</li>
  213.  * <li><b>VInt</b>: number of objects on this node (always 1 for this tree)</li>
  214.  * <li><b>VLong</b>: reference to {@link #mtasDocFileName} minus offset</li>
  215.  * </ul>
  216.  * </li>
  217.  * <li><b>File {@link #mtasDocFileName} with extension
  218.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}</b><br>
  219.  * For each document
  220.  * <ul>
  221.  * <li><b>VInt</b>: docId</li>
  222.  * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
  223.  * <li><b>VLong</b>: reference to {@link #mtasIndexObjectPositionFileName}</li>
  224.  * <li><b>VLong</b>: reference to {@link #mtasIndexObjectParentFileName}</li>
  225.  * <li><b>VLong</b>: reference first object, used as offset for tree index
  226.  * <li><b>VInt</b>: slope used in approximation reference objects index on id
  227.  * </li>
  228.  * <li><b>ZLong</b>: offset used in approximation reference objects index on id
  229.  * </li>
  230.  * <li><b>Byte</b>: flag indicating how corrections on the approximation
  231.  * references objects for the index on id are stored:
  232.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
  233.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
  234.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
  235.  * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
  236.  * <li><b>VInt</b>: number of objects</li>
  237.  * <li><b>VInt</b>: first position</li>
  238.  * <li><b>VInt</b>: last position</li>
  239.  * </ul>
  240.  * </li>
  241.  * <li><b>File {@link #mtasIndexObjectIdFileName} with extension
  242.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION}
  243.  * </b><br>
  244.  * Provides for each mtasId the reference to {@link #mtasObjectFileName}. These
  245.  * references are grouped by document, sorted by mtasId, and because the
  246.  * mtasId's for each document will always start with 0 and are sequential
  247.  * without gaps, a reference can be computed if the position of the first
  248.  * reference for a document is known from {@link #mtasDocFileName}. The
  249.  * reference is approximated by the reference to the first object plus the
  250.  * mtasId times a slope. Only a correction to this approximation is stored.
  251.  * Structure of content:
  252.  * <ul>
  253.  * <li><b>Byte</b>/<b>Short</b>/<b>Int</b>/<b>Long</b>: correction reference to
  254.  * {@link #mtasObjectFileName}</li>
  255.  * </ul>
  256.  * </li>
  257.  * <li><b>File {@link #mtasIndexObjectPositionFileName} with extension
  258.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION}
  259.  * </b><br>
  260.  * Contains for each document a tree structure {@link MtasTree} to search
  261.  * objects by position. Structure of content for each node:
  262.  * <ul>
  263.  * <li><b>VLong</b>: offset references to
  264.  * {@link #mtasIndexObjectPositionFileName}, only available in root node</li>
  265.  * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
  266.  * node</li>
  267.  * <li><b>VInt</b>: left</li>
  268.  * <li><b>VInt</b>: right</li>
  269.  * <li><b>VInt</b>: max</li>
  270.  * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectPositionFileName}
  271.  * minus the offset stored in the root node</li>
  272.  * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectPositionFileName}
  273.  * minus the offset stored in the root node</li>
  274.  * <li><b>VInt</b>: number of objects on this node</li>
  275.  * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
  276.  * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
  277.  * position of the prefix in {@link #mtasPrefixFileName} and the reference to
  278.  * {@link #mtasTermFileName} minus offset</li>
  279.  * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
  280.  * reference to {@link #mtasObjectFileName}, position of the prefix in
  281.  * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
  282.  * for the first item the difference between this reference minus the previous
  283.  * reference is stored</li>
  284.  * </ul>
  285.  * </li>
  286.  * <li><b>File {@link #mtasIndexObjectParentFileName} with extension
  287.  * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION}
  288.  * </b><br>
  289.  * Contains for each document a tree structure {@link MtasTree} to search
  290.  * objects by parent. Structure of content for each node:
  291.  * <ul>
  292.  * <li><b>VLong</b>: offset references to {@link #mtasIndexObjectParentFileName}
  293.  * , only available in root node</li>
  294.  * <li><b>Byte</b>: flag, for this tree equal to
  295.  * {@link mtas.codec.tree.MtasTree#SINGLE_POSITION_TREE} indicating a tree with
  296.  * exactly one point at each node, only available in root node</li>
  297.  * <li><b>VInt</b>: left</li>
  298.  * <li><b>VInt</b>: right</li>
  299.  * <li><b>VInt</b>: max</li>
  300.  * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectParentFileName}
  301.  * minus the offset stored in the root node</li>
  302.  * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectParentFileName}
  303.  * minus the offset stored in the root node</li>
  304.  * <li><b>VInt</b>: number of objects on this node</li>
  305.  * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
  306.  * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
  307.  * position of the prefix in {@link #mtasPrefixFileName} and the reference to
  308.  * {@link #mtasTermFileName} minus offset</li>
  309.  * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
  310.  * reference to {@link #mtasObjectFileName}, position of the prefix in
  311.  * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
  312.  * for the first item the difference between this reference minus the previous
  313.  * reference is stored</li>
  314.  * </ul>
  315.  * </li>
  316.  * </ul>
  317.  * </li>
  318.  * </ul>
  319.  *
  320.  */
  321. public class MtasFieldsConsumer extends FieldsConsumer {

  322.   /** Logger for this class. */
  323.   private static final Log log = LogFactory.getLog(MtasFieldsConsumer.class);

  324.   /** The delegate fields consumer; write(Fields) hands the fields to it before the mtas-specific write. */
  325.   private FieldsConsumer delegateFieldsConsumer;

  326.   /** The segment write state; supplies segment name, segment suffix, directory, context and field infos. */
  327.   private SegmentWriteState state;

  328.   /** Per field: prefixes for which registerPrefixIntersection found an already occupied position. */
  329.   private HashMap<String, HashSet<String>> intersectingPrefixes;

  330.   /** Per field: prefixes registered as single position value and not (yet) as range or set. */
  331.   private HashMap<String, HashSet<String>> singlePositionPrefix;

  332.   /** Per field: prefixes registered at least once as range or set position value. */
  333.   private HashMap<String, HashSet<String>> multiplePositionPrefix;

  334.   /** Per field: prefixes registered at least once as set position value. */
  335.   private HashMap<String, HashSet<String>> setPositionPrefix;

  336.   /** Per field: prefix mapped to the file pointer of the prefix output at the moment the prefix was written. */
  337.   private HashMap<String, HashMap<String, Long>> prefixReferenceIndex;

  338.   /** Per field: prefix mapped to its id; ids start at 1 and follow the order of first registration. */
  339.   private HashMap<String, HashMap<String, Integer>> prefixIdIndex;

  340.   /** The token stats min pos. */
  341.   Integer tokenStatsMinPos;

  342.   /** The token stats max pos. */
  343.   Integer tokenStatsMaxPos;

  344.   /** The token stats number. */
  345.   Integer tokenStatsNumber;

  346.   /** Name of the temporary field file (MTAS_TMP_FIELD_EXTENSION); set in the constructor. */
  347.   private String mtasTmpFieldFileName;

  348.   /** Name of the temporary object file (MTAS_TMP_OBJECT_EXTENSION); set in the constructor. */
  349.   private String mtasTmpObjectFileName;

  350.   /** Name of the temporary docs file (MTAS_TMP_DOCS_EXTENSION); set in the constructor. */
  351.   private String mtasTmpDocsFileName;

  352.   /** Name of the temporary doc file (MTAS_TMP_DOC_EXTENSION); set in the constructor. */
  353.   private String mtasTmpDocFileName;

  354.   /** Name of the temporary chained docs file (MTAS_TMP_DOCS_CHAINED_EXTENSION); set in the constructor. */
  355.   private String mtasTmpDocsChainedFileName;

  356.   /** Name of the object file (MTAS_OBJECT_EXTENSION); set in the constructor. */
  357.   private String mtasObjectFileName;

  358.   /** Name of the term file (MTAS_TERM_EXTENSION); set in the constructor. */
  359.   private String mtasTermFileName;

  360.   /** Name of the field index file (MTAS_FIELD_EXTENSION); set in the constructor. */
  361.   private String mtasIndexFieldFileName;

  362.   /** Name of the prefix file (MTAS_PREFIX_EXTENSION); set in the constructor. */
  363.   private String mtasPrefixFileName;

  364.   /** Name of the doc file (MTAS_DOC_EXTENSION); set in the constructor. */
  365.   private String mtasDocFileName;

  366.   /** Name of the doc id index file (MTAS_INDEX_DOC_ID_EXTENSION); set in the constructor. */
  367.   private String mtasIndexDocIdFileName;

  368.   /** Name of the object id index file (MTAS_INDEX_OBJECT_ID_EXTENSION); set in the constructor. */
  369.   private String mtasIndexObjectIdFileName;

  370.   /** Name of the object position index file (MTAS_INDEX_OBJECT_POSITION_EXTENSION); set in the constructor. */
  371.   private String mtasIndexObjectPositionFileName;

  372.   /** Name of the object parent index file (MTAS_INDEX_OBJECT_PARENT_EXTENSION); set in the constructor. */
  373.   private String mtasIndexObjectParentFileName;

  374.   /** The name; passed to CodecUtil.writeIndexHeader when the doc file is created. */
  375.   private String name;

  376.   /** The delegate postings format name. */
  377.   private String delegatePostingsFormatName;

  378.   /**
  379.    * Instantiates a new mtas fields consumer.
  380.    *
  381.    * @param fieldsConsumer
  382.    *          the fields consumer
  383.    * @param state
  384.    *          the state
  385.    * @param name
  386.    *          the name
  387.    * @param delegatePostingsFormatName
  388.    *          the delegate postings format name
  389.    */
  390.   public MtasFieldsConsumer(FieldsConsumer fieldsConsumer,
  391.       SegmentWriteState state, String name, String delegatePostingsFormatName) {
  392.     this.delegateFieldsConsumer = fieldsConsumer;
  393.     this.state = state;
  394.     this.name = name;
  395.     this.delegatePostingsFormatName = delegatePostingsFormatName;
  396.     // temporary fileNames
  397.     mtasTmpFieldFileName = IndexFileNames.segmentFileName(
  398.         state.segmentInfo.name, state.segmentSuffix,
  399.         MtasCodecPostingsFormat.MTAS_TMP_FIELD_EXTENSION);
  400.     mtasTmpObjectFileName = IndexFileNames.segmentFileName(
  401.         state.segmentInfo.name, state.segmentSuffix,
  402.         MtasCodecPostingsFormat.MTAS_TMP_OBJECT_EXTENSION);
  403.     mtasTmpDocsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  404.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TMP_DOCS_EXTENSION);
  405.     mtasTmpDocFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  406.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TMP_DOC_EXTENSION);
  407.     mtasTmpDocsChainedFileName = IndexFileNames.segmentFileName(
  408.         state.segmentInfo.name, state.segmentSuffix,
  409.         MtasCodecPostingsFormat.MTAS_TMP_DOCS_CHAINED_EXTENSION);
  410.     // fileNames
  411.     mtasObjectFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  412.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_OBJECT_EXTENSION);
  413.     mtasTermFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  414.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_TERM_EXTENSION);
  415.     mtasIndexFieldFileName = IndexFileNames.segmentFileName(
  416.         state.segmentInfo.name, state.segmentSuffix,
  417.         MtasCodecPostingsFormat.MTAS_FIELD_EXTENSION);
  418.     mtasPrefixFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  419.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_PREFIX_EXTENSION);
  420.     mtasDocFileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
  421.         state.segmentSuffix, MtasCodecPostingsFormat.MTAS_DOC_EXTENSION);
  422.     mtasIndexDocIdFileName = IndexFileNames.segmentFileName(
  423.         state.segmentInfo.name, state.segmentSuffix,
  424.         MtasCodecPostingsFormat.MTAS_INDEX_DOC_ID_EXTENSION);
  425.     mtasIndexObjectIdFileName = IndexFileNames.segmentFileName(
  426.         state.segmentInfo.name, state.segmentSuffix,
  427.         MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_ID_EXTENSION);
  428.     mtasIndexObjectPositionFileName = IndexFileNames.segmentFileName(
  429.         state.segmentInfo.name, state.segmentSuffix,
  430.         MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_POSITION_EXTENSION);
  431.     mtasIndexObjectParentFileName = IndexFileNames.segmentFileName(
  432.         state.segmentInfo.name, state.segmentSuffix,
  433.         MtasCodecPostingsFormat.MTAS_INDEX_OBJECT_PARENT_EXTENSION);
  434.   }

  435.   /**
  436.    * Register prefix.
  437.    *
  438.    * @param field
  439.    *          the field
  440.    * @param prefix
  441.    *          the prefix
  442.    * @param outPrefix
  443.    *          the out prefix
  444.    * @throws IOException
  445.    *           Signals that an I/O exception has occurred.
  446.    */
  447.   private void registerPrefix(String field, String prefix,
  448.       IndexOutput outPrefix) throws IOException {
  449.     if (!prefixReferenceIndex.containsKey(field)) {
  450.       prefixReferenceIndex.put(field, new HashMap<String, Long>());
  451.       prefixIdIndex.put(field, new HashMap<String, Integer>());
  452.     }
  453.     if (!prefixReferenceIndex.get(field).containsKey(prefix)) {
  454.       int id = 1 + prefixReferenceIndex.get(field).size();
  455.       prefixReferenceIndex.get(field).put(prefix, outPrefix.getFilePointer());
  456.       prefixIdIndex.get(field).put(prefix, id);
  457.       outPrefix.writeString(prefix);
  458.     }
  459.   }

  460.   /**
  461.    * Register prefix intersection.
  462.    *
  463.    * @param field
  464.    *          the field
  465.    * @param prefix
  466.    *          the prefix
  467.    * @param start
  468.    *          the start
  469.    * @param end
  470.    *          the end
  471.    * @param docFieldAdministration
  472.    *          the doc field administration
  473.    */
  474.   private void registerPrefixIntersection(String field, String prefix,
  475.       int start, int end,
  476.       HashMap<String, HashSet<Integer>> docFieldAdministration) {
  477.     if (!intersectingPrefixes.containsKey(field)) {
  478.       intersectingPrefixes.put(field, new HashSet<String>());
  479.     } else if (intersectingPrefixes.get(field).contains(prefix)) {
  480.       return;
  481.     }
  482.     HashSet<Integer> docFieldPrefixAdministration;
  483.     if (!docFieldAdministration.containsKey(prefix)) {
  484.       docFieldPrefixAdministration = new HashSet<>();
  485.       docFieldAdministration.put(prefix, docFieldPrefixAdministration);
  486.     } else {
  487.       docFieldPrefixAdministration = docFieldAdministration.get(prefix);
  488.       // check
  489.       for (int p = start; p <= end; p++) {
  490.         if (docFieldPrefixAdministration.contains(p)) {
  491.           intersectingPrefixes.get(field).add(prefix);
  492.           docFieldAdministration.remove(prefix);
  493.           return;
  494.         }
  495.       }
  496.     }
  497.     // update
  498.     for (int p = start; p <= end; p++) {
  499.       docFieldPrefixAdministration.add(p);
  500.     }
  501.   }

  502.   /**
  503.    * Register prefix stats single position value.
  504.    *
  505.    * @param field
  506.    *          the field
  507.    * @param prefix
  508.    *          the prefix
  509.    * @param outPrefix
  510.    *          the out prefix
  511.    * @throws IOException
  512.    *           Signals that an I/O exception has occurred.
  513.    */
  514.   public void registerPrefixStatsSinglePositionValue(String field,
  515.       String prefix, IndexOutput outPrefix) throws IOException {
  516.     initPrefixStatsField(field);
  517.     registerPrefix(field, prefix, outPrefix);
  518.     if (!multiplePositionPrefix.get(field).contains(prefix)) {
  519.       singlePositionPrefix.get(field).add(prefix);
  520.     }
  521.   }

  522.   /**
  523.    * Register prefix stats range position value.
  524.    *
  525.    * @param field
  526.    *          the field
  527.    * @param prefix
  528.    *          the prefix
  529.    * @param outPrefix
  530.    *          the out prefix
  531.    * @throws IOException
  532.    *           Signals that an I/O exception has occurred.
  533.    */
  534.   public void registerPrefixStatsRangePositionValue(String field, String prefix,
  535.       IndexOutput outPrefix) throws IOException {
  536.     initPrefixStatsField(field);
  537.     registerPrefix(field, prefix, outPrefix);
  538.     singlePositionPrefix.get(field).remove(prefix);
  539.     multiplePositionPrefix.get(field).add(prefix);
  540.   }

  541.   /**
  542.    * Register prefix stats set position value.
  543.    *
  544.    * @param field
  545.    *          the field
  546.    * @param prefix
  547.    *          the prefix
  548.    * @param outPrefix
  549.    *          the out prefix
  550.    * @throws IOException
  551.    *           Signals that an I/O exception has occurred.
  552.    */
  553.   public void registerPrefixStatsSetPositionValue(String field, String prefix,
  554.       IndexOutput outPrefix) throws IOException {
  555.     initPrefixStatsField(field);
  556.     registerPrefix(field, prefix, outPrefix);
  557.     singlePositionPrefix.get(field).remove(prefix);
  558.     multiplePositionPrefix.get(field).add(prefix);
  559.     setPositionPrefix.get(field).add(prefix);
  560.   }

  561.   /**
  562.    * Inits the prefix stats field.
  563.    *
  564.    * @param field
  565.    *          the field
  566.    */
  567.   private void initPrefixStatsField(String field) {
  568.     if (!singlePositionPrefix.containsKey(field)) {
  569.       singlePositionPrefix.put(field, new HashSet<String>());
  570.     }
  571.     if (!multiplePositionPrefix.containsKey(field)) {
  572.       multiplePositionPrefix.put(field, new HashSet<String>());
  573.     }
  574.     if (!setPositionPrefix.containsKey(field)) {
  575.       setPositionPrefix.put(field, new HashSet<String>());
  576.     }
  577.   }

  578.   /**
  579.    * Gets the prefix stats single position prefix attribute.
  580.    *
  581.    * @param field
  582.    *          the field
  583.    * @return the prefix stats single position prefix attribute
  584.    */
  585.   public String getPrefixStatsSinglePositionPrefixAttribute(String field) {
  586.     return String.join(MtasToken.DELIMITER, singlePositionPrefix.get(field));
  587.   }

  588.   /**
  589.    * Gets the prefix stats multiple position prefix attribute.
  590.    *
  591.    * @param field
  592.    *          the field
  593.    * @return the prefix stats multiple position prefix attribute
  594.    */
  595.   public String getPrefixStatsMultiplePositionPrefixAttribute(String field) {
  596.     return String.join(MtasToken.DELIMITER, multiplePositionPrefix.get(field));
  597.   }

  598.   /**
  599.    * Gets the prefix stats set position prefix attribute.
  600.    *
  601.    * @param field
  602.    *          the field
  603.    * @return the prefix stats set position prefix attribute
  604.    */
  605.   public String getPrefixStatsSetPositionPrefixAttribute(String field) {
  606.     return String.join(MtasToken.DELIMITER, setPositionPrefix.get(field));
  607.   }

  608.   /**
  609.    * Gets the prefix stats intersection prefix attribute.
  610.    *
  611.    * @param field
  612.    *          the field
  613.    * @return the prefix stats intersection prefix attribute
  614.    */
  615.   public String getPrefixStatsIntersectionPrefixAttribute(String field) {
  616.     if (intersectingPrefixes.containsKey(field)) {
  617.       return String.join(MtasToken.DELIMITER, intersectingPrefixes.get(field));
  618.     } else {
  619.       return "";
  620.     }
  621.   }

  622.   /*
  623.    * (non-Javadoc)
  624.    *
  625.    * @see org.apache.lucene.codecs.FieldsConsumer#merge(org.apache.lucene.index.
  626.    * MergeState)
  627.    */
  628.   @Override
  629.   public void merge(MergeState mergeState) throws IOException {
  630.     final List<Fields> fields = new ArrayList<>();
  631.     final List<ReaderSlice> slices = new ArrayList<>();

  632.     int docBase = 0;

  633.     for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
  634.       final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

  635.       final int maxDoc = mergeState.maxDocs[readerIndex];
  636.       f.checkIntegrity();
  637.       slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
  638.       fields.add(f);
  639.       docBase += maxDoc;
  640.     }

  641.     Fields mergedFields = new MappedMultiFields(mergeState,
  642.         new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
  643.             slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  644.     write(mergedFields);
  645.   }

  646.   /*
  647.    * (non-Javadoc)
  648.    *
  649.    * @see org.apache.lucene.codecs.FieldsConsumer#write(org.apache.lucene.index.
  650.    * Fields )
  651.    */
  652.   @Override
  653.   public void write(Fields fields) throws IOException {
  654.     delegateFieldsConsumer.write(fields);
  655.     write(state.fieldInfos, fields);
  656.   }

  657.   /**
  658.    * Write.
  659.    *
  660.    * @param fieldInfos
  661.    *          the field infos
  662.    * @param fields
  663.    *          the fields
  664.    */
  665.   private void write(FieldInfos fieldInfos, Fields fields) {
  666.     IndexOutput outField;
  667.     IndexOutput outDoc;
  668.     IndexOutput outIndexDocId;
  669.     IndexOutput outIndexObjectId;
  670.     IndexOutput outIndexObjectPosition;
  671.     IndexOutput outIndexObjectParent;
  672.     IndexOutput outTerm;
  673.     IndexOutput outObject;
  674.     IndexOutput outPrefix;
  675.     IndexOutput outTmpDoc;
  676.     IndexOutput outTmpField;
  677.     HashSet<Closeable> closeables = new HashSet<>();
  678.     // prefix stats
  679.     intersectingPrefixes = new HashMap<>();
  680.     singlePositionPrefix = new HashMap<>();
  681.     multiplePositionPrefix = new HashMap<>();
  682.     setPositionPrefix = new HashMap<>();
  683.     prefixReferenceIndex = new HashMap<>();
  684.     prefixIdIndex = new HashMap<>();
  685.     // temporary temporary index in memory for doc
  686.     SortedMap<Integer, Long> memoryIndexTemporaryObject = new TreeMap<>();
  687.     // create (backwards) chained new temporary index docs
  688.     SortedMap<Integer, Long> memoryTmpDocChainList = new TreeMap<>();
  689.     // list of objectIds and references to objects
  690.     SortedMap<Integer, Long> memoryIndexDocList = new TreeMap<>();

  691.     try {
  692.       // create file tmpDoc
  693.       closeables.add(outTmpDoc = state.directory
  694.           .createOutput(mtasTmpDocFileName, state.context));
  695.       // create file tmpField
  696.       closeables.add(outTmpField = state.directory
  697.           .createOutput(mtasTmpFieldFileName, state.context));
  698.       // create file indexDoc
  699.       closeables.add(outDoc = state.directory.createOutput(mtasDocFileName,
  700.           state.context));
  701.       CodecUtil.writeIndexHeader(outDoc, name,
  702.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  703.           state.segmentSuffix);
  704.       outDoc.writeString(delegatePostingsFormatName);
  705.       // create file indexDocId
  706.       closeables.add(outIndexDocId = state.directory
  707.           .createOutput(mtasIndexDocIdFileName, state.context));
  708.       CodecUtil.writeIndexHeader(outIndexDocId, name,
  709.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  710.           state.segmentSuffix);
  711.       outIndexDocId.writeString(delegatePostingsFormatName);
  712.       // create file indexObjectId
  713.       closeables.add(outIndexObjectId = state.directory
  714.           .createOutput(mtasIndexObjectIdFileName, state.context));
  715.       CodecUtil.writeIndexHeader(outIndexObjectId, name,
  716.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  717.           state.segmentSuffix);
  718.       outIndexObjectId.writeString(delegatePostingsFormatName);
  719.       // create file indexObjectPosition
  720.       closeables.add(outIndexObjectPosition = state.directory
  721.           .createOutput(mtasIndexObjectPositionFileName, state.context));
  722.       CodecUtil.writeIndexHeader(outIndexObjectPosition, name,
  723.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  724.           state.segmentSuffix);
  725.       outIndexObjectPosition.writeString(delegatePostingsFormatName);
  726.       // create file indexObjectParent
  727.       closeables.add(outIndexObjectParent = state.directory
  728.           .createOutput(mtasIndexObjectParentFileName, state.context));
  729.       CodecUtil.writeIndexHeader(outIndexObjectParent, name,
  730.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  731.           state.segmentSuffix);
  732.       outIndexObjectParent.writeString(delegatePostingsFormatName);
  733.       // create file term
  734.       closeables.add(outTerm = state.directory.createOutput(mtasTermFileName,
  735.           state.context));
  736.       CodecUtil.writeIndexHeader(outTerm, name,
  737.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  738.           state.segmentSuffix);
  739.       outTerm.writeString(delegatePostingsFormatName);
  740.       // create file prefix
  741.       closeables.add(outPrefix = state.directory
  742.           .createOutput(mtasPrefixFileName, state.context));
  743.       CodecUtil.writeIndexHeader(outPrefix, name,
  744.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  745.           state.segmentSuffix);
  746.       outPrefix.writeString(delegatePostingsFormatName);
  747.       // create file object
  748.       closeables.add(outObject = state.directory
  749.           .createOutput(mtasObjectFileName, state.context));
  750.       CodecUtil.writeIndexHeader(outObject, name,
  751.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  752.           state.segmentSuffix);
  753.       outObject.writeString(delegatePostingsFormatName);
  754.       // For each field
  755.       for (String field : fields) {
  756.         Terms terms = fields.terms(field);
  757.         if (terms == null) {
  758.           continue;
  759.         } else {
  760.           // new temporary object storage for this field
  761.           IndexOutput outTmpObject = state.directory
  762.               .createOutput(mtasTmpObjectFileName, state.context);
  763.           closeables.add(outTmpObject);
  764.           // new temporary index docs for this field
  765.           IndexOutput outTmpDocs = state.directory
  766.               .createOutput(mtasTmpDocsFileName, state.context);
  767.           closeables.add(outTmpDocs);
  768.           // get fieldInfo
  769.           FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
  770.           // get properties terms
  771.           boolean hasPositions = terms.hasPositions();
  772.           boolean hasFreqs = terms.hasFreqs();
  773.           boolean hasPayloads = fieldInfo.hasPayloads();
  774.           boolean hasOffsets = terms.hasOffsets();
  775.           // register references
  776.           Long smallestTermFilepointer = outTerm.getFilePointer();
  777.           Long smallestPrefixFilepointer = outPrefix.getFilePointer();
  778.           int termCounter = 0;
  779.           // only if freqs, positions and payload available
  780.           if (hasFreqs && hasPositions && hasPayloads) {
  781.             // compute flags
  782.             int flags = PostingsEnum.POSITIONS | PostingsEnum.PAYLOADS;
  783.             if (hasOffsets) {
  784.               flags = flags | PostingsEnum.OFFSETS;
  785.             }
  786.             // get terms
  787.             TermsEnum termsEnum = terms.iterator();
  788.             PostingsEnum postingsEnum = null;
  789.             // for each term in field
  790.             while (true) {
  791.               BytesRef term = termsEnum.next();
  792.               if (term == null) {
  793.                 break;
  794.               }
  795.               // store term and get ref
  796.               Long termRef = outTerm.getFilePointer();
  797.               outTerm.writeString(term.utf8ToString());
  798.               termCounter++;
  799.               // get postings
  800.               postingsEnum = termsEnum.postings(postingsEnum, flags);
  801.               // for each doc in field+term
  802.               while (true) {
  803.                 Integer docId = postingsEnum.nextDoc();
  804.                 if (docId.equals(DocIdSetIterator.NO_MORE_DOCS)) {
  805.                   break;
  806.                 }
  807.                 int freq = postingsEnum.freq();
  808.                 // temporary storage objects and temporary index in memory for
  809.                 // doc
  810.                 memoryIndexTemporaryObject.clear();
  811.                 Long offsetFilePointerTmpObject = outTmpObject.getFilePointer();
  812.                 for (int i = 0; i < freq; i++) {
  813.                   Long currentFilePointerTmpObject = outTmpObject
  814.                       .getFilePointer();
  815.                   Integer mtasId;
  816.                   int position = postingsEnum.nextPosition();
  817.                   BytesRef payload = postingsEnum.getPayload();
  818.                   if (hasOffsets) {
  819.                     mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
  820.                         term, termRef, position, payload,
  821.                         postingsEnum.startOffset(), postingsEnum.endOffset(),
  822.                         outPrefix);
  823.                   } else {
  824.                     mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
  825.                         term, termRef, position, payload, outPrefix);
  826.                   }
  827.                   if (mtasId != null) {
  828.                     assert !memoryIndexTemporaryObject.containsKey(
  829.                         mtasId) : "mtasId should be unique in this selection";
  830.                     memoryIndexTemporaryObject.put(mtasId,
  831.                         currentFilePointerTmpObject);
  832.                   }
  833.                 } // end loop positions
  834.                 // store temporary index for this doc
  835.                 if (memoryIndexTemporaryObject.size() > 0) {
  836.                   // docId for this part
  837.                   outTmpDocs.writeVInt(docId);
  838.                   // number of objects/tokens in this part
  839.                   outTmpDocs.writeVInt(memoryIndexTemporaryObject.size());
  840.                   // offset to be used for references
  841.                   outTmpDocs.writeVLong(offsetFilePointerTmpObject);
  842.                   // loop over tokens
  843.                   for (Entry<Integer, Long> entry : memoryIndexTemporaryObject
  844.                       .entrySet()) {
  845.                     // mtasId object
  846.                     outTmpDocs.writeVInt(entry.getKey());
  847.                     // reference object
  848.                     outTmpDocs.writeVLong(
  849.                         (entry.getValue() - offsetFilePointerTmpObject));
  850.                   }
  851.                 }
  852.                 // clean up
  853.                 memoryIndexTemporaryObject.clear();
  854.               } // end loop docs
  855.             } // end loop terms
  856.             // set fieldInfo
  857.             fieldInfos.fieldInfo(field).putAttribute(
  858.                 MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION,
  859.                 getPrefixStatsSinglePositionPrefixAttribute(field));
  860.             fieldInfos.fieldInfo(field).putAttribute(
  861.                 MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION,
  862.                 getPrefixStatsMultiplePositionPrefixAttribute(field));
  863.             fieldInfos.fieldInfo(field).putAttribute(
  864.                 MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION,
  865.                 getPrefixStatsSetPositionPrefixAttribute(field));
  866.           } // end processing field with freqs, positions and payload
  867.           // close temporary object storage and index docs
  868.           outTmpObject.close();
  869.           closeables.remove(outTmpObject);
  870.           outTmpDocs.close();
  871.           closeables.remove(outTmpDocs);

  872.           // create (backwards) chained new temporary index docs
  873.           IndexInput inTmpDocs = state.directory.openInput(mtasTmpDocsFileName,
  874.               state.context);
  875.           closeables.add(inTmpDocs);
  876.           IndexOutput outTmpDocsChained = state.directory
  877.               .createOutput(mtasTmpDocsChainedFileName, state.context);
  878.           closeables.add(outTmpDocsChained);
  879.           memoryTmpDocChainList.clear();
  880.           while (true) {
  881.             try {
  882.               Long currentFilepointer = outTmpDocsChained.getFilePointer();
  883.               // copy docId
  884.               int docId = inTmpDocs.readVInt();
  885.               outTmpDocsChained.writeVInt(docId);
  886.               // copy size
  887.               int size = inTmpDocs.readVInt();
  888.               outTmpDocsChained.writeVInt(size);
  889.               // offset references
  890.               outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
  891.               for (int t = 0; t < size; t++) {
  892.                 outTmpDocsChained.writeVInt(inTmpDocs.readVInt());
  893.                 outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
  894.               }
  895.               // set back reference to part with same docId
  896.               if (memoryTmpDocChainList.containsKey(docId)) {
  897.                 // reference to previous
  898.                 outTmpDocsChained.writeVLong(memoryTmpDocChainList.get(docId));
  899.               } else {
  900.                 // self reference indicates end of chain
  901.                 outTmpDocsChained.writeVLong(currentFilepointer);
  902.               }
  903.               // update temporary index in memory
  904.               memoryTmpDocChainList.put(docId, currentFilepointer);
  905.             } catch (IOException ex) {
  906.               log.debug(ex);
  907.               break;
  908.             }
  909.           }
  910.           outTmpDocsChained.close();
  911.           closeables.remove(outTmpDocsChained);
  912.           inTmpDocs.close();
  913.           closeables.remove(inTmpDocs);
  914.           state.directory.deleteFile(mtasTmpDocsFileName);

  915.           // set reference to tmpDoc in Field
  916.           if (memoryTmpDocChainList.size() > 0) {
  917.             outTmpField.writeString(field);
  918.             outTmpField.writeVLong(outTmpDoc.getFilePointer());
  919.             outTmpField.writeVInt(memoryTmpDocChainList.size());
  920.             outTmpField.writeVLong(smallestTermFilepointer);
  921.             outTmpField.writeVInt(termCounter);
  922.             outTmpField.writeVLong(smallestPrefixFilepointer);
  923.             outTmpField.writeVInt(prefixReferenceIndex.get(field).size());
  924.             // fill indexDoc
  925.             IndexInput inTmpDocsChained = state.directory
  926.                 .openInput(mtasTmpDocsChainedFileName, state.context);
  927.             closeables.add(inTmpDocsChained);
  928.             IndexInput inTmpObject = state.directory
  929.                 .openInput(mtasTmpObjectFileName, state.context);
  930.             closeables.add(inTmpObject);
  931.             for (Entry<Integer, Long> entry : memoryTmpDocChainList
  932.                 .entrySet()) {
  933.               Integer docId = entry.getKey();
  934.               Long currentFilePointer;
  935.               Long newFilePointer;
  936.               // list of objectIds and references to objects
  937.               memoryIndexDocList.clear();
  938.               // construct final object + indexObjectId for docId
  939.               currentFilePointer = entry.getValue();
  940.               // collect objects for document
  941.               tokenStatsMinPos = null;
  942.               tokenStatsMaxPos = null;
  943.               tokenStatsNumber = 0;
  944.               while (true) {
  945.                 inTmpDocsChained.seek(currentFilePointer);
  946.                 Integer docIdPart = inTmpDocsChained.readVInt();
  947.                 assert docIdPart.equals(
  948.                     docId) : "conflicting docId in reference to temporaryIndexDocsChained";
  949.                 // number of objects/tokens in part
  950.                 int size = inTmpDocsChained.readVInt();
  951.                 long offsetFilePointerTmpObject = inTmpDocsChained.readVLong();
  952.                 assert size > 0 : "number of objects/tokens in part cannot be "
  953.                     + size;
  954.                 for (int t = 0; t < size; t++) {
  955.                   int mtasId = inTmpDocsChained.readVInt();
  956.                   Long tmpObjectRef = inTmpDocsChained.readVLong()
  957.                       + offsetFilePointerTmpObject;
  958.                   assert !memoryIndexDocList.containsKey(
  959.                       mtasId) : "mtasId should be unique in this selection";
  960.                   // initially, store ref to tmpObject
  961.                   memoryIndexDocList.put(mtasId, tmpObjectRef);
  962.                 }
  963.                 // reference to next part
  964.                 newFilePointer = inTmpDocsChained.readVLong();
  965.                 if (newFilePointer.equals(currentFilePointer)) {
  966.                   break; // end of chained parts
  967.                 } else {
  968.                   currentFilePointer = newFilePointer;
  969.                 }
  970.               }
  971.               // now create new objects, sorted by mtasId
  972.               Long smallestObjectFilepointer = outObject.getFilePointer();
  973.               for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  974.                   .entrySet()) {
  975.                 int mtasId = objectEntry.getKey();
  976.                 Long tmpObjectRef = objectEntry.getValue();
  977.                 Long objectRef = outObject.getFilePointer();
  978.                 copyObjectAndUpdateStats(mtasId, inTmpObject, tmpObjectRef,
  979.                     outObject);
  980.                 // update with new ref
  981.                 memoryIndexDocList.put(mtasId, objectRef);
  982.               }
  983.               // check mtasIds properties
  984.               assert memoryIndexDocList.firstKey()
  985.                   .equals(0) : "first mtasId should not be "
  986.                       + memoryIndexDocList.firstKey();
  987.               assert (1 + memoryIndexDocList.lastKey()
  988.                   - memoryIndexDocList.firstKey()) == memoryIndexDocList
  989.                       .size() : "missing mtasId";
  990.               assert tokenStatsNumber.equals(memoryIndexDocList
  991.                   .size()) : "incorrect number of items in tokenStats";

  992.               // store item in tmpDoc
  993.               outTmpDoc.writeVInt(docId);
  994.               outTmpDoc.writeVLong(outIndexObjectId.getFilePointer());

  995.               int mtasId = 0;
  996.               // compute linear approximation (least squares method, integer
  997.               // constants)
  998.               long tmpN = memoryIndexDocList.size();
  999.               long tmpSumY = 0;
  1000.               long tmpSumXY = 0;
  1001.               long tmpSumX = 0;
  1002.               long tmpSumXX = 0;
  1003.               for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  1004.                   .entrySet()) {
  1005.                 assert objectEntry.getKey()
  1006.                     .equals(mtasId) : "unexpected mtasId";
  1007.                 tmpSumY += objectEntry.getValue();
  1008.                 tmpSumX += mtasId;
  1009.                 tmpSumXY += mtasId * objectEntry.getValue();
  1010.                 tmpSumXX += mtasId * mtasId;
  1011.                 mtasId++;
  1012.               }
  1013.               int objectRefApproxQuotient;
  1014.               if(tmpN>1) {
  1015.                 objectRefApproxQuotient= (int) (((tmpN * tmpSumXY)              
  1016.                   - (tmpSumX * tmpSumY))
  1017.                   / ((tmpN * tmpSumXX) - (tmpSumX * tmpSumX)));
  1018.               } else {
  1019.                 objectRefApproxQuotient = 0;
  1020.               }
  1021.               long objectRefApproxOffset = (tmpSumY
  1022.                   - objectRefApproxQuotient * tmpSumX) / tmpN;
  1023.               Long objectRefApproxCorrection;
  1024.               long maxAbsObjectRefApproxCorrection = 0;
  1025.               // compute maximum correction
  1026.               mtasId = 0;
  1027.               for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  1028.                   .entrySet()) {
  1029.                 objectRefApproxCorrection = (objectEntry.getValue()
  1030.                     - (objectRefApproxOffset
  1031.                         + (mtasId * objectRefApproxQuotient)));
  1032.                 maxAbsObjectRefApproxCorrection = Math.max(
  1033.                     maxAbsObjectRefApproxCorrection,
  1034.                     Math.abs(objectRefApproxCorrection));
  1035.                 mtasId++;
  1036.               }
  1037.               byte storageFlags;
  1038.               if (maxAbsObjectRefApproxCorrection <= Long
  1039.                   .valueOf(Byte.MAX_VALUE)) {
  1040.                 storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_BYTE;
  1041.               } else if (maxAbsObjectRefApproxCorrection <= Long
  1042.                   .valueOf(Short.MAX_VALUE)) {
  1043.                 storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_SHORT;
  1044.               } else if (maxAbsObjectRefApproxCorrection <= Long
  1045.                   .valueOf(Integer.MAX_VALUE)) {
  1046.                 storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER;
  1047.               } else {
  1048.                 storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_LONG;
  1049.               }
  1050.               // update indexObjectId with correction on approximated ref
  1051.               // (assume
  1052.               // can be stored as int)
  1053.               mtasId = 0;
  1054.               for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  1055.                   .entrySet()) {
  1056.                 objectRefApproxCorrection = (objectEntry.getValue()
  1057.                     - (objectRefApproxOffset
  1058.                         + (mtasId * objectRefApproxQuotient)));
  1059.                 if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
  1060.                   outIndexObjectId
  1061.                       .writeByte(objectRefApproxCorrection.byteValue());
  1062.                 } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
  1063.                   outIndexObjectId
  1064.                       .writeShort(objectRefApproxCorrection.shortValue());
  1065.                 } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
  1066.                   outIndexObjectId
  1067.                       .writeInt(objectRefApproxCorrection.intValue());
  1068.                 } else {
  1069.                   outIndexObjectId.writeLong(objectRefApproxCorrection);
  1070.                 }
  1071.                 mtasId++;
  1072.               }
  1073.               outTmpDoc.writeVLong(smallestObjectFilepointer);
  1074.               outTmpDoc.writeVInt(objectRefApproxQuotient);
  1075.               outTmpDoc.writeZLong(objectRefApproxOffset);
  1076.               outTmpDoc.writeByte(storageFlags);
  1077.               outTmpDoc.writeVInt(tokenStatsNumber);
  1078.               outTmpDoc.writeVInt(tokenStatsMinPos);
  1079.               outTmpDoc.writeVInt(tokenStatsMaxPos);
  1080.               // clean up
  1081.               memoryIndexDocList.clear();
  1082.             } // end loop over docs
  1083.             inTmpDocsChained.close();
  1084.             closeables.remove(inTmpDocsChained);
  1085.             inTmpObject.close();
  1086.             closeables.remove(inTmpObject);
  1087.           }
  1088.           // clean up
  1089.           memoryTmpDocChainList.clear();
  1090.           // remove temporary files
  1091.           state.directory.deleteFile(mtasTmpObjectFileName);
  1092.           state.directory.deleteFile(mtasTmpDocsChainedFileName);
  1093.           // store references for field

  1094.         } // end processing field
  1095.       } // end loop fields
  1096.       // close temporary index doc
  1097.       outTmpDoc.close();
  1098.       closeables.remove(outTmpDoc);
  1099.       // close indexField, indexObjectId and object
  1100.       CodecUtil.writeFooter(outTmpField);
  1101.       outTmpField.close();
  1102.       closeables.remove(outTmpField);
  1103.       CodecUtil.writeFooter(outIndexObjectId);
  1104.       outIndexObjectId.close();
  1105.       closeables.remove(outIndexObjectId);
  1106.       CodecUtil.writeFooter(outObject);
  1107.       outObject.close();
  1108.       closeables.remove(outObject);
  1109.       CodecUtil.writeFooter(outTerm);
  1110.       outTerm.close();
  1111.       closeables.remove(outTerm);
  1112.       CodecUtil.writeFooter(outPrefix);
  1113.       outPrefix.close();
  1114.       closeables.remove(outPrefix);

  1115.       // create final doc, fill indexObjectPosition, indexObjectParent and
  1116.       // indexTermPrefixPosition, create final field
  1117.       IndexInput inTmpField = state.directory.openInput(mtasTmpFieldFileName,
  1118.           state.context);
  1119.       closeables.add(inTmpField);
  1120.       IndexInput inTmpDoc = state.directory.openInput(mtasTmpDocFileName,
  1121.           state.context);
  1122.       closeables.add(inTmpDoc);
  1123.       IndexInput inObjectId = state.directory
  1124.           .openInput(mtasIndexObjectIdFileName, state.context);
  1125.       closeables.add(inObjectId);
  1126.       IndexInput inObject = state.directory.openInput(mtasObjectFileName,
  1127.           state.context);
  1128.       closeables.add(inObject);
  1129.       IndexInput inTerm = state.directory.openInput(mtasTermFileName,
  1130.           state.context);
  1131.       closeables.add(inTerm);
  1132.       closeables.add(outField = state.directory
  1133.           .createOutput(mtasIndexFieldFileName, state.context));
  1134.       CodecUtil.writeIndexHeader(outField, name,
  1135.           MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  1136.           state.segmentSuffix);
  1137.       outField.writeString(delegatePostingsFormatName);
  1138.       boolean doWrite = true;
  1139.       do {
  1140.         try {
  1141.           // read from tmpField
  1142.           String field = inTmpField.readString();
  1143.           long fpTmpDoc = inTmpField.readVLong();
  1144.           int numberDocs = inTmpField.readVInt();
  1145.           long fpTerm = inTmpField.readVLong();
  1146.           int numberTerms = inTmpField.readVInt();
  1147.           long fpPrefix = inTmpField.readVLong();
  1148.           int numberPrefixes = inTmpField.readVInt();
  1149.           inTmpDoc.seek(fpTmpDoc);
  1150.           long fpFirstDoc = outDoc.getFilePointer();
  1151.           // get prefixId index
  1152.           HashMap<String, Integer> prefixIdIndexField = prefixIdIndex
  1153.               .get(field);
  1154.           // construct MtasRBTree for indexDocId
  1155.           MtasRBTree mtasDocIdTree = new MtasRBTree(true, false);
  1156.           for (int docCounter = 0; docCounter < numberDocs; docCounter++) {
  1157.             // get info from tmpDoc
  1158.             int docId = inTmpDoc.readVInt();
  1159.             // filePointer indexObjectId
  1160.             Long fpIndexObjectId = inTmpDoc.readVLong();
  1161.             // filePointer indexObjectPosition (unknown)
  1162.             Long fpIndexObjectPosition;
  1163.             // filePointer indexObjectParent (unknown)
  1164.             Long fpIndexObjectParent;
  1165.             // constants for approximation object references for this document
  1166.             long smallestObjectFilepointer = inTmpDoc.readVLong();
  1167.             int objectRefApproxQuotient = inTmpDoc.readVInt();
  1168.             long objectRefApproxOffset = inTmpDoc.readZLong();
  1169.             byte storageFlags = inTmpDoc.readByte();
  1170.             // number objects/tokens
  1171.             int size = inTmpDoc.readVInt();
  1172.             // construct MtasRBTree
  1173.             MtasRBTree mtasPositionTree = new MtasRBTree(false, true);
  1174.             MtasRBTree mtasParentTree = new MtasRBTree(false, true);
  1175.             inObjectId.seek(fpIndexObjectId);
  1176.             long refCorrection;
  1177.             long ref;
  1178.             HashMap<String, HashSet<Integer>> docFieldAdministration = new HashMap<>();
  1179.             for (int mtasId = 0; mtasId < size; mtasId++) {
  1180.               if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
  1181.                 refCorrection = inObjectId.readByte();
  1182.               } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
  1183.                 refCorrection = inObjectId.readShort();
  1184.               } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
  1185.                 refCorrection = inObjectId.readInt();
  1186.               } else {
  1187.                 refCorrection = inObjectId.readLong();
  1188.               }
  1189.               ref = objectRefApproxOffset + mtasId * objectRefApproxQuotient
  1190.                   + refCorrection;
  1191.               MtasTokenString token = MtasCodecPostingsFormat.getToken(inObject,
  1192.                   inTerm, ref);
  1193.               String prefix = token.getPrefix();
  1194.               registerPrefixIntersection(field, prefix,
  1195.                   token.getPositionStart(), token.getPositionEnd(),
  1196.                   docFieldAdministration);
  1197.               int prefixId = prefixIdIndexField.containsKey(prefix)
  1198.                   ? prefixIdIndexField.get(prefix) : 0;
  1199.               token.setPrefixId(prefixId);
  1200.               assert token.getId().equals(mtasId) : "unexpected mtasId "
  1201.                   + mtasId;
  1202.               mtasPositionTree.addPositionAndObjectFromToken(token);
  1203.               mtasParentTree.addParentFromToken(token);
  1204.             }
  1205.             // store mtasPositionTree and mtasParentTree
  1206.             fpIndexObjectPosition = storeTree(mtasPositionTree,
  1207.                 outIndexObjectPosition, smallestObjectFilepointer);
  1208.             fpIndexObjectParent = storeTree(mtasParentTree,
  1209.                 outIndexObjectParent, smallestObjectFilepointer);
  1210.             long fpDoc = outDoc.getFilePointer();
  1211.             // create indexDoc with updated fpIndexObjectPosition from tmpDoc
  1212.             outDoc.writeVInt(docId); // docId
  1213.             // reference indexObjectId
  1214.             outDoc.writeVLong(fpIndexObjectId);
  1215.             // reference indexObjectPosition
  1216.             outDoc.writeVLong(fpIndexObjectPosition);
  1217.             // reference indexObjectParent
  1218.             outDoc.writeVLong(fpIndexObjectParent);
  1219.             // variables approximation and storage references object
  1220.             outDoc.writeVLong(smallestObjectFilepointer);
  1221.             outDoc.writeVInt(objectRefApproxQuotient);
  1222.             outDoc.writeZLong(objectRefApproxOffset);
  1223.             outDoc.writeByte(storageFlags);
  1224.             // number of objects
  1225.             outDoc.writeVInt(size);
  1226.             // minPosition
  1227.             outDoc.writeVInt(inTmpDoc.readVInt());
  1228.             // maxPosition
  1229.             outDoc.writeVInt(inTmpDoc.readVInt());
  1230.             // add to tree for indexDocId
  1231.             mtasDocIdTree.addIdFromDoc(docId, fpDoc);
  1232.           }
  1233.           long fpIndexDocId = storeTree(mtasDocIdTree, outIndexDocId,
  1234.               fpFirstDoc);

  1235.           // store in indexField
  1236.           outField.writeString(field);
  1237.           outField.writeVLong(fpFirstDoc);
  1238.           outField.writeVLong(fpIndexDocId);
  1239.           outField.writeVInt(numberDocs);
  1240.           outField.writeVLong(fpTerm);
  1241.           outField.writeVInt(numberTerms);
  1242.           outField.writeVLong(fpPrefix);
  1243.           outField.writeVInt(numberPrefixes);
  1244.           // register intersection
  1245.           fieldInfos.fieldInfo(field).putAttribute(
  1246.               MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_INTERSECTION,
  1247.               getPrefixStatsIntersectionPrefixAttribute(field));
  1248.         } catch (EOFException e) {
  1249.           log.debug(e);
  1250.           doWrite = false;
  1251.         }
  1252.         // end loop over fields
  1253.       } while (doWrite);
  1254.       inTerm.close();
  1255.       closeables.remove(inTerm);
  1256.       inObject.close();
  1257.       closeables.remove(inObject);
  1258.       inObjectId.close();
  1259.       closeables.remove(inObjectId);
  1260.       inTmpDoc.close();
  1261.       closeables.remove(inTmpDoc);
  1262.       inTmpField.close();
  1263.       closeables.remove(inTmpField);

  1264.       // remove temporary files
  1265.       state.directory.deleteFile(mtasTmpDocFileName);
  1266.       state.directory.deleteFile(mtasTmpFieldFileName);
  1267.       // close indexDoc, indexObjectPosition and indexObjectParent
  1268.       CodecUtil.writeFooter(outDoc);
  1269.       outDoc.close();
  1270.       closeables.remove(outDoc);
  1271.       CodecUtil.writeFooter(outIndexObjectPosition);
  1272.       outIndexObjectPosition.close();
  1273.       closeables.remove(outIndexObjectPosition);
  1274.       CodecUtil.writeFooter(outIndexObjectParent);
  1275.       outIndexObjectParent.close();
  1276.       closeables.remove(outIndexObjectParent);
  1277.       CodecUtil.writeFooter(outIndexDocId);
  1278.       outIndexDocId.close();
  1279.       closeables.remove(outIndexDocId);
  1280.       CodecUtil.writeFooter(outField);
  1281.       outField.close();
  1282.       closeables.remove(outField);
  1283.     } catch (IOException e) {
  1284.       // ignore, can happen when merging segment already written by
  1285.       // delegateFieldsConsumer
  1286.       log.error(e);
  1287.     } finally {
  1288.       IOUtils.closeWhileHandlingException(closeables);
  1289.       try {
  1290.         state.directory.deleteFile(mtasTmpDocsFileName);
  1291.       } catch (IOException e) {
  1292.         log.debug(e);
  1293.       }
  1294.       try {
  1295.         state.directory.deleteFile(mtasTmpDocFileName);
  1296.       } catch (IOException e) {
  1297.         log.debug(e);
  1298.       }
  1299.       try {
  1300.         state.directory.deleteFile(mtasTmpFieldFileName);
  1301.       } catch (IOException e) {
  1302.         log.debug(e);
  1303.       }
  1304.     }
  1305.   }

  1306.   /**
  1307.    * Creates the object and register prefix.
  1308.    *
  1309.    * @param field
  1310.    *          the field
  1311.    * @param out
  1312.    *          the out
  1313.    * @param term
  1314.    *          the term
  1315.    * @param termRef
  1316.    *          the term ref
  1317.    * @param startPosition
  1318.    *          the start position
  1319.    * @param payload
  1320.    *          the payload
  1321.    * @param outPrefix
  1322.    *          the out prefix
  1323.    * @return the integer
  1324.    * @throws IOException
  1325.    *           Signals that an I/O exception has occurred.
  1326.    */
  1327.   private Integer createObjectAndRegisterPrefix(String field, IndexOutput out,
  1328.       BytesRef term, Long termRef, int startPosition, BytesRef payload,
  1329.       IndexOutput outPrefix) throws IOException {
  1330.     return createObjectAndRegisterPrefix(field, out, term, termRef,
  1331.         startPosition, payload, null, null, outPrefix);
  1332.   }

  1333.   /**
  1334.    * Creates the object and register prefix.
  1335.    *
  1336.    * @param field
  1337.    *          the field
  1338.    * @param out
  1339.    *          the out
  1340.    * @param term
  1341.    *          the term
  1342.    * @param termRef
  1343.    *          the term ref
  1344.    * @param startPosition
  1345.    *          the start position
  1346.    * @param payload
  1347.    *          the payload
  1348.    * @param startOffset
  1349.    *          the start offset
  1350.    * @param endOffset
  1351.    *          the end offset
  1352.    * @param outPrefix
  1353.    *          the out prefix
  1354.    * @return the integer
  1355.    * @throws IOException
  1356.    *           Signals that an I/O exception has occurred.
  1357.    */
  1358.   private Integer createObjectAndRegisterPrefix(String field, IndexOutput out,
  1359.       BytesRef term, Long termRef, int startPosition, BytesRef payload,
  1360.       Integer startOffset, Integer endOffset, IndexOutput outPrefix)
  1361.       throws IOException {
  1362.     try {
  1363.       Integer mtasId = null;
  1364.       String prefix = MtasToken.getPrefixFromValue(term.utf8ToString());
  1365.       if (payload != null) {
  1366.         MtasPayloadDecoder payloadDecoder = new MtasPayloadDecoder();
  1367.         payloadDecoder.init(startPosition, Arrays.copyOfRange(payload.bytes,
  1368.             payload.offset, (payload.offset + payload.length)));
  1369.         mtasId = payloadDecoder.getMtasId();
  1370.         Integer mtasParentId = payloadDecoder.getMtasParentId();
  1371.         byte[] mtasPayload = payloadDecoder.getMtasPayload();
  1372.         MtasPosition mtasPosition = payloadDecoder.getMtasPosition();
  1373.         MtasOffset mtasOffset = payloadDecoder.getMtasOffset();
  1374.         if (mtasOffset == null && startOffset != null) {
  1375.           mtasOffset = new MtasOffset(startOffset, endOffset);
  1376.         }
  1377.         MtasOffset mtasRealOffset = payloadDecoder.getMtasRealOffset();
  1378.         // only if really mtas object
  1379.         if (mtasId != null) {
  1380.           // compute flags
  1381.           int objectFlags = 0;
  1382.           if (mtasPosition != null) {
  1383.             if (mtasPosition.checkType(MtasPosition.POSITION_RANGE)) {
  1384.               objectFlags = objectFlags
  1385.                   | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE;
  1386.               registerPrefixStatsRangePositionValue(field, prefix, outPrefix);
  1387.             } else if (mtasPosition.checkType(MtasPosition.POSITION_SET)) {
  1388.               objectFlags = objectFlags
  1389.                   | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET;
  1390.               registerPrefixStatsSetPositionValue(field, prefix, outPrefix);
  1391.             } else {
  1392.               registerPrefixStatsSinglePositionValue(field, prefix, outPrefix);
  1393.             }
  1394.           } else {
  1395.             throw new IOException("no position");
  1396.           }
  1397.           if (mtasParentId != null) {
  1398.             objectFlags = objectFlags
  1399.                 | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT;
  1400.           }
  1401.           if (mtasOffset != null) {
  1402.             objectFlags = objectFlags
  1403.                 | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET;
  1404.           }
  1405.           if (mtasRealOffset != null) {
  1406.             objectFlags = objectFlags
  1407.                 | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET;
  1408.           }
  1409.           if (mtasPayload != null) {
  1410.             objectFlags = objectFlags
  1411.                 | MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD;
  1412.           }
  1413.           // create object
  1414.           out.writeVInt(mtasId);
  1415.           out.writeVInt(objectFlags);
  1416.           if ((objectFlags
  1417.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) {
  1418.             out.writeVInt(mtasParentId);
  1419.           }
  1420.           if ((objectFlags
  1421.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) {
  1422.             int tmpStart = mtasPosition.getStart();
  1423.             out.writeVInt(tmpStart);
  1424.             out.writeVInt((mtasPosition.getEnd() - tmpStart));
  1425.           } else if ((objectFlags
  1426.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) {
  1427.             int[] positions = mtasPosition.getPositions();
  1428.             out.writeVInt(positions.length);
  1429.             int tmpPrevious = 0;
  1430.             for (int position : positions) {
  1431.               out.writeVInt((position - tmpPrevious));
  1432.               tmpPrevious = position;
  1433.             }
  1434.           } else {
  1435.             out.writeVInt(mtasPosition.getStart());
  1436.           }
  1437.           if ((objectFlags
  1438.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) {
  1439.             int tmpStart = mtasOffset.getStart();
  1440.             out.writeVInt(mtasOffset.getStart());
  1441.             out.writeVInt((mtasOffset.getEnd() - tmpStart));
  1442.           }
  1443.           if ((objectFlags
  1444.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) {
  1445.             int tmpStart = mtasRealOffset.getStart();
  1446.             out.writeVInt(mtasRealOffset.getStart());
  1447.             out.writeVInt((mtasRealOffset.getEnd() - tmpStart));
  1448.           }
  1449.           if ((objectFlags
  1450.               & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) {
  1451.             if (mtasPayload != null) {
  1452.               out.writeVInt(mtasPayload.length);
  1453.               out.writeBytes(mtasPayload, mtasPayload.length);
  1454.             } else {
  1455.               out.writeVInt(0);
  1456.             }
  1457.           }
  1458.           out.writeVLong(termRef);
  1459.         } // storage token
  1460.       }
  1461.       return mtasId;
  1462.     } catch (Exception e) {
  1463.       log.error(e);
  1464.       throw new IOException(e);
  1465.     }
  1466.   }

  1467.   /**
  1468.    * Store tree.
  1469.    *
  1470.    * @param tree
  1471.    *          the tree
  1472.    * @param out
  1473.    *          the out
  1474.    * @param refApproxOffset
  1475.    *          the ref approx offset
  1476.    * @return the long
  1477.    * @throws IOException
  1478.    *           Signals that an I/O exception has occurred.
  1479.    */
  1480.   private Long storeTree(MtasTree<?> tree, IndexOutput out,
  1481.       long refApproxOffset) throws IOException {
  1482.     return storeTree(tree.close(), tree.isSinglePoint(),
  1483.         tree.isStorePrefixAndTermRef(), out, null, refApproxOffset);
  1484.   }

  1485.   /**
  1486.    * Store tree.
  1487.    *
  1488.    * @param node
  1489.    *          the node
  1490.    * @param isSinglePoint
  1491.    *          the is single point
  1492.    * @param storeAdditionalInformation
  1493.    *          the store additional information
  1494.    * @param out
  1495.    *          the out
  1496.    * @param nodeRefApproxOffset
  1497.    *          the node ref approx offset
  1498.    * @param refApproxOffset
  1499.    *          the ref approx offset
  1500.    * @return the long
  1501.    * @throws IOException
  1502.    *           Signals that an I/O exception has occurred.
  1503.    */
  1504.   private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint,
  1505.       boolean storeAdditionalInformation, IndexOutput out,
  1506.       Long nodeRefApproxOffset, long refApproxOffset) throws IOException {
  1507.     Long localNodeRefApproxOffset = nodeRefApproxOffset;
  1508.     if (node != null) {
  1509.       Boolean isRoot = false;
  1510.       if (localNodeRefApproxOffset == null) {
  1511.         localNodeRefApproxOffset = out.getFilePointer();
  1512.         isRoot = true;
  1513.       }
  1514.       Long fpIndexObjectPositionLeftChild;
  1515.       Long fpIndexObjectPositionRightChild;
  1516.       if (node.leftChild != null) {
  1517.         fpIndexObjectPositionLeftChild = storeTree(node.leftChild,
  1518.             isSinglePoint, storeAdditionalInformation, out,
  1519.             localNodeRefApproxOffset, refApproxOffset);
  1520.       } else {
  1521.         fpIndexObjectPositionLeftChild = (long) 0; // tmp
  1522.       }
  1523.       if (node.rightChild != null) {
  1524.         fpIndexObjectPositionRightChild = storeTree(node.rightChild,
  1525.             isSinglePoint, storeAdditionalInformation, out,
  1526.             localNodeRefApproxOffset, refApproxOffset);
  1527.       } else {
  1528.         fpIndexObjectPositionRightChild = (long) 0; // tmp
  1529.       }
  1530.       Long fpIndexObjectPosition = out.getFilePointer();
  1531.       if (node.leftChild == null) {
  1532.         fpIndexObjectPositionLeftChild = fpIndexObjectPosition;
  1533.       }
  1534.       if (node.rightChild == null) {
  1535.         fpIndexObjectPositionRightChild = fpIndexObjectPosition;
  1536.       }
  1537.       if (isRoot) {
  1538.         assert localNodeRefApproxOffset >= 0 : "nodeRefApproxOffset < 0 : "
  1539.             + localNodeRefApproxOffset;
  1540.         out.writeVLong(localNodeRefApproxOffset);
  1541.         byte flag = 0;
  1542.         if (isSinglePoint) {
  1543.           flag |= MtasTree.SINGLE_POSITION_TREE;
  1544.         }
  1545.         if (storeAdditionalInformation) {
  1546.           flag |= MtasTree.STORE_ADDITIONAL_ID;
  1547.         }
  1548.         out.writeByte(flag);
  1549.       }
  1550.       assert node.left >= 0 : "node.left < 0 : " + node.left;
  1551.       out.writeVInt(node.left);
  1552.       assert node.right >= 0 : "node.right < 0 : " + node.right;
  1553.       out.writeVInt(node.right);
  1554.       assert node.max >= 0 : "node.max < 0 : " + node.max;
  1555.       out.writeVInt(node.max);
  1556.       assert fpIndexObjectPositionLeftChild >= localNodeRefApproxOffset : "fpIndexObjectPositionLeftChild<nodeRefApproxOffset : "
  1557.           + fpIndexObjectPositionLeftChild + " and " + localNodeRefApproxOffset;
  1558.       out.writeVLong(
  1559.           (fpIndexObjectPositionLeftChild - localNodeRefApproxOffset));
  1560.       assert fpIndexObjectPositionRightChild >= localNodeRefApproxOffset : "fpIndexObjectPositionRightChild<nodeRefApproxOffset"
  1561.           + fpIndexObjectPositionRightChild + " and "
  1562.           + localNodeRefApproxOffset;
  1563.       out.writeVLong(
  1564.           (fpIndexObjectPositionRightChild - localNodeRefApproxOffset));
  1565.       if (!isSinglePoint) {
  1566.         out.writeVInt(node.ids.size());
  1567.       }
  1568.       HashMap<Integer, MtasTreeNodeId> ids = node.ids;
  1569.       Long objectRefCorrected;
  1570.       long objectRefCorrectedPrevious = 0;
  1571.       // sort refs
  1572.       List<MtasTreeNodeId> nodeIds = new ArrayList<>(ids.values());
  1573.       Collections.sort(nodeIds);
  1574.       if (isSinglePoint && (nodeIds.size() != 1)) {
  1575.         throw new IOException("singlePoint tree, but missing single point...");
  1576.       }
  1577.       int counter = 0;
  1578.       for (MtasTreeNodeId nodeId : nodeIds) {
  1579.         counter++;
  1580.         objectRefCorrected = (nodeId.ref - refApproxOffset);
  1581.         assert objectRefCorrected >= objectRefCorrectedPrevious : "objectRefCorrected<objectRefCorrectedPrevious : "
  1582.             + objectRefCorrected + " and " + objectRefCorrectedPrevious;
  1583.         out.writeVLong((objectRefCorrected - objectRefCorrectedPrevious));
  1584.         objectRefCorrectedPrevious = objectRefCorrected;
  1585.         if (storeAdditionalInformation) {
  1586.           assert nodeId.additionalId >= 0 : "nodeId.additionalId < 0 for item "
  1587.               + counter + " : " + nodeId.additionalId;
  1588.           out.writeVInt(nodeId.additionalId);
  1589.           assert nodeId.additionalRef >= 0 : "nodeId.additionalRef < 0 for item "
  1590.               + counter + " : " + nodeId.additionalRef;
  1591.           out.writeVLong(nodeId.additionalRef);
  1592.         }
  1593.       }
  1594.       return fpIndexObjectPosition;
  1595.     } else {
  1596.       return null;
  1597.     }
  1598.   }

  1599.   /**
  1600.    * Token stats add.
  1601.    *
  1602.    * @param min
  1603.    *          the min
  1604.    * @param max
  1605.    *          the max
  1606.    */
  1607.   private void tokenStatsAdd(int min, int max) {
  1608.     tokenStatsNumber++;
  1609.     if (tokenStatsMinPos == null) {
  1610.       tokenStatsMinPos = min;
  1611.     } else {
  1612.       tokenStatsMinPos = Math.min(tokenStatsMinPos, min);
  1613.     }
  1614.     if (tokenStatsMaxPos == null) {
  1615.       tokenStatsMaxPos = max;
  1616.     } else {
  1617.       tokenStatsMaxPos = Math.max(tokenStatsMaxPos, max);
  1618.     }
  1619.   }

  1620.   /**
  1621.    * Copy object and update stats.
  1622.    *
  1623.    * @param id
  1624.    *          the id
  1625.    * @param in
  1626.    *          the in
  1627.    * @param inRef
  1628.    *          the in ref
  1629.    * @param out
  1630.    *          the out
  1631.    * @throws IOException
  1632.    *           Signals that an I/O exception has occurred.
  1633.    */
  1634.   private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef,
  1635.       IndexOutput out) throws IOException {
  1636.     int mtasId;
  1637.     int objectFlags;
  1638.     // read
  1639.     in.seek(inRef);
  1640.     mtasId = in.readVInt();
  1641.     assert id == mtasId : "wrong id detected while copying object";
  1642.     objectFlags = in.readVInt();
  1643.     out.writeVInt(mtasId);
  1644.     out.writeVInt(objectFlags);
  1645.     if ((objectFlags
  1646.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PARENT) {
  1647.       out.writeVInt(in.readVInt());
  1648.     }
  1649.     if ((objectFlags
  1650.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_RANGE) {
  1651.       int minPos = in.readVInt();
  1652.       int maxPos = in.readVInt();
  1653.       out.writeVInt(minPos);
  1654.       out.writeVInt(maxPos);
  1655.       tokenStatsAdd(minPos, maxPos);
  1656.     } else if ((objectFlags
  1657.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_POSITION_SET) {
  1658.       int size = in.readVInt();
  1659.       out.writeVInt(size);
  1660.       SortedSet<Integer> list = new TreeSet<>();
  1661.       int previousPosition = 0;
  1662.       for (int t = 0; t < size; t++) {
  1663.         int pos = in.readVInt();
  1664.         out.writeVInt(pos);
  1665.         previousPosition = (pos + previousPosition);
  1666.         list.add(previousPosition);
  1667.       }
  1668.       assert list.size() == size : "duplicate positions in set are not allowed";
  1669.       tokenStatsAdd(list.first(), list.last());
  1670.     } else {
  1671.       int pos = in.readVInt();
  1672.       out.writeVInt(pos);
  1673.       tokenStatsAdd(pos, pos);
  1674.     }
  1675.     if ((objectFlags
  1676.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_OFFSET) {
  1677.       out.writeVInt(in.readVInt());
  1678.       out.writeVInt(in.readVInt());
  1679.     }
  1680.     if ((objectFlags
  1681.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_REALOFFSET) {
  1682.       out.writeVInt(in.readVInt());
  1683.       out.writeVInt(in.readVInt());
  1684.     }
  1685.     if ((objectFlags
  1686.         & MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) == MtasCodecPostingsFormat.MTAS_OBJECT_HAS_PAYLOAD) {
  1687.       int length = in.readVInt();
  1688.       out.writeVInt(length);
  1689.       byte[] payload = new byte[length];
  1690.       in.readBytes(payload, 0, length);
  1691.       out.writeBytes(payload, payload.length);
  1692.     }
  1693.     out.writeVLong(in.readVLong());
  1694.   }

  /**
   * Closes this consumer by closing the delegate fields consumer.
   *
   * @throws IOException
   *           Signals that an I/O exception has occurred.
   * @see org.apache.lucene.codecs.FieldsConsumer#close()
   */
  @Override
  public void close() throws IOException {
    delegateFieldsConsumer.close();
  }

  1704. }