MtasCodecPostingsFormat.java

package mtas.codec;

import java.io.IOException;

import mtas.analysis.token.MtasTokenString;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;

/**
 * Postings format of the MTAS codec.
 *
 * <p>
 * Besides acting as a Lucene {@link PostingsFormat}, this class holds the
 * constants shared by the MTAS codec (object flags, storage types, file
 * extensions and field info attribute names) and two static helpers,
 * {@link #getToken(IndexInput, IndexInput, Long)} and
 * {@link #getTerm(IndexInput, Long)}, that decode data from the MTAS object
 * and term inputs.
 * </p>
 */
public class MtasCodecPostingsFormat extends PostingsFormat {

  /** The Constant log. */
  private static final Log log = LogFactory
      .getLog(MtasCodecPostingsFormat.class);

  /**
   * Names of the classes that are loaded eagerly on construction to prevent
   * NoClassDefFoundErrors later on.
   */
  private static final String[] PRELOAD_CLASS_NAMES = {
      "mtas.codec.payload.MtasPayloadDecoder",
      "mtas.codec.payload.MtasBitInputStream",
      "mtas.analysis.token.MtasPosition",
      "mtas.analysis.token.MtasOffset",
      "mtas.codec.tree.MtasRBTree",
      "mtas.codec.MtasTerms",
      "mtas.codec.util.CodecInfo",
      "mtas.codec.tree.MtasTreeNodeId" };

  /** The Constant VERSION_START. */
  public static final int VERSION_START = 3;

  /** The Constant VERSION_CURRENT. */
  public static final int VERSION_CURRENT = 3;

  /** Object flag bit: a parent id is stored for the object. */
  static final int MTAS_OBJECT_HAS_PARENT = 1;

  /** Object flag bit: the position is stored as a range (start and length). */
  static final int MTAS_OBJECT_HAS_POSITION_RANGE = 2;

  /** Object flag bit: the position is stored as a set of positions. */
  static final int MTAS_OBJECT_HAS_POSITION_SET = 4;

  /** Object flag bit: an offset is stored for the object. */
  static final int MTAS_OBJECT_HAS_OFFSET = 8;

  /** Object flag bit: a real offset is stored for the object. */
  static final int MTAS_OBJECT_HAS_REALOFFSET = 16;

  /** Object flag bit: a payload is stored for the object. */
  static final int MTAS_OBJECT_HAS_PAYLOAD = 32;

  /** The Constant MTAS_STORAGE_BYTE. */
  public static final int MTAS_STORAGE_BYTE = 0;

  /** The Constant MTAS_STORAGE_SHORT. */
  public static final int MTAS_STORAGE_SHORT = 1;

  /** The Constant MTAS_STORAGE_INTEGER. */
  public static final int MTAS_STORAGE_INTEGER = 2;

  /** The Constant MTAS_STORAGE_LONG. */
  public static final int MTAS_STORAGE_LONG = 3;

  /** The Constant MTAS_TMP_FIELD_EXTENSION. */
  public static final String MTAS_TMP_FIELD_EXTENSION = "mtas.field.temporary";

  /** The Constant MTAS_TMP_OBJECT_EXTENSION. */
  public static final String MTAS_TMP_OBJECT_EXTENSION = "mtas.object.temporary";

  /** The Constant MTAS_TMP_DOCS_EXTENSION. */
  public static final String MTAS_TMP_DOCS_EXTENSION = "mtas.docs.temporary";

  /** The Constant MTAS_TMP_DOC_EXTENSION. */
  public static final String MTAS_TMP_DOC_EXTENSION = "mtas.doc.temporary";

  /** The Constant MTAS_TMP_DOCS_CHAINED_EXTENSION. */
  public static final String MTAS_TMP_DOCS_CHAINED_EXTENSION = "mtas.docs.chained.temporary";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION = "mtas.prefix.single.position";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION = "mtas.prefix.multiple.position";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION = "mtas.prefix.set.position";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_INTERSECTION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_INTERSECTION = "mtas.prefix.intersection";

  /** The Constant MTAS_OBJECT_EXTENSION. */
  public static final String MTAS_OBJECT_EXTENSION = "mtas.object";

  /** The Constant MTAS_TERM_EXTENSION. */
  public static final String MTAS_TERM_EXTENSION = "mtas.term";

  /** The Constant MTAS_FIELD_EXTENSION. */
  public static final String MTAS_FIELD_EXTENSION = "mtas.field";

  /** The Constant MTAS_PREFIX_EXTENSION. */
  public static final String MTAS_PREFIX_EXTENSION = "mtas.prefix";

  /** The Constant MTAS_DOC_EXTENSION. */
  public static final String MTAS_DOC_EXTENSION = "mtas.doc";

  /** The Constant MTAS_INDEX_DOC_ID_EXTENSION. */
  public static final String MTAS_INDEX_DOC_ID_EXTENSION = "mtas.index.doc.id";

  /** The Constant MTAS_INDEX_OBJECT_ID_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_ID_EXTENSION = "mtas.index.object.id";

  /** The Constant MTAS_INDEX_OBJECT_POSITION_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_POSITION_EXTENSION = "mtas.index.object.position";

  /** The Constant MTAS_INDEX_OBJECT_PARENT_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_PARENT_EXTENSION = "mtas.index.object.parent";

  /** The Constant MTAS_INDEX_TERM_PREFIX_POSITION_EXTENSION. */
  public static final String MTAS_INDEX_TERM_PREFIX_POSITION_EXTENSION = "mtas.index.term.prefix.position";

  /**
   * Name used to look up the delegate codec in
   * {@link #fieldsConsumer(SegmentWriteState)} when no delegate postings
   * format was supplied.
   */
  private final String delegateCodecName;

  /** The delegate postings format, or {@code null} if none was supplied. */
  private final PostingsFormat delegatePostingsFormat;

  /**
   * Instantiates a new mtas codec postings format using
   * {@code MtasCodec.MTAS_CODEC_NAME} as codec name and no delegate postings
   * format.
   */
  public MtasCodecPostingsFormat() {
    this(MtasCodec.MTAS_CODEC_NAME);
  }

  /**
   * Instantiates a new mtas codec postings format, registered under
   * {@code MtasCodec.MTAS_CODEC_NAME}, that wraps the given delegate.
   *
   * @param delegate the postings format to delegate to; must not be
   *        {@code null}
   */
  public MtasCodecPostingsFormat(PostingsFormat delegate) {
    super(MtasCodec.MTAS_CODEC_NAME);
    delegateCodecName = delegate.getName();
    delegatePostingsFormat = delegate;
    preloadClasses();
  }

  /**
   * Instantiates a new mtas codec postings format without a delegate postings
   * format; the given name is used both as the name of this postings format
   * and as the name of the codec looked up when writing.
   *
   * @param codecName the codec name
   */
  public MtasCodecPostingsFormat(String codecName) {
    super(codecName);
    delegateCodecName = codecName;
    delegatePostingsFormat = null;
    preloadClasses();
  }

  /**
   * Loads the classes listed in {@link #PRELOAD_CLASS_NAMES} to prevent
   * NoClassDefFoundErrors. Loading stops at the first class that cannot be
   * found; the failure is logged and not propagated.
   */
  private static void preloadClasses() {
    try {
      for (String className : PRELOAD_CLASS_NAMES) {
        Class.forName(className);
      }
    } catch (ClassNotFoundException e) {
      // pass the throwable as well, so the stack trace ends up in the log
      log.error("Could not preload class: " + e.getMessage(), e);
    }
  }

  /**
   * Tests whether all bits of a flag are set in a flags value.
   *
   * @param flags the flags value to inspect
   * @param flag the flag to test for
   * @return true, if every bit of {@code flag} is set in {@code flags}
   */
  private static boolean hasFlag(int flags, int flag) {
    return (flags & flag) == flag;
  }

  /*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.lucene.codecs.PostingsFormat#fieldsProducer(org.apache.lucene
   * .index.SegmentReadState)
   */
  @Override
  public final FieldsProducer fieldsProducer(SegmentReadState state)
      throws IOException {
    return new MtasFieldsProducer(state, getName());
  }

  /*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.lucene.codecs.PostingsFormat#fieldsConsumer(org.apache.lucene
   * .index.SegmentWriteState)
   */
  @Override
  public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
      throws IOException {
    if (delegatePostingsFormat != null) {
      // wrap the consumer of the delegate in a consumer for mtas
      return new MtasFieldsConsumer(
          delegatePostingsFormat.fieldsConsumer(state), state, getName(),
          delegatePostingsFormat.getName());
    } else {
      // no delegate available: look up the codec by name and hand over to the
      // consumer of its postings format
      PostingsFormat pf = Codec.forName(delegateCodecName).postingsFormat();
      return pf.fieldsConsumer(state);
    }
  }

  /**
   * Reads the token stored at the given reference in the object input, and
   * resolves its value from the term input.
   *
   * <p>
   * The object is read in this order: id, flags, optional parent id, position
   * (a range, a set, or a single position, depending on the flags), optional
   * offset, optional real offset, optional payload and finally the reference
   * of the value in the term input.
   * </p>
   *
   * @param inObject the input containing the objects; it is repositioned by
   *        this call
   * @param inTerm the input containing the terms; it is repositioned by this
   *        call
   * @param ref the reference (file pointer) of the object in
   *        {@code inObject}
   * @return the token
   * @throws IOException if reading fails; an I/O exception from the inputs is
   *         passed on unchanged, any other exception raised while decoding
   *         (for example caused by a {@code null} reference) is wrapped
   */
  public static MtasTokenString getToken(IndexInput inObject, IndexInput inTerm,
      Long ref) throws IOException {
    try {
      inObject.seek(ref);
      MtasTokenString token = new MtasTokenString(null, "");
      token.setId(inObject.readVInt());
      token.setTokenRef(ref);
      int objectFlags = inObject.readVInt();
      if (hasFlag(objectFlags, MTAS_OBJECT_HAS_PARENT)) {
        token.setParentId(inObject.readVInt());
      }
      if (hasFlag(objectFlags, MTAS_OBJECT_HAS_POSITION_RANGE)) {
        // stored as start followed by the distance from start to end
        int positionStart = inObject.readVInt();
        int positionEnd = positionStart + inObject.readVInt();
        token.addPositionRange(positionStart, positionEnd);
      } else if (hasFlag(objectFlags, MTAS_OBJECT_HAS_POSITION_SET)) {
        // stored as size followed by the differences between successive
        // positions
        int size = inObject.readVInt();
        int[] positions = new int[size];
        int previous = 0;
        for (int t = 0; t < size; t++) {
          previous += inObject.readVInt();
          positions[t] = previous;
        }
        token.addPositions(positions);
      } else {
        // neither range nor set: a single position
        token.addPosition(inObject.readVInt());
      }
      if (hasFlag(objectFlags, MTAS_OBJECT_HAS_OFFSET)) {
        int offsetStart = inObject.readVInt();
        int offsetEnd = offsetStart + inObject.readVInt();
        token.setOffset(offsetStart, offsetEnd);
      }
      if (hasFlag(objectFlags, MTAS_OBJECT_HAS_REALOFFSET)) {
        int realOffsetStart = inObject.readVInt();
        int realOffsetEnd = realOffsetStart + inObject.readVInt();
        token.setRealOffset(realOffsetStart, realOffsetEnd);
      }
      if (hasFlag(objectFlags, MTAS_OBJECT_HAS_PAYLOAD)) {
        int length = inObject.readVInt();
        byte[] mtasPayload = new byte[length];
        inObject.readBytes(mtasPayload, 0, length);
        token.setPayload(new BytesRef(mtasPayload));
      }
      // the value itself is stored in the term input
      Long termRef = inObject.readVLong();
      inTerm.seek(termRef);
      token.setTermRef(termRef);
      token.setValue(inTerm.readString());
      return token;
    } catch (IOException e) {
      // already the declared type: don't hide the specific subtype by wrapping
      throw e;
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Reads the term stored at the given reference in the term input.
   *
   * @param inTerm the input containing the terms; it is repositioned by this
   *        call
   * @param ref the reference (file pointer) of the term in {@code inTerm}
   * @return the term
   * @throws IOException if reading fails; an I/O exception from the input is
   *         passed on unchanged, any other exception (for example caused by a
   *         {@code null} reference) is wrapped
   */
  public static String getTerm(IndexInput inTerm, Long ref) throws IOException {
    try {
      inTerm.seek(ref);
      return inTerm.readString();
    } catch (IOException e) {
      // already the declared type: don't hide the specific subtype by wrapping
      throw e;
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

}