// MtasPayloadEncoder.java

package mtas.codec.payload;

import java.io.IOException;
import java.util.Arrays;

import mtas.analysis.token.MtasPosition;
import mtas.analysis.token.MtasToken;
import mtas.analysis.token.MtasTokenString;

import org.apache.lucene.util.BytesRef;

/**
 * Encodes a single {@link MtasToken} into a binary payload.
 *
 * <p>
 * {@link #getPayload()} writes, in this order:
 * <ol>
 * <li>two bits for the position type: single {@code 00}, range {@code 10},
 * set {@code 01}, anything else {@code 11};</li>
 * <li>one bit each for "offset present", "real offset present", "parent id
 * present" and "original payload present";</li>
 * <li>one bit for the token type: {@code 0} when the type equals
 * {@code MtasTokenString.TOKEN_TYPE}, {@code 1} otherwise;</li>
 * <li>the token id;</li>
 * <li>position details: the length for a range, or the number of positions
 * followed by the difference between each position and its predecessor for
 * a set (the first position itself is not written);</li>
 * <li>the offset start and offset length, if present;</li>
 * <li>the real offset start (written relative to the offset start when the
 * offset is encoded as well) and real offset length, if present;</li>
 * <li>the parent id relative to the token id, if present;</li>
 * <li>the result of {@code createByte()} on the stream, followed by the raw
 * bytes of the token's original payload, if present.</li>
 * </ol>
 * All integers are written through the
 * {@code writeEliasGammaCoding...} methods of {@link MtasBitOutputStream}.
 *
 * <p>
 * An optional part counts as "present" only when the matching
 * {@code ENCODE_*} flag is set <em>and</em> the token reports having that
 * information.
 */
public class MtasPayloadEncoder {

  /** Flag: append the token's original payload bytes. */
  public static final int ENCODE_PAYLOAD = 1;

  /** Flag: encode the token's offset. */
  public static final int ENCODE_OFFSET = 2;

  /** Flag: encode the token's real offset. */
  public static final int ENCODE_REALOFFSET = 4;

  /** Flag: encode the token's parent id. */
  public static final int ENCODE_PARENT = 8;

  /** Default flags: payload, offset and parent, but no real offset. */
  public static final int ENCODE_DEFAULT = ENCODE_PAYLOAD | ENCODE_OFFSET
      | ENCODE_PARENT;

  /** All flags: payload, offset, real offset and parent. */
  public static final int ENCODE_ALL = ENCODE_PAYLOAD | ENCODE_OFFSET
      | ENCODE_REALOFFSET | ENCODE_PARENT;

  /** The token to encode. */
  private final MtasToken mtasToken;

  /** Bitwise combination of the {@code ENCODE_*} flags. */
  private final int encodingFlags;

  /**
   * Instantiates a new mtas payload encoder.
   *
   * @param token the token to encode
   * @param flags bitwise combination of the {@code ENCODE_*} constants
   *        selecting which optional parts may be written
   */
  public MtasPayloadEncoder(MtasToken token, int flags) {
    mtasToken = token;
    encodingFlags = flags;
  }

  /**
   * Instantiates a new mtas payload encoder using {@link #ENCODE_DEFAULT}.
   *
   * @param token the token to encode
   */
  public MtasPayloadEncoder(MtasToken token) {
    this(token, ENCODE_DEFAULT);
  }

  /**
   * Encodes the token and returns the resulting bytes.
   *
   * <p>
   * Every call encodes into a fresh stream, so output produced by an earlier
   * call is never carried over into a later one.
   *
   * @return the encoded payload
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public BytesRef getPayload() throws IOException {
    // A stream shared between calls would make a second call return the
    // first encoding followed by a second copy of it.
    MtasBitOutputStream out = new MtasBitOutputStream();

    // Resolve the position type once, first match wins (single, range, set),
    // so that the header bits and the position details always agree.
    boolean single = mtasToken.checkPositionType(MtasPosition.POSITION_SINGLE);
    boolean range = !single
        && mtasToken.checkPositionType(MtasPosition.POSITION_RANGE);
    boolean set = !single && !range
        && mtasToken.checkPositionType(MtasPosition.POSITION_SET);
    boolean unknown = !single && !range && !set;

    // Decide once which optional parts are written; each decision drives
    // both its header bit and the matching section further down.
    boolean withOffset = isEnabled(ENCODE_OFFSET) && mtasToken.checkOffset();
    boolean withRealOffset = isEnabled(ENCODE_REALOFFSET)
        && mtasToken.checkRealOffset();
    boolean withParent = isEnabled(ENCODE_PARENT)
        && mtasToken.checkParentId();
    BytesRef originalPayload = isEnabled(ENCODE_PAYLOAD)
        ? mtasToken.getPayload()
        : null;
    boolean withPayload = originalPayload != null;

    // initial bits - position: single=00, range=10, set=01, other=11
    writeFlag(out, range || unknown);
    writeFlag(out, set || unknown);
    // initial bits - offset, realOffset, parentId, original payload
    writeFlag(out, withOffset);
    writeFlag(out, withRealOffset);
    writeFlag(out, withParent);
    writeFlag(out, withPayload);
    // initial bit - token type (1 is reserved for other token types)
    writeFlag(out, !mtasToken.getType().equals(MtasTokenString.TOKEN_TYPE));

    // id
    out.writeEliasGammaCodingNonNegativeInteger(mtasToken.getId());

    // position info; nothing extra is written for single or unknown types
    if (range) {
      // length of the range
      out.writeEliasGammaCodingPositiveInteger(
          1 + mtasToken.getPositionEnd() - mtasToken.getPositionStart());
    } else if (set) {
      // number of positions, then the step from each position to the next
      int[] positions = mtasToken.getPositions();
      out.writeEliasGammaCodingPositiveInteger(positions.length);
      int previous = positions[0];
      for (int i = 1; i < positions.length; i++) {
        out.writeEliasGammaCodingPositiveInteger(positions[i] - previous);
        previous = positions[i];
      }
    }

    // offset info: start, then length
    if (withOffset) {
      out.writeEliasGammaCodingNonNegativeInteger(mtasToken.getOffsetStart());
      out.writeEliasGammaCodingPositiveInteger(
          1 + mtasToken.getOffsetEnd() - mtasToken.getOffsetStart());
    }

    // realOffset info: start, then length
    if (withRealOffset) {
      if (withOffset) {
        // offset is encoded too: store the start relative to it
        out.writeEliasGammaCodingInteger(
            mtasToken.getRealOffsetStart() - mtasToken.getOffsetStart());
      } else {
        out.writeEliasGammaCodingNonNegativeInteger(
            mtasToken.getRealOffsetStart());
      }
      out.writeEliasGammaCodingPositiveInteger(
          1 + mtasToken.getRealOffsetEnd() - mtasToken.getRealOffsetStart());
    }

    // parent info: stored relative to the token's own id
    if (withParent) {
      out.writeEliasGammaCodingInteger(
          mtasToken.getParentId() - mtasToken.getId());
    }

    // add minimal number of zero-bits to get round number of bytes
    out.createByte();

    // finally add original payload bytes (only the referenced slice)
    if (withPayload) {
      out.write(Arrays.copyOfRange(originalPayload.bytes,
          originalPayload.offset,
          originalPayload.offset + originalPayload.length));
    }

    // construct new payload
    return new BytesRef(out.toByteArray());
  }

  /**
   * Checks whether all bits of the given flag are set in the encoding flags.
   *
   * @param flag one of the {@code ENCODE_*} constants
   * @return true, if the flag is set
   */
  private boolean isEnabled(int flag) {
    return (encodingFlags & flag) == flag;
  }

  /**
   * Writes a single bit: {@code 1} for true, {@code 0} for false.
   *
   * @param out the stream to write to
   * @param flag the value to write
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private static void writeFlag(MtasBitOutputStream out, boolean flag)
      throws IOException {
    out.writeBit(flag ? 1 : 0);
  }

}