MorseDistance.java

package mtas.codec.util.distance;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.lucene.util.BytesRef;

import mtas.analysis.token.MtasToken;

/**
 * The Class MorseDistance.
 */
public class MorseDistance extends Distance {

  /** The initial state. */
  protected final double[] initialState;

  /** The Constant defaultDeletionDistance. */
  protected final static double defaultDeletionDistance = 1.0;

  /** The Constant defaultInsertionDistance. */
  protected final static double defaultInsertionDistance = 10.0;

  /** The Constant defaultReplaceDistance. */
  protected final static double defaultReplaceDistance = 10.0;

  /** The Constant defaultTranspositionDistance. */
  protected final static double defaultTranspositionDistance = 10.0;

  /** The deletion distance. */
  protected double deletionDistance;

  /** The insertion distance. */
  protected double insertionDistance;

  /** The replace distance. */
  protected double replaceDistance;

  /** The transposition distance. */
  protected double transpositionDistance;

  /** The Constant PARAMETER_DELETIONDISTANCE. */
  protected final static String PARAMETER_DELETIONDISTANCE = "deletionDistance";

  /** The Constant PARAMETER_INSERTIONDISTANCE. */
  protected final static String PARAMETER_INSERTIONDISTANCE = "insertionDistance";

  /** The Constant PARAMETER_REPLACEDISTANCE. */
  protected final static String PARAMETER_REPLACEDISTANCE = "replaceDistance";

  /** The Constant PARAMETER_TRANSPOSITIONDISTANCE. */
  protected final static String PARAMETER_TRANSPOSITIONDISTANCE = "transpositionDistance";

  /** The morse base. */
  protected final String morseBase;

  /** The Constant ALPHABET_MORSE. */
  private static final Map<Byte, String> ALPHABET_MORSE;
  static {
    Map<Byte, String> m = new HashMap<>();
    m.put((byte) 'a', ".-");
    m.put((byte) 'b', "-...");
    m.put((byte) 'c', "-.-.");
    m.put((byte) 'd', "-..");
    m.put((byte) 'e', ".");
    m.put((byte) 'f', "..-.");
    m.put((byte) 'g', "--.");
    m.put((byte) 'h', "....");
    m.put((byte) 'i', "..");
    m.put((byte) 'j', ".---");
    m.put((byte) 'k', "-.-");
    m.put((byte) 'l', ".-..");
    m.put((byte) 'm', "--");
    m.put((byte) 'n', "-.");
    m.put((byte) 'o', "---");
    m.put((byte) 'p', ".--.");
    m.put((byte) 'q', "--.-");
    m.put((byte) 'r', ".-.");
    m.put((byte) 's', "...");
    m.put((byte) 't', "-");
    m.put((byte) 'u', "..-");
    m.put((byte) 'v', "...-");
    m.put((byte) 'w', ".--");
    m.put((byte) 'x', "-..-");
    m.put((byte) 'y', "-.--");
    m.put((byte) 'z', "--..");
    m.put((byte) '1', ".----");
    m.put((byte) '2', "..---");
    m.put((byte) '3', "...--");
    m.put((byte) '4', "....-");
    m.put((byte) '5', ".....");
    m.put((byte) '6', "-....");
    m.put((byte) '7', "--...");
    m.put((byte) '8', "---..");
    m.put((byte) '9', "----.");
    m.put((byte) '0', "-----");
    ALPHABET_MORSE = Collections.unmodifiableMap(m);
  }

  /**
   * Instantiates a new morse distance.
   *
   * @param prefix the prefix
   * @param base the base
   * @param maximum the maximum
   * @param parameters the parameters
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public MorseDistance(String prefix, String base, Double minimum, Double maximum,
      Map<String, String> parameters) throws IOException {
    super(prefix, base, minimum, maximum, parameters);
    deletionDistance = defaultDeletionDistance;
    insertionDistance = defaultInsertionDistance;
    replaceDistance = defaultReplaceDistance;
    transpositionDistance = defaultTranspositionDistance;
    if (parameters != null) {
      for (Entry<String, String> entry : parameters.entrySet()) {
        if (entry.getKey().equals(PARAMETER_DELETIONDISTANCE)) {
          deletionDistance = Double.parseDouble(entry.getValue());
        } else if (entry.getKey().equals(PARAMETER_INSERTIONDISTANCE)) {
          insertionDistance = Double.parseDouble(entry.getValue());
        } else if (entry.getKey().equals(PARAMETER_REPLACEDISTANCE)) {
          replaceDistance = Double.parseDouble(entry.getValue());
        } else if (entry.getKey().equals(PARAMETER_TRANSPOSITIONDISTANCE)) {
          transpositionDistance = Double.parseDouble(entry.getValue());
        }
      }
    }
    if (deletionDistance < 0 || insertionDistance < 0 || replaceDistance < 0
        || transpositionDistance < 0) {
      throw new IOException("distances should be zero or positive");
    }
    morseBase = computeMorse(new BytesRef(prefix + MtasToken.DELIMITER + base));
    initialState = new double[morseBase.length() + 1];
    for (int i = 0; i <= morseBase.length(); i++) {
      initialState[i] = i * insertionDistance;
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see
   * mtas.codec.util.distance.Distance#validate(org.apache.lucene.util.BytesRef)
   */
  @Override
  public boolean validateMaximum(BytesRef term) {
    if (morseBase == null) {
      return false;
    } else if (maximum == null) {
      return true;
    } else {
      double[][] state = _start();
      byte b0;
      String morse;
      char ch2 = 0x00;
      int i = term.offset + prefixOffset;
      for (; i < term.length; i++) {
        b0 = term.bytes[i];    
        if (b0 == 0x00) {
          break;
        }
        if (ALPHABET_MORSE.containsKey(b0)) {
          morse = ALPHABET_MORSE.get(b0)+" ";
          for (char ch1 : morse.toCharArray()) {            
            state = _step(state, ch1, ch2);
            if (!_can_match(state)) {
              return false;
            }
            ch2 = ch1;
          }
        } else {
          return false;
        }        
      }
      return _is_match(state);      
    }
  }
  
  @Override
  public boolean validateMinimum(BytesRef term) {
    if (minimum == null) {
      return true;
    } else {
      return compute(term)>minimum;
    }
  }
  
  @Override
  public double compute(BytesRef term) {
    String morseKey = computeMorse(term);
    if(morseKey==null) {
      return Double.MAX_VALUE;
    } else {
      double[][] state = _start();
      char ch2 = 0x00;
      for (char ch1 : morseKey.toCharArray()) {
        if (ch1 == 0x00) {
          break;
        }
        state = _step(state, ch1, ch2);
        ch2 = ch1;
      }
      return _distance(state);
    }  
  }

  
  @Override
  public double compute(String key) {
    String morseKey = computeMorse(key);
    if(morseKey==null) {
      return Double.MAX_VALUE;
    } else {
      double[][] state = _start();
      char ch2 = 0x00;
      for (char ch1 : morseKey.toCharArray()) {
        if (ch1 == 0x00) {
          break;
        }
        state = _step(state, ch1, ch2);
        ch2 = ch1;
      }
      return _distance(state);
    }  
  }

  /**
   * Start.
   *
   * @return the double[][]
   */
  private double[][] _start() {
    double[][] startState = new double[3][];
    startState[0] = new double[initialState.length];
    startState[1] = new double[initialState.length];
    startState[2] = Arrays.copyOf(initialState, initialState.length);
    return startState;
  }

  /**
   * Step.
   *
   * @param state the state
   * @param ch1 the ch 1
   * @param ch2 the ch 2
   * @return the double[][]
   */
  private double[][] _step(double[][] state, char ch1, char ch2) {
    double cost;
    _shift(state);
    state[2][0] = state[1][0] + deletionDistance;
    for (int i = 0; i < morseBase.length(); i++) {
      cost = (morseBase.charAt(i) == ch1) ? 0 : replaceDistance;
      state[2][i + 1] = Math.min(state[2][i] + insertionDistance,
          state[1][i] + cost);
      state[2][i + 1] = Math.min(state[2][i + 1],
          state[1][i + 1] + deletionDistance);
      if (i > 0 && ch2 != 0x00 && (morseBase.charAt(i - 1) == ch1)
          && (morseBase.charAt(i) == ch2)) {
        state[2][i + 1] = Math.min(state[2][i + 1],
            state[0][i - 1] + transpositionDistance);
      }
    }
    return state;
  }

  /**
   * Shift.
   *
   * @param state the state
   */
  private void _shift(double[][] state) {
    double[] tmpState = state[0];
    state[0] = state[1];
    state[1] = state[2];
    state[2] = tmpState;
  }

  /**
   * Checks if is match.
   *
   * @param state the state
   * @return true, if successful
   */
  private boolean _is_match(double[][] state) {
    return state[2][state[2].length - 1] < maximum;
  }

  /**
   * Can match.
   *
   * @param state the state
   * @return true, if successful
   */
  private boolean _can_match(double[][] state) {
    for (double d : state[2]) {
      if (d < maximum) {
        return true;
      }
    }
    return false;
  }

  /**
   * Distance.
   *
   * @param state the state
   * @return the double
   */
  private double _distance(double[][] state) {
    return state[2][state[2].length - 1];
  }

  /**
   * Compute morse.
   *
   * @param term the term
   * @return the string
   */
  private String computeMorse(BytesRef term) {
    StringBuilder stringBuilder = new StringBuilder();
    int i = term.offset + prefixOffset;
    for (; i < term.length; i++) {
      if (ALPHABET_MORSE.containsKey(term.bytes[i])) {
        stringBuilder.append(ALPHABET_MORSE.get(term.bytes[i]) + " ");
      } else if(term.bytes[i]!=0x00){
        return null;
      } else {
        break;
      }
    }
    return stringBuilder.toString();
  }
  
  private String computeMorse(String key) {
    StringBuilder stringBuilder = new StringBuilder();
    Byte b;
    for (char ch : key.toCharArray()) {
      b = (byte) ch;
      if (ALPHABET_MORSE.containsKey(b)) {
        stringBuilder.append(ALPHABET_MORSE.get(b) + " ");
      } else {
        return null;
      } 
    }
    return stringBuilder.toString();
  }

}