// MtasSpanRegexpQuery.java

package mtas.search.spans;

import java.io.IOException;

import mtas.analysis.token.MtasToken;
import mtas.codec.util.CodecUtil;
import mtas.search.spans.util.MtasSpanQuery;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;

/**
 * Span query that matches terms by regular expression.
 *
 * <p>The supplied {@link Term} is wrapped in a Lucene {@link RegexpQuery}
 * inside a {@link SpanMultiTermQueryWrapper}. On {@link #rewrite(IndexReader)}
 * the expanded term clauses are converted into {@link MtasSpanTermQuery}
 * instances and combined in a {@link MtasSpanOrQuery}.
 */
public class MtasSpanRegexpQuery extends MtasSpanQuery {

  /**
   * Upper limit on the number of terms a regexp may expand to; exceeding it
   * makes {@link #rewrite(IndexReader)} fail.
   */
  private static final int MTAS_REGEXP_EXPAND_BOUNDARY = 1000000;

  /** Part of the term text before the first {@link MtasToken#DELIMITER}. */
  private String prefix;

  /**
   * Part of the term text after the delimiter, or {@code null} when the
   * delimiter is absent or nothing follows it.
   */
  private String value;

  /** Flag handed on to every {@link MtasSpanTermQuery} built on rewrite. */
  private boolean singlePosition;

  /** The term holding the field and the regular expression text. */
  private Term term;

  /** The wrapped Lucene regexp query, exposed as a span query. */
  private SpanMultiTermQueryWrapper<RegexpQuery> query;

  /**
   * Creates a regexp span query with {@code singlePosition} set to
   * {@code true}.
   *
   * @param term the term holding field and regular expression
   */
  public MtasSpanRegexpQuery(Term term) {
    this(term, true);
  }

  /**
   * Creates a regexp span query.
   *
   * <p>Both arguments of the super constructor are {@code 1} when
   * {@code singlePosition} is set and {@code null} otherwise. The term text
   * is split on the first occurrence of {@link MtasToken#DELIMITER} into a
   * prefix and an optional value, which are only used by
   * {@link #toString(String)}.
   *
   * @param term the term holding field and regular expression
   * @param singlePosition the single position flag
   */
  public MtasSpanRegexpQuery(Term term, boolean singlePosition) {
    super(singlePosition ? 1 : null, singlePosition ? 1 : null);
    this.query = new SpanMultiTermQueryWrapper<>(new RegexpQuery(term));
    this.term = term;
    this.singlePosition = singlePosition;
    String text = term.text();
    int delimiterIndex = text.indexOf(MtasToken.DELIMITER);
    if (delimiterIndex < 0) {
      // no delimiter: the complete text is the prefix
      this.prefix = text;
      this.value = null;
    } else {
      this.prefix = text.substring(0, delimiterIndex);
      String remainder = text
          .substring(delimiterIndex + MtasToken.DELIMITER.length());
      // an empty remainder is treated the same as no value at all
      this.value = remainder.isEmpty() ? null : remainder;
    }
  }

  /**
   * Expands the regexp against the index and rebuilds the result from MTAS
   * span queries.
   *
   * @param reader the index reader used for the expansion
   * @return the rewritten {@link MtasSpanOrQuery} over all matching terms
   * @throws IOException if the wrapped query does not rewrite to a
   *         {@link SpanOrQuery}, if one of its clauses is not a
   *         {@link SpanTermQuery}, or if the number of clauses exceeds
   *         {@code MTAS_REGEXP_EXPAND_BOUNDARY}
   */
  @Override
  public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
    Query rewritten = query.rewrite(reader);
    if (!(rewritten instanceof SpanOrQuery)) {
      throw new IOException("no SpanOrQuery after rewrite");
    }
    SpanQuery[] expanded = ((SpanOrQuery) rewritten).getClauses();
    if (expanded.length > MTAS_REGEXP_EXPAND_BOUNDARY) {
      // open question from the original author: forward index solution?
      throw new IOException("Regexp \"" + CodecUtil.termValue(term.text())
          + "\" expands to " + expanded.length + " terms, too many (boundary "
          + MTAS_REGEXP_EXPAND_BOUNDARY + ")!");
    }
    MtasSpanQuery[] converted = new MtasSpanQuery[expanded.length];
    int position = 0;
    for (SpanQuery clause : expanded) {
      if (!(clause instanceof SpanTermQuery)) {
        throw new IOException("no SpanTermQuery after rewrite");
      }
      MtasSpanTermQuery termQuery = new MtasSpanTermQuery(
          (SpanTermQuery) clause, singlePosition);
      converted[position++] = termQuery.rewrite(reader);
    }
    return new MtasSpanOrQuery(converted).rewrite(reader);
  }

  /**
   * Renders the query as {@code SimpleClassName([field:prefix])} or, when a
   * value is present, {@code SimpleClassName([field:prefix=value])}.
   *
   * @param field not used; the field of the wrapped query is always printed
   * @return the textual representation
   */
  @Override
  public String toString(String field) {
    StringBuilder text = new StringBuilder(getClass().getSimpleName());
    text.append("([").append(query.getField()).append(':').append(prefix);
    if (value != null) {
      text.append('=').append(value);
    }
    return text.append("])").toString();
  }

  /**
   * Returns the field of the term this query was created with.
   *
   * @return the field name
   */
  @Override
  public String getField() {
    return term.field();
  }

  /**
   * Creates the weight by letting the searcher rewrite the wrapped regexp
   * query and delegating to the resulting span query.
   *
   * @param searcher the index searcher
   * @param needsScores whether scores are needed
   * @param boost the boost
   * @return the weight of the rewritten wrapped query
   * @throws IOException if rewriting or weight creation fails
   */
  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
      float boost) throws IOException {
    SpanQuery rewritten = (SpanQuery) searcher.rewrite(query);
    return rewritten.createWeight(searcher, needsScores, boost);
  }

  /**
   * Two instances are equal when they have exactly the same class, equal
   * terms and the same single position flag.
   *
   * @param obj the object to compare with
   * @return {@code true} if both queries are equal
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    MtasSpanRegexpQuery other = (MtasSpanRegexpQuery) obj;
    return singlePosition == other.singlePosition && term.equals(other.term);
  }

  /**
   * Combines the simple class name, the term and the single position flag,
   * the same properties that {@link #equals(Object)} compares.
   *
   * @return the hash code
   */
  @Override
  public int hashCode() {
    int hash = (getClass().getSimpleName().hashCode() * 7) ^ term.hashCode();
    return singlePosition ? hash + 1 : hash;
  }

  /**
   * A regexp query is never a match-all-positions query.
   *
   * @return always {@code false}
   */
  @Override
  public boolean isMatchAllPositionsQuery() {
    return false;
  }

}