MtasTokenizerFactory.java

package mtas.analysis.util;

import mtas.analysis.MtasTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

/**
 * A factory for creating MtasTokenizer objects.
 */
public class MtasTokenizerFactory extends TokenizerFactory
    implements ResourceLoaderAware {

  /** The Constant log. */
  private static final Log log = LogFactory.getLog(MtasTokenizerFactory.class);

  /** The Constant ARGUMENT_CONFIGFILE. */
  public static final String ARGUMENT_CONFIGFILE = "configFile";

  /** The Constant ARGUMENT_CONFIG. */
  public static final String ARGUMENT_CONFIG = "config";

  /** The Constant ARGUMENT_ANALYZER. */
  public static final String ARGUMENT_PARSER = "parser";

  /** The Constant ARGUMENT_PARSER_ARGS. */
  public static final String ARGUMENT_PARSER_ARGS = "parserArgs";

  /** The Constant ARGUMENT_DEFAULT. */
  public static final String ARGUMENT_DEFAULT = "default";

  /** The config argument. */
  private String configArgument;

  /** The default argument. */
  private String defaultArgument;

  /** The config file argument. */
  private String configFileArgument;

  /** The analyzer argument. */
  private String analyzerArgument;

  /** The parser arguments. */
  private String analyzerArgumentParserArgs;

  /** The configs. */
  private HashMap<String, MtasConfiguration> configs = null;

  /** The config. */
  private MtasConfiguration config = null;

  /**
   * Instantiates a new mtas tokenizer factory.
   *
   * @param args the args
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public MtasTokenizerFactory(Map<String, String> args) throws IOException {
    this(args, null);
  }

  /**
   * Instantiates a new mtas tokenizer factory.
   *
   * @param args the args
   * @param resourceLoader the resource loader
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public MtasTokenizerFactory(Map<String, String> args,
      ResourceLoader resourceLoader) throws IOException {
    super(args);
    configFileArgument = get(args, ARGUMENT_CONFIGFILE);
    configArgument = get(args, ARGUMENT_CONFIG);
    analyzerArgument = get(args, ARGUMENT_PARSER);
    analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS);
    defaultArgument = get(args, ARGUMENT_DEFAULT);
    int numberOfArgs = 0;
    numberOfArgs = (configFileArgument==null)?numberOfArgs:numberOfArgs+1;
    numberOfArgs = (configArgument==null)?numberOfArgs:numberOfArgs+1;
    numberOfArgs = (analyzerArgument==null)?numberOfArgs:numberOfArgs+1;
    
    if (numberOfArgs>1) {
      throw new IOException(this.getClass().getName() + " can't have multiple of "
          + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG+" AND "+ARGUMENT_PARSER);
    } else if (configArgument == null && defaultArgument != null) {
      throw new IOException(this.getClass().getName() + " can't have "
          + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
    } else if (numberOfArgs==0) {
      throw new IOException(this.getClass().getName() + " should have "
          + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG+" or "+ARGUMENT_PARSER);
    }
    init(resourceLoader);
  }

  /*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.lucene.analysis.util.TokenizerFactory#create(org.apache.lucene.
   * util.AttributeFactory)
   */
  @Override
  public MtasTokenizer create(AttributeFactory factory) {
    MtasTokenizer tokenizer = null;
    try {
      tokenizer = create(factory, null);
    } catch (IOException e) {
      log.error(e);
    }
    return tokenizer;
  }

  public MtasTokenizer create(String configuration) throws IOException {
    return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, configuration);
  }
  
  /**
   * Creates the.
   *
   * @param configuration the configuration
   * @return the mtas tokenizer
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public MtasTokenizer create(String configuration, String defaultConfiguration) throws IOException {
    return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, configuration, defaultConfiguration);
  }
  
  public MtasTokenizer create(AttributeFactory factory, String configuration) throws IOException {
    return create(factory, configuration, null);
  }

  /**
   * Creates the.
   *
   * @param factory the factory
   * @param configuration the configuration
   * @return the mtas tokenizer
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public MtasTokenizer create(AttributeFactory factory, String configuration, String defaultConfiguration)
      throws IOException {
    if(defaultConfiguration==null) {
      defaultConfiguration = defaultArgument;
    }
    if (configs != null && configs.size() > 0) {
      if (configuration == null && defaultConfiguration == null) {
        throw new IOException("no (default)configuration");
      } else if (configuration == null) {
        if (configs.get(defaultConfiguration) != null) {
          return new MtasTokenizer(factory, configs.get(defaultConfiguration));
        } else {
          throw new IOException(
              "default configuration " + defaultConfiguration + " not available");
        }
      } else {
        MtasConfiguration config = configs.get(configuration);
        if (config == null) {
          if (defaultConfiguration != null) {
            if (configs.get(defaultConfiguration) != null) {
              return new MtasTokenizer(factory, configs.get(defaultConfiguration));
            } else {
              throw new IOException("configuration " + configuration
                  + " not found and default configuration " + defaultConfiguration
                  + " not available");
            }
          } else {
            throw new IOException("configuration " + configuration
                + " not available and no default configuration");
          }
        } else {
          return new MtasTokenizer(factory, config);
        }
      }
    } else if (config != null) {
      return new MtasTokenizer(factory, config);
    } else {
      throw new IOException("no configuration");
    }
  }

  /**
   * Inits the.
   *
   * @param resourceLoader the resource loader
   * @throws IOException Signals that an I/O exception has occurred.
   */
  private void init(ResourceLoader resourceLoader) throws IOException {
    if (config == null && configs == null) {
      if (resourceLoader == null) {
        return;
      } else if (configFileArgument == null && configArgument == null && analyzerArgument==null) {
        throw new IOException("no configuration");
      } else {
        if (configFileArgument != null) {
          try {
            config = MtasConfiguration.readConfiguration(
                resourceLoader.openResource(configFileArgument));
          } catch (IOException e) {
            throw new IOException(
                "Problem loading configuration from " + configFileArgument, e);
          }
        }
        if (configArgument != null) {
          try {
            configs = MtasConfiguration.readMtasTokenizerConfigurations(
                resourceLoader, configArgument);
          } catch (IOException e) {
            throw new IOException(
                "Problem loading configurations from " + configArgument, e);
          }
        }
        if (analyzerArgument != null) {
          configs = null;
          config = new MtasConfiguration();
          MtasConfiguration subConfig = new MtasConfiguration();
          subConfig.name = "parser";
          subConfig.attributes.put("name", analyzerArgument);
          subConfig.attributes.put(ARGUMENT_PARSER_ARGS, analyzerArgumentParserArgs);
          config.children.add(subConfig);
        }
      }
    }
  }

  /*
   * (non-Javadoc)
   * 
   * @see org.apache.lucene.analysis.util.ResourceLoaderAware#inform(org.apache.
   * lucene.analysis.util.ResourceLoader)
   */
  @Override
  public void inform(ResourceLoader loader) throws IOException {
    init(loader);
  }

}