// CodecUtil.java
package mtas.codec.util;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import mtas.analysis.token.MtasToken;
import mtas.codec.MtasCodecPostingsFormat;
import mtas.parser.function.util.MtasFunctionParserFunction;
import mtas.search.spans.util.MtasSpanQuery;
import mtas.codec.util.CodecComponent.ComponentField;
import mtas.codec.util.CodecComponent.ComponentCollection;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanWeight;
/**
 * Static helper methods and constants used by the MTAS codec: names of the
 * supported statistics items, parsing of a requested statistics type, helpers
 * to split a term into prefix and value, and thin entry points that delegate
 * to {@link CodecCollector}.
 *
 * <p>
 * The class only contains static members and cannot be instantiated.
 */
public final class CodecUtil {

  /** Statistics item: geometric mean. */
  public static final String STATS_TYPE_GEOMETRICMEAN = "geometricmean";

  /** Statistics item: kurtosis. */
  public static final String STATS_TYPE_KURTOSIS = "kurtosis";

  /** Statistics item: maximum. */
  public static final String STATS_TYPE_MAX = "max";

  /** Statistics item: mean. */
  public static final String STATS_TYPE_MEAN = "mean";

  /** Statistics item: minimum. */
  public static final String STATS_TYPE_MIN = "min";

  /** Statistics item: number of values. */
  public static final String STATS_TYPE_N = "n";

  /** Statistics item: median. */
  public static final String STATS_TYPE_MEDIAN = "median";

  /** Statistics item: population variance. */
  public static final String STATS_TYPE_POPULATIONVARIANCE = "populationvariance";

  /** Statistics item: quadratic mean. */
  public static final String STATS_TYPE_QUADRATICMEAN = "quadraticmean";

  /** Statistics item: skewness. */
  public static final String STATS_TYPE_SKEWNESS = "skewness";

  /** Statistics item: standard deviation. */
  public static final String STATS_TYPE_STANDARDDEVIATION = "standarddeviation";

  /** Statistics item: sum. */
  public static final String STATS_TYPE_SUM = "sum";

  /** Statistics item: sum of squares. */
  public static final String STATS_TYPE_SUMSQ = "sumsq";

  /** Statistics item: sum of logs. */
  public static final String STATS_TYPE_SUMOFLOGS = "sumoflogs";

  /** Statistics item: variance. */
  public static final String STATS_TYPE_VARIANCE = "variance";

  /** Keyword that expands to every item in the list of known statistics. */
  public static final String STATS_TYPE_ALL = "all";

  /** Name of the distribution statistics function. */
  public static final String STATS_FUNCTION_DISTRIBUTION = "distribution";

  /** The Constant SORT_TERM. */
  public static final String SORT_TERM = "term";

  /** The Constant SORT_ASC. */
  public static final String SORT_ASC = "asc";

  /** The Constant SORT_DESC. */
  public static final String SORT_DESC = "desc";

  /** Names accepted as statistics functions (called as {@code name(...)}). */
  private static final List<String> STATS_FUNCTIONS = Arrays
      .asList(STATS_FUNCTION_DISTRIBUTION);

  /** All plain statistics items accepted by {@link #createStatsItems}. */
  private static final List<String> STATS_TYPES = Arrays.asList(
      STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_KURTOSIS, STATS_TYPE_MAX,
      STATS_TYPE_MEAN, STATS_TYPE_MIN, STATS_TYPE_N, STATS_TYPE_MEDIAN,
      STATS_TYPE_POPULATIONVARIANCE, STATS_TYPE_QUADRATICMEAN,
      STATS_TYPE_SKEWNESS, STATS_TYPE_STANDARDDEVIATION, STATS_TYPE_SUM,
      STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS, STATS_TYPE_VARIANCE);

  /** Items that leave the statistics type at {@link #STATS_BASIC}. */
  private static final List<String> STATS_BASIC_TYPES = Arrays
      .asList(STATS_TYPE_N, STATS_TYPE_SUM, STATS_TYPE_MEAN);

  /** Items that raise the statistics type to {@link #STATS_ADVANCED}. */
  private static final List<String> STATS_ADVANCED_TYPES = Arrays.asList(
      STATS_TYPE_MAX, STATS_TYPE_MIN, STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS,
      STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_STANDARDDEVIATION,
      STATS_TYPE_VARIANCE, STATS_TYPE_POPULATIONVARIANCE,
      STATS_TYPE_QUADRATICMEAN);

  /** Items that raise the statistics type to {@link #STATS_FULL}. */
  private static final List<String> STATS_FULL_TYPES = Arrays
      .asList(STATS_TYPE_KURTOSIS, STATS_TYPE_MEDIAN, STATS_TYPE_SKEWNESS);

  /** Statistics type: basic. */
  public static final String STATS_BASIC = "basic";

  /** Statistics type: advanced. */
  public static final String STATS_ADVANCED = "advanced";

  /** Statistics type: full. */
  public static final String STATS_FULL = "full";

  /** The Constant DATA_TYPE_LONG. */
  public static final String DATA_TYPE_LONG = "long";

  /** The Constant DATA_TYPE_DOUBLE. */
  public static final String DATA_TYPE_DOUBLE = "double";

  /**
   * Matches one item of a comma separated statistics type: a name (any
   * characters except '(' and ',') optionally followed by a parenthesized
   * argument. Group 1 is the whole item, group 2 the name, group 3 the
   * optional parenthesized part.
   */
  private static final Pattern STATS_ITEMS_PATTERN = Pattern
      .compile("(([^\\(,]+)(\\([^\\)]*\\))?)");

  /**
   * Same as {@link #STATS_ITEMS_PATTERN} but the parenthesized part is
   * required; group 4 is the text between the parentheses.
   */
  private static final Pattern STATS_FUNCTION_ITEMS_PATTERN = Pattern
      .compile("(([^\\(,]+)(\\(([^\\)]*)\\)))");

  /**
   * Not instantiable: this class only offers static members.
   */
  private CodecUtil() {
    // don't do anything
  }

  /**
   * Checks whether the given prefix is registered as single position prefix
   * for a field. The list of single position prefixes is read from the field
   * attribute
   * {@link MtasCodecPostingsFormat#MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION}
   * and split on {@link MtasToken#DELIMITER}.
   *
   * @param fieldInfo
   *          the field info
   * @param prefix
   *          the prefix
   * @return true, if the prefix occurs in the attribute value
   * @throws IOException
   *           if {@code fieldInfo} is null or the attribute is not set
   */
  public static boolean isSinglePositionPrefix(FieldInfo fieldInfo,
      String prefix) throws IOException {
    if (fieldInfo == null) {
      throw new IOException("no fieldInfo");
    }
    String info = fieldInfo.getAttribute(
        MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
    if (info == null) {
      throw new IOException("no "
          + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
    }
    String[] singlePositionPrefixes = info
        .split(Pattern.quote(MtasToken.DELIMITER));
    return Arrays.asList(singlePositionPrefixes).contains(prefix);
  }

  /**
   * Returns the value part of a term: everything after the first occurrence
   * of {@link MtasToken#DELIMITER}, with NUL characters (U+0000) removed.
   *
   * @param term
   *          the term, may be null
   * @return the value, or null if the term is null, contains no delimiter, or
   *         has nothing after the delimiter
   */
  public static String termValue(String term) {
    if (term == null) {
      // consistent with termPrefixValue: null in, null out
      return null;
    }
    int delimiterStart = term.indexOf(MtasToken.DELIMITER);
    if (delimiterStart < 0) {
      return null;
    }
    String value = term
        .substring(delimiterStart + MtasToken.DELIMITER.length());
    // emptiness is decided before the NUL characters are stripped
    if (value.isEmpty()) {
      return null;
    }
    return value.replace("\u0000", "");
  }

  /**
   * Returns the prefix part of a term: everything before the first occurrence
   * of {@link MtasToken#DELIMITER}, or the whole term if there is no
   * delimiter, with NUL characters (U+0000) removed.
   *
   * @param term
   *          the term, may be null
   * @return the prefix, or null if the term is null
   */
  public static String termPrefix(String term) {
    if (term == null) {
      // consistent with termPrefixValue: null in, null out
      return null;
    }
    int delimiterStart = term.indexOf(MtasToken.DELIMITER);
    String prefix = (delimiterStart >= 0) ? term.substring(0, delimiterStart)
        : term;
    return prefix.replace("\u0000", "");
  }

  /**
   * Returns the term with all NUL characters (U+0000) removed.
   *
   * @param term
   *          the term, may be null
   * @return the cleaned term, or null if the term is null
   */
  public static String termPrefixValue(String term) {
    return (term == null) ? null : term.replace("\u0000", "");
  }

  /**
   * Collects data for a field. For every span query in
   * {@code fieldStats.spanQueryList} the query is rewritten against the
   * reader of the searcher and a weight is created; the actual collecting is
   * delegated to {@link CodecCollector}. Nothing happens if
   * {@code fieldStats} is null.
   *
   * @param field
   *          the field
   * @param searcher
   *          the searcher
   * @param rawReader
   *          the raw reader
   * @param fullDocList
   *          the full doc list
   * @param fullDocSet
   *          the full doc set
   * @param fieldStats
   *          the field stats, may be null
   * @param status
   *          the status, passed on unchanged to the collector
   * @throws IllegalAccessException
   *           the illegal access exception
   * @throws IllegalArgumentException
   *           the illegal argument exception
   * @throws InvocationTargetException
   *           the invocation target exception
   * @throws IOException
   *           Signals that an I/O exception has occurred.
   */
  public static void collectField(String field, IndexSearcher searcher,
      IndexReader rawReader, ArrayList<Integer> fullDocList,
      ArrayList<Integer> fullDocSet, ComponentField fieldStats, Status status)
      throws IllegalAccessException, IllegalArgumentException,
      InvocationTargetException, IOException {
    if (fieldStats == null) {
      return;
    }
    IndexReader reader = searcher.getIndexReader();
    HashMap<MtasSpanQuery, SpanWeight> spansQueryWeight = new HashMap<>();
    final float boost = 0;
    // an empty spanQueryList simply leaves the map empty
    for (MtasSpanQuery sq : fieldStats.spanQueryList) {
      MtasSpanQuery rewritten = (MtasSpanQuery) sq.rewrite(reader);
      spansQueryWeight.put(sq, rewritten.createWeight(searcher, false, boost));
    }
    // collect
    CodecCollector.collectField(field, searcher, reader, rawReader,
        fullDocList, fullDocSet, fieldStats, spansQueryWeight, status);
  }

  /**
   * Collects data for a collection by delegating to {@link CodecCollector}.
   * Nothing happens if {@code collectionInfo} is null.
   *
   * @param reader
   *          the reader
   * @param fullDocSet
   *          the full doc set
   * @param collectionInfo
   *          the collection info, may be null
   * @throws IOException
   *           Signals that an I/O exception has occurred.
   */
  public static void collectCollection(IndexReader reader,
      List<Integer> fullDocSet, ComponentCollection collectionInfo)
      throws IOException {
    if (collectionInfo != null) {
      CodecCollector.collectCollection(reader, fullDocSet, collectionInfo);
    }
  }

  /**
   * Parses a comma separated statistics type into a sorted set of items.
   * Known statistics names are added as such, {@link #STATS_TYPE_ALL} adds
   * every known statistics name, and a statistics function is added in its
   * full {@code name(argument)} form. If nothing is requested, the result
   * contains sum, n and mean.
   *
   * @param statsType
   *          the requested statistics, may be null
   * @return the sorted set of statistics items
   * @throws IOException
   *           if an item is unknown, or a statistics function is used without
   *           parentheses
   */
  static SortedSet<String> createStatsItems(String statsType)
      throws IOException {
    SortedSet<String> statsItems = new TreeSet<>();
    SortedSet<String> functionItems = new TreeSet<>();
    if (statsType != null) {
      Matcher m = STATS_ITEMS_PATTERN.matcher(statsType.trim());
      while (m.find()) {
        String name = m.group(2).trim();
        if (STATS_TYPES.contains(name)) {
          statsItems.add(name);
        } else if (name.equals(STATS_TYPE_ALL)) {
          statsItems.addAll(STATS_TYPES);
        } else if (STATS_FUNCTIONS.contains(name)) {
          if (m.group(3) == null) {
            throw new IOException("'" + name + "' should be called as '" + name
                + "()' with an optional argument");
          }
          // keep the complete call, including the argument
          functionItems.add(m.group(1).trim());
        } else {
          throw new IOException("unknown statsType '" + name + "'");
        }
      }
    }
    // default when neither statistics nor functions were requested
    if (statsItems.isEmpty() && functionItems.isEmpty()) {
      statsItems.add(STATS_TYPE_SUM);
      statsItems.add(STATS_TYPE_N);
      statsItems.add(STATS_TYPE_MEAN);
    }
    statsItems.addAll(functionItems);
    return statsItems;
  }

  /**
   * Determines the statistics type needed to provide the given items and to
   * sort on the given sort type. The result is {@link #STATS_FULL} if any
   * item or the sort type is a full type or if any item is a call of a
   * statistics function; otherwise {@link #STATS_ADVANCED} if any item or the
   * sort type is an advanced type; otherwise {@link #STATS_BASIC}.
   *
   * @param statsItems
   *          the stats items, null is treated as empty
   * @param sortType
   *          the sort type, may be null
   * @param functionParser
   *          the function parser (not used by this method)
   * @return one of {@link #STATS_BASIC}, {@link #STATS_ADVANCED} or
   *         {@link #STATS_FULL}
   */
  static String createStatsType(Set<String> statsItems, String sortType,
      MtasFunctionParserFunction functionParser) {
    String statsType = STATS_BASIC;
    if (statsItems != null) {
      for (String statsItem : statsItems) {
        if (STATS_FULL_TYPES.contains(statsItem)) {
          statsType = STATS_FULL;
          break;
        } else if (STATS_ADVANCED_TYPES.contains(statsItem)) {
          statsType = STATS_ADVANCED;
        } else if (!STATS_BASIC_TYPES.contains(statsItem)
            && isStatsFunctionCall(statsItem)) {
          statsType = STATS_FULL;
          break;
        }
        // basic items never change the type found so far
      }
    }
    if (sortType != null && STATS_TYPES.contains(sortType)) {
      if (STATS_FULL_TYPES.contains(sortType)) {
        statsType = STATS_FULL;
      } else if (STATS_ADVANCED_TYPES.contains(sortType)
          && !STATS_FULL.equals(statsType)) {
        // never downgrade from full to advanced
        statsType = STATS_ADVANCED;
      }
    }
    return statsType;
  }

  /**
   * Checks whether an item contains a call {@code name(...)} of one of the
   * known statistics functions.
   *
   * @param statsItem
   *          the stats item
   * @return true, if the item calls a known statistics function
   */
  private static boolean isStatsFunctionCall(String statsItem) {
    Matcher m = STATS_FUNCTION_ITEMS_PATTERN.matcher(statsItem.trim());
    return m.find() && STATS_FUNCTIONS.contains(m.group(2).trim());
  }

  /**
   * Checks whether the given name is one of the known statistics items.
   *
   * @param type
   *          the type
   * @return true, if is stats type
   */
  public static boolean isStatsType(String type) {
    return STATS_TYPES.contains(type);
  }
}