public class SmartcnDictUtils
extends java.lang.Object
限定符和类型 | 字段和说明 |
---|---|
static int | CHAR_NUM_IN_FILE |
private static java.lang.String | CHARSET_GB2312 |
static java.util.Map<java.lang.String,java.lang.Integer> | defaultDelimiterFreqsMap |
static int | GB2312_CHAR_NUM |
static int | GB2312_FIRST_CHAR |
static java.lang.String | TYPE_BIGRAM |
static java.lang.String | TYPE_CORE |
构造器和说明 |
---|
SmartcnDictUtils() |
限定符和类型 | 方法和说明 |
---|---|
static void |
create(java.lang.String filePath,
java.lang.String type,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTFsMap,
java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap)
创建词库文件
|
private static java.lang.String |
getCCByGB2312Id(int ccid) |
private static short |
getGB2312Id(char ch) |
private static byte[] |
intToLEBytes(int i) |
private static void |
merge(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tempTfsMap,
java.util.Set<java.lang.String> keys) |
static void |
mergeTFsMap(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap,
java.util.Map<java.lang.String,java.lang.Integer> source)
增加新的分词
|
static java.lang.String |
readCnTerm(java.lang.String src) |
private static void |
readCoreMemDelimeter(java.lang.String cc,
char[][] termsArray,
int[] freqArray,
java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap) |
private static void |
readCoreMemFromArrays(char[][][] wordItemCharArrayTable,
int[][] wordItemFrequencyTable,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap,
java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap) |
private static void |
readCoreMemTerms(java.lang.String cc,
char[][] termsArray,
int[] freqArray,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap) |
static void |
readFromCoreMem(java.io.InputStream inputStream,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap,
java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap)
读取已有词库mem文件
|
static void |
skipWord(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap,
java.util.List<java.lang.String> skipWordList)
删除分词
|
private static java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> |
transSource(java.util.Map<java.lang.String,java.lang.Integer> source) |
private static void |
writeDelimiters(java.io.OutputStream oStream,
java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap) |
private static void |
writeEmpty(java.io.OutputStream oStream) |
private static void |
writeInt(java.io.OutputStream oStream,
int i) |
private static void |
writeTFs(java.io.OutputStream oStream,
java.util.Map<java.lang.String,java.lang.Integer> tfs) |
private static final java.lang.String CHARSET_GB2312
public static final int GB2312_FIRST_CHAR
public static final int GB2312_CHAR_NUM
public static final int CHAR_NUM_IN_FILE
public static final java.lang.String TYPE_CORE
public static final java.lang.String TYPE_BIGRAM
public static final java.util.Map<java.lang.String,java.lang.Integer> defaultDelimiterFreqsMap
public static java.lang.String readCnTerm(java.lang.String src)
public static void readFromCoreMem(java.io.InputStream inputStream, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap, java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap) throws java.io.IOException, java.lang.ClassNotFoundException
参数:
inputStream -
charTermFreqsMap -
delimiterFreqsMap -
抛出:
java.io.IOException
java.lang.ClassNotFoundException
public static void skipWord(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap, java.util.List<java.lang.String> skipWordList)
参数:
tfsMap -
skipWordList -
public static void mergeTFsMap(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap, java.util.Map<java.lang.String,java.lang.Integer> source)
参数:
tfsMap -
source -
public static void create(java.lang.String filePath, java.lang.String type, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTFsMap, java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap)
参数:
filePath -
type -
charTFsMap -
delimiterFreqsMap -
private static java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> transSource(java.util.Map<java.lang.String,java.lang.Integer> source)
private static void readCoreMemFromArrays(char[][][] wordItemCharArrayTable, int[][] wordItemFrequencyTable, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap, java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap)
private static void readCoreMemTerms(java.lang.String cc, char[][] termsArray, int[] freqArray, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> charTermFreqsMap)
private static void readCoreMemDelimeter(java.lang.String cc, char[][] termsArray, int[] freqArray, java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap)
private static void merge(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tfsMap, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Integer>> tempTfsMap, java.util.Set<java.lang.String> keys)
private static void writeDelimiters(java.io.OutputStream oStream, java.util.Map<java.lang.String,java.lang.Integer> delimiterFreqsMap) throws java.lang.Exception
java.lang.Exception
private static void writeTFs(java.io.OutputStream oStream, java.util.Map<java.lang.String,java.lang.Integer> tfs) throws java.lang.Exception
java.lang.Exception
private static void writeEmpty(java.io.OutputStream oStream) throws java.lang.Exception
java.lang.Exception
private static void writeInt(java.io.OutputStream oStream, int i) throws java.lang.Exception
java.lang.Exception
private static byte[] intToLEBytes(int i)
private static java.lang.String getCCByGB2312Id(int ccid)
private static short getGB2312Id(char ch)