类 SmartcnDictUtils
java.lang.Object
com.publiccms.common.tools.SmartcnDictUtils
原始dict文件工具类 https://github.com/Kerwin23/smartcn-dict
- 作者:
- Kerwin
-
字段概要
字段 -
构造器概要
构造器 -
方法概要
修饰符和类型 / 方法 / 说明
static void create(String filePath, String type, Map<String, Map<String, Integer>> charTFsMap, Map<String, Integer> delimiterFreqsMap) — 创建词库文件
private static String getCCByGB2312Id(int ccid)
private static short getGB2312Id(char ch)
private static byte[] intToLEBytes(int i)
private static void merge(Map<String, Map<String, Integer>> tfsMap, Map<String, Map<String, Integer>> tempTfsMap, Set<String> keys)
static void mergeTFsMap — 增加新的分词
static String readCnTerm(String src)
private static void readCoreMemDelimeter(String cc, char[][] termsArray, int[] freqArray, Map<String, Integer> delimiterFreqsMap)
private static void readCoreMemFromArrays(char[][][] wordItemCharArrayTable, int[][] wordItemFrequencyTable, Map<String, Map<String, Integer>> charTermFreqsMap, Map<String, Integer> delimiterFreqsMap)
private static void readCoreMemTerms(String cc, char[][] termsArray, int[] freqArray, Map<String, Map<String, Integer>> charTermFreqsMap)
static void readFromCoreMem(InputStream inputStream, Map<String, Map<String, Integer>> charTermFreqsMap, Map<String, Integer> delimiterFreqsMap) — 读取已有词库mem文件
static void skipWord — 删除分词
transSource(Map<String, Integer> source)
private static void writeDelimiters(OutputStream oStream, Map<String, Integer> delimiterFreqsMap)
private static void writeEmpty(OutputStream oStream)
private static void writeInt(OutputStream oStream, int i)
private static void writeTFs(OutputStream oStream, Map<String, Integer> tfs)
-
字段详细资料
-
CHARSET_GB2312
- 另请参阅:
-
GB2312_FIRST_CHAR
public static final int GB2312_FIRST_CHAR- 另请参阅:
-
GB2312_CHAR_NUM
public static final int GB2312_CHAR_NUM- 另请参阅:
-
CHAR_NUM_IN_FILE
public static final int CHAR_NUM_IN_FILE- 另请参阅:
-
TYPE_CORE
- 另请参阅:
-
TYPE_BIGRAM
- 另请参阅:
-
defaultDelimiterFreqsMap
-
-
构造器详细资料
-
SmartcnDictUtils
private SmartcnDictUtils()
-
-
方法详细资料
-
readCnTerm
-
readFromCoreMem
public static void readFromCoreMem(InputStream inputStream, Map<String, Map<String, Integer>> charTermFreqsMap, Map<String, Integer> delimiterFreqsMap) throws IOException, ClassNotFoundException 读取已有词库mem文件- 参数:
inputStream
-charTermFreqsMap
-delimiterFreqsMap
-- 抛出:
IOException
ClassNotFoundException
-
skipWord
删除分词- 参数:
tfsMap
-skipWordList
-
-
mergeTFsMap
增加新的分词- 参数:
tfsMap
-source
-
-
create
public static void create(String filePath, String type, Map<String, Map<String, Integer>> charTFsMap, Map<String, Integer> delimiterFreqsMap) 创建词库文件- 参数:
filePath
-type
-charTFsMap
-delimiterFreqsMap
-
-
transSource
-
readCoreMemFromArrays
-
readCoreMemTerms
-
readCoreMemDelimeter
-
merge
-
writeDelimiters
private static void writeDelimiters(OutputStream oStream, Map<String, Integer> delimiterFreqsMap) throws IOException- 抛出:
IOException
-
writeTFs
- 抛出:
IOException
-
writeEmpty
- 抛出:
IOException
-
writeInt
- 抛出:
IOException
-
intToLEBytes
private static byte[] intToLEBytes(int i) -
getCCByGB2312Id
-
getGB2312Id
-