-
Notifications
You must be signed in to change notification settings - Fork 2
/
WordDict.h
66 lines (51 loc) · 1.62 KB
/
WordDict.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#ifndef _WORDDICT_H
#define _WORDDICT_H
#include <fstream>
#include "HMMDict.h"
// Added by freeeyes.
// Uses a direct hash table in place of the trie: saves dictionary space with the same efficiency.
// Pool of _Word_Info nodes.
// NOTE(review): only declarations are visible in this header; the behavioural
// notes on individual members are inferred from names and signatures and must
// be confirmed against the implementation file.
class CWordInfoPool
{
public:
    CWordInfoPool();
    ~CWordInfoPool();

    // Both take a node count and a raw buffer pointer and return a size_t.
    // Presumably Init() lays out a fresh pool in pData while Load() attaches to
    // an already initialised one -- TODO confirm.
    size_t Init(int nPoolCount, char* pData);
    size_t Load(int nPoolCount, char* pData);

    void Close();

    // Accessor for the crypt table pointer (presumably m_pCryptTable -- TODO confirm).
    char* GetCryptTable();

    // Presumably hands out an unused node from the pool -- TODO confirm.
    _Word_Info* Create();

    // Conversion between a node pointer and an integer offset, in both directions.
    // NOTE(review): the unit and origin of the offset are not visible here.
    int Get_Node_Offset(_Word_Info* pWordInfo);
    _Word_Info* Get_NodeOffset_Ptr(int nOffset);

    // Presumably gives a node back to the pool; the meaning of the bool result
    // is not visible here -- TODO confirm.
    bool Delete(_Word_Info* pWordInfo);

private:
    void prepareCryptTable();

    char* m_pCryptTable;
    char* m_pBase;                // start address of the whole memory region
    int m_nPoolCount;
    int m_nCurrIndex;
    _Word_Info* m_WordInfoList;   // memory pool list
};
// Word dictionary holding, by value, a CWordInfoPool, a CHashTable and a CHmmDict.
// NOTE(review): only declarations are visible in this header; the behavioural
// notes on individual members are inferred from names and signatures and must
// be confirmed against the implementation file.
class CWordDict
{
public:
    CWordDict();
    ~CWordDict();

    // Presumably the number of bytes a caller must provide for a dictionary of
    // nPoolSize entries -- TODO confirm.
    size_t Get_Mem_Size(int nPoolSize);

    // Init() takes a dictionary file path, an HMM file path and a raw buffer;
    // Load() takes only the buffer.
    // NOTE(review): looks like Init() builds into pData and Load() reuses a
    // previously built buffer -- confirm.
    bool Init(const char* pFile, const char* pHMMFile, char* pData);
    bool Load(char* pData);

    // Both fill vecWord from pSentence and return an int.
    // NOTE(review): the meaning of the return value, of nType and of
    // nSentenceID is not visible in this header.
    int Cut(const char* pSentence, vector<_Word_Param>& vecWord, int nType = SELECT_RUNE, bool blHMM = false);
    int Cut_Rune(const char* pSentence, vector<_Word_Param>& vecWord, int nSentenceID = 1, int nType = SELECT_RUNE);

    // Presumably inserts one word with its weight (nRote) and part-of-speech
    // tag (pWordSpeech) -- TODO confirm.
    bool Add_Word(const char* pWord, int nRote, const char* pWordSpeech);

private:
    // Internal helpers.
    bool Read_File_Line(ifstream& ifs, string& strLine);
    bool Set_Dict_Hash_Table(vector<string>& objTempAttrList);
    void Get_Sentence_ID(const char* pWord, int& nSentenceID);
    void HMM_Cut_Word(vector<_Word_Param>& vecTempWord, vector<_Word_Param>& vecWord, int nType);

    // State.
    CWordInfoPool m_WordInfoPool;
    int m_nPoolSize;
    CHashTable m_hashDict;
    CHmmDict m_objHmmDict;
};
#endif