Trie树（字典树）-白红宇

Trie树（字典树）

阅读量：2378 次

发布时间：2019-05-10

本文共 3239 字，大约阅读时间需要 10 分钟。

Trie树（字典树）

什么是Trie树

Trie树是一种多叉树结构，通常用来统计和排序大量的字符串。

上图所示就是一棵Trie树。该树包含的字符串集合为{“at”, “bee”, “ben”, “bt”, “q”}。

Trie树有如下特点：

根节点不包含字符，除根节点外每一个节点都只包含一个字符。

从根节点到某一节点，路径上经过的字符连接起来，为该节点对应的字符串。

每个节点的所有子节点包含的字符都不相同。
Trie树将字符串存储在一条路径上。

Trie树的优缺点

Trie树是以空间换时间。用字符串的公共前缀来降低查询时间开销以达到提高效率的目的。

优点：

可以最大限度减少无畏的字符串比较，查询效率高。它插入和查询的时间复杂度都为O(k),其中k为key的长度。

Trie树中不同的关键字不会产生碰撞

Trie树可以对关键字按字典序排序

缺点：

空间消耗大

当hash函数很好时，效率不如哈希搜索

Trie树的应用

字符串检索：对于建立好的Trie树，查询是只需要从根节点开始一个个字符比较即可

词频统计：统计单词出现的次数。这时Trie树节点通常有一个count变量统计字符串出现的次数

字符串排序：可以先插入，然后先序遍历输出字符串

前缀匹配：找到以某个字符串开头的字符串

Trie树的数据结构

利用字符串创建一棵Trie树，这个Trie树保留了字符串的公共前缀。以小写英文单词创建字典树为例，每一个节点都有26个孩子(有26个字母)。

结构如下：

//Trie nodetypedef struct TrieNode{    int count;//统计单词前缀出现的次数    bool exist; //该节点是否存在    struct TrieNode* next[26];//指向子树的指针}TrieNode, *Trie;

简单实现

#include 
   
    #include 
    
     #include 
     
      #include 
      
       #include 
       
        #pragma warning(disable:4996)//vs//Trie nodetypedef struct TrieNode{    int count;//统计单词前缀出现的次数    struct TrieNode* next[26];//指向子树的指针    bool exist; //该节点是否存在}TrieNode, *Trie;//创建一个节点TrieNode* CreateTrieNode(){    TrieNode* node = (TrieNode*)malloc(sizeof(TrieNode));    assert(node);    //if (node == NULL)    //{
       //    printf("Create Trie node error: Out of memoey!\n");    //    return NULL;    //}    node->count = 0;    node->exist = false;    memset(node->next, 0, sizeof(node->next));    return node;}//插入到Trie中void TrieInsert(Trie root, char* word){    TrieNode* node = root;    char* p = word;    int id = 0;    //遍历每一个char    while (*p)    {        id = *p - 'a'; // find the char position in the child        if (node->next[id] == NULL) //不存在*p这条路径        {            node->next[id] = CreateTrieNode();        }        node = node->next[id];//指针向下移动.这里必须明白：每一个节点代表的是从根节点到此节点路径上所有字符组成的单词        node->count++;//前缀加1        ++p;    }    printf("insert %s ok!\n", word);    node->exist = true;}//查找，和插入类似,返回以此字符为前缀的单词的个数int TrieSearch(Trie root, char* word){    TrieNode* node = root;    char* p = word;    int id;    while (*p)    {        id = *p - 'a';        node = node->next[id];        if (node == NULL)        {            printf("not find the word:%s\n", word);            return 0;                   }        ++p;    }    printf("find the word:%s\n", word);    return node->count;}int main(){    freopen("read.txt", "r", stdin);    Trie root = CreateTrieNode();//init    char str[100][20];//100个单词，每个单词有20个字符    int n = 0;    while (~scanf("%s", str[n]))    {        TrieInsert(root, str[n++]);    }    for (int i = 0; i < 10; ++i)    {        TrieSearch(root, str[i]);    }    //无法检测的情况    char str1[20] = "carbin";    TrieSearch(root, str1);    return 0;}/*read.txt内容如下：carbohydratecartcarburetorcaramelcariboucarboniccartilagecarboncarriagecartoncarcarbonateoutput:insert carbohydrate ok!insert cart ok!insert carburetor ok!insert caramel ok!insert caribou ok!insert carbonic ok!insert cartilage ok!insert carbon ok!insert carriage ok!insert carton ok!insert car ok!insert carbonate ok!find the word:carbohydratefind the word:cartfind the word:carburetorfind the word:caramelfind the word:cariboufind the word:carbonicfind the word:cartilagefind the word:carbonfind the word:carriagefind the word:cartonnot find the word:carbin*/