Trie插入和搜索实现原理和代码实现

2021年4月16日19:01:24 发表评论 741 次浏览

本文概述

Trie是一种高效的信息检索数据结构。使用Trie,可以使搜索复杂度达到最优限度(密钥长度)。如果我们将键存储在二叉搜索树中,一个良好平衡的BST所需要的时间将与M * log N成比例,其中M是最大字符串长度,N是树中键的数量。使用Trie,我们可以在O(M)时间内搜索密钥。然而,罚款是在Trie存储要求上(请参阅Trie的申请以了解更多详情)

Trie插入和搜索实现原理和代码实现

每个Trie节点由多个分支组成。每个分支表示键的一个可能字符。我们需要将每个键的最后一个节点标记为单词节点的结尾。Trie节点字段isEndOfWord用于区分节点作为单词节点的结尾。用一个简单的结构来表示英语字母表的节点可以如下所示:

// Trie节点

struct TrieNode

{

struct TrieNode * children [ALPHABET_SIZE];

// 如果节点的isEndOfWord为真, 表示单词的末尾

bool isEndOfWord;

};

将密钥插入Trie是一种简单的方法。输入键的每个字符都作为一个单独的Trie节点插入。请注意孩子们是指向下一级Trie节点的指针(或引用)的数组。关键字符充当数组的索引孩子们。如果输入键是新键或现有键的扩展, 则需要构造键的不存在节点, 并将单词的结尾标记为最后一个节点。如果输入键是Trie中现有键的前缀, 我们只需将键的最后一个节点标记为单词的结尾即可。密钥长度确定Trie深度。

搜索键类似于插入操作, 但是, 我们仅比较字符并向下移动。搜索可能由于字符串的结尾或Trie中缺少关键字而终止。在前一种情况下, 如果isEndofWord最后一个节点的字段为true, 则密钥存在于trie中。在第二种情况下, 搜索不会终止, 而不会检查键的所有字符, 因为该键不在Trie中。

下图说明了如何使用以下示例中给出的键构造trie,

root
                    /  \    \
                    t   a     b
                    |   |     |
                    h   n     y
                    |   |  \  |
                    e   s  y  e
                 / |   |
                 i  r   w
                 |  |   |
                 r  e   e
                        |
                        r

在图片中, 每个字符都是类型trie_node_t。例如, 根是trie_node_t类型, 它是子级一种, b和Ť填充后, root的所有其他节点将为NULL。类似地, 下一级别的" a"只有一个孩子(" n"), 所有其他孩子均为NULL。叶节点在蓝色.

插入和搜索费用O(key_length), 但是Trie的内存要求是O(ALPHABET_SIZE * key_length * N)其中N是Trie中的键数。有效表示Trie节点(例如压缩的Trie, 三元搜索树等)以最小化trie的内存要求。

C++

//C++ implementation of search and insert
//operations on Trie
#include <bits/stdc++.h>
using namespace std;
  
const int ALPHABET_SIZE = 26;
  
//trie node
struct TrieNode
{
     struct TrieNode *children[ALPHABET_SIZE];
  
     //isEndOfWord is true if the node represents
     //end of a word
     bool isEndOfWord;
};
  
//Returns new trie node (initialized to NULLs)
struct TrieNode *getNode( void )
{
     struct TrieNode *pNode =  new TrieNode;
  
     pNode->isEndOfWord = false ;
  
     for ( int i = 0; i <ALPHABET_SIZE; i++)
         pNode->children[i] = NULL;
  
     return pNode;
}
  
//If not present, inserts key into trie
//If the key is prefix of trie node, just
//marks leaf node
void insert( struct TrieNode *root, string key)
{
     struct TrieNode *pCrawl = root;
  
     for ( int i = 0; i <key.length(); i++)
     {
         int index = key[i] - 'a' ;
         if (!pCrawl->children[index])
             pCrawl->children[index] = getNode();
  
         pCrawl = pCrawl->children[index];
     }
  
     //mark last node as leaf
     pCrawl->isEndOfWord = true ;
}
  
//Returns true if key presents in trie, else
//false
bool search( struct TrieNode *root, string key)
{
     struct TrieNode *pCrawl = root;
  
     for ( int i = 0; i <key.length(); i++)
     {
         int index = key[i] - 'a' ;
         if (!pCrawl->children[index])
             return false ;
  
         pCrawl = pCrawl->children[index];
     }
  
     return (pCrawl != NULL && pCrawl->isEndOfWord);
}
  
//Driver
int main()
{
     //Input keys (use only 'a' through 'z'
     //and lower case)
     string keys[] = { "the" , "a" , "there" , "answer" , "any" , "by" , "bye" , "their" };
     int n = sizeof (keys)/sizeof (keys[0]);
  
     struct TrieNode *root = getNode();
  
     //Construct trie
     for ( int i = 0; i <n; i++)
         insert(root, keys[i]);
  
     //Search for different keys
     search(root, "the" )? cout <<"Yes\n" :
                          cout <<"No\n" ;
     search(root, "these" )? cout <<"Yes\n" :
                            cout <<"No\n" ;
     return 0;
}

C

//C implementation of search and insert operations
//on Trie
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
  
#define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])
  
//Alphabet size (# of symbols)
#define ALPHABET_SIZE (26)
  
//Converts key current character into index
//use only 'a' through 'z' and lower case
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
  
//trie node
struct TrieNode
{
     struct TrieNode *children[ALPHABET_SIZE];
  
     //isEndOfWord is true if the node represents
     //end of a word
     bool isEndOfWord;
};
  
//Returns new trie node (initialized to NULLs)
struct TrieNode *getNode( void )
{
     struct TrieNode *pNode = NULL;
  
     pNode = ( struct TrieNode *) malloc ( sizeof ( struct TrieNode));
  
     if (pNode)
     {
         int i;
  
         pNode->isEndOfWord = false ;
  
         for (i = 0; i <ALPHABET_SIZE; i++)
             pNode->children[i] = NULL;
     }
  
     return pNode;
}
  
//If not present, inserts key into trie
//If the key is prefix of trie node, just marks leaf node
void insert( struct TrieNode *root, const char *key)
{
     int level;
     int length = strlen (key);
     int index;
  
     struct TrieNode *pCrawl = root;
  
     for (level = 0; level <length; level++)
     {
         index = CHAR_TO_INDEX(key[level]);
         if (!pCrawl->children[index])
             pCrawl->children[index] = getNode();
  
         pCrawl = pCrawl->children[index];
     }
  
     //mark last node as leaf
     pCrawl->isEndOfWord = true ;
}
  
//Returns true if key presents in trie, else false
bool search( struct TrieNode *root, const char *key)
{
     int level;
     int length = strlen (key);
     int index;
     struct TrieNode *pCrawl = root;
  
     for (level = 0; level <length; level++)
     {
         index = CHAR_TO_INDEX(key[level]);
  
         if (!pCrawl->children[index])
             return false ;
  
         pCrawl = pCrawl->children[index];
     }
  
     return (pCrawl != NULL && pCrawl->isEndOfWord);
}
  
//Driver
int main()
{
     //Input keys (use only 'a' through 'z' and lower case)
     char keys[][8] = { "the" , "a" , "there" , "answer" , "any" , "by" , "bye" , "their" };
  
     char output[][32] = { "Not present in trie" , "Present in trie" };
  
  
     struct TrieNode *root = getNode();
  
     //Construct trie
     int i;
     for (i = 0; i <ARRAY_SIZE(keys); i++)
         insert(root, keys[i]);
  
     //Search for different keys
     printf ( "%s --- %s\n" , "the" , output[search(root, "the" )] );
     printf ( "%s --- %s\n" , "these" , output[search(root, "these" )] );
     printf ( "%s --- %s\n" , "their" , output[search(root, "their" )] );
     printf ( "%s --- %s\n" , "thaw" , output[search(root, "thaw" )] );
  
     return 0;
}

Java

//Java implementation of search and insert operations
//on Trie
public class Trie {
      
     //Alphabet size (# of symbols)
     static final int ALPHABET_SIZE = 26 ;
      
     //trie node
     static class TrieNode
     {
         TrieNode[] children = new TrieNode[ALPHABET_SIZE];
       
         //isEndOfWord is true if the node represents
         //end of a word
         boolean isEndOfWord;
          
         TrieNode(){
             isEndOfWord = false ;
             for ( int i = 0 ; i <ALPHABET_SIZE; i++)
                 children[i] = null ;
         }
     };
       
     static TrieNode root; 
      
     //If not present, inserts key into trie
     //If the key is prefix of trie node, //just marks leaf node
     static void insert(String key)
     {
         int level;
         int length = key.length();
         int index;
       
         TrieNode pCrawl = root;
       
         for (level = 0 ; level <length; level++)
         {
             index = key.charAt(level) - 'a' ;
             if (pCrawl.children[index] == null )
                 pCrawl.children[index] = new TrieNode();
       
             pCrawl = pCrawl.children[index];
         }
       
         //mark last node as leaf
         pCrawl.isEndOfWord = true ;
     }
       
     //Returns true if key presents in trie, else false
     static boolean search(String key)
     {
         int level;
         int length = key.length();
         int index;
         TrieNode pCrawl = root;
       
         for (level = 0 ; level <length; level++)
         {
             index = key.charAt(level) - 'a' ;
       
             if (pCrawl.children[index] == null )
                 return false ;
       
             pCrawl = pCrawl.children[index];
         }
       
         return (pCrawl != null && pCrawl.isEndOfWord);
     }
       
     //Driver
     public static void main(String args[])
     {
         //Input keys (use only 'a' through 'z' and lower case)
         String keys[] = { "the" , "a" , "there" , "answer" , "any" , "by" , "bye" , "their" };
       
         String output[] = { "Not present in trie" , "Present in trie" };
       
       
         root = new TrieNode();
       
         //Construct trie
         int i;
         for (i = 0 ; i <keys.length ; i++)
             insert(keys[i]);
       
         //Search for different keys
         if (search( "the" ) == true )
             System.out.println( "the --- " + output[ 1 ]);
         else System.out.println( "the --- " + output[ 0 ]);
          
         if (search( "these" ) == true )
             System.out.println( "these --- " + output[ 1 ]);
         else System.out.println( "these --- " + output[ 0 ]);
          
         if (search( "their" ) == true )
             System.out.println( "their --- " + output[ 1 ]);
         else System.out.println( "their --- " + output[ 0 ]);
          
         if (search( "thaw" ) == true )
             System.out.println( "thaw --- " + output[ 1 ]);
         else System.out.println( "thaw --- " + output[ 0 ]);
         
     }
}
//This code is contributed by Sumit Ghosh

python

# Python program for insert and search
# operation in a Trie
  
class TrieNode:
      
     # Trie node class
     def __init__( self ):
         self .children = [ None ] * 26
  
         # isEndOfWord is True if node represent the end of the word
         self .isEndOfWord = False
  
class Trie:
      
     # Trie data structure class
     def __init__( self ):
         self .root = self .getNode()
  
     def getNode( self ):
      
         # Returns new trie node (initialized to NULLs)
         return TrieNode()
  
     def _charToIndex( self , ch):
          
         # private helper function
         # Converts key current character into index
         # use only 'a' through 'z' and lower case
          
         return ord (ch) - ord ( 'a' )
  
  
     def insert( self , key):
          
         # If not present, inserts key into trie
         # If the key is prefix of trie node, # just marks leaf node
         pCrawl = self .root
         length = len (key)
         for level in range (length):
             index = self ._charToIndex(key[level])
  
             # if current character is not present
             if not pCrawl.children[index]:
                 pCrawl.children[index] = self .getNode()
             pCrawl = pCrawl.children[index]
  
         # mark last node as leaf
         pCrawl.isEndOfWord = True
  
     def search( self , key):
          
         # Search key in the trie
         # Returns true if key presents 
         # in trie, else false
         pCrawl = self .root
         length = len (key)
         for level in range (length):
             index = self ._charToIndex(key[level])
             if not pCrawl.children[index]:
                 return False
             pCrawl = pCrawl.children[index]
  
         return pCrawl ! = None and pCrawl.isEndOfWord
  
# driver function
def main():
  
     # Input keys (use only 'a' through 'z' and lower case)
     keys = [ "the" , "a" , "there" , "anaswe" , "any" , "by" , "their" ]
     output = [ "Not present in trie" , "Present in trie" ]
  
     # Trie object
     t = Trie()
  
     # Construct trie
     for key in keys:
         t.insert(key)
  
     # Search for different keys
     print ( "{} ---- {}" . format ( "the" , output[t.search( "the" )]))
     print ( "{} ---- {}" . format ( "these" , output[t.search( "these" )]))
     print ( "{} ---- {}" . format ( "their" , output[t.search( "their" )]))
     print ( "{} ---- {}" . format ( "thaw" , output[t.search( "thaw" )]))
  
if __name__ = = '__main__' :
     main()
  
# This code is contributed by Atul Kumar (www.facebook.com/atul.kr.007)

C#

//C# implementation of search and 
//insert operations on Trie
using System;
      
public class Trie 
{
      
     //Alphabet size (# of symbols)
     static readonly int ALPHABET_SIZE = 26;
      
     //trie node
     class TrieNode
     {
         public TrieNode[] children = new TrieNode[ALPHABET_SIZE];
      
         //isEndOfWord is true if the node represents
         //end of a word
         public bool isEndOfWord;
          
         public TrieNode()
         {
             isEndOfWord = false ;
             for ( int i = 0; i <ALPHABET_SIZE; i++)
                 children[i] = null ;
         }
     };
      
     static TrieNode root; 
      
     //If not present, inserts key into trie
     //If the key is prefix of trie node, //just marks leaf node
     static void insert(String key)
     {
         int level;
         int length = key.Length;
         int index;
      
         TrieNode pCrawl = root;
      
         for (level = 0; level <length; level++)
         {
             index = key[level] - 'a' ;
             if (pCrawl.children[index] == null )
                 pCrawl.children[index] = new TrieNode();
      
             pCrawl = pCrawl.children[index];
         }
      
         //mark last node as leaf
         pCrawl.isEndOfWord = true ;
     }
      
     //Returns true if key 
     //presents in trie, else false
     static bool search(String key)
     {
         int level;
         int length = key.Length;
         int index;
         TrieNode pCrawl = root;
      
         for (level = 0; level <length; level++)
         {
             index = key[level] - 'a' ;
      
             if (pCrawl.children[index] == null )
                 return false ;
      
             pCrawl = pCrawl.children[index];
         }
      
         return (pCrawl != null && pCrawl.isEndOfWord);
     }
      
     //Driver
     public static void Main()
     {
         //Input keys (use only 'a' 
         //through 'z' and lower case)
         String []keys = { "the" , "a" , "there" , "answer" , "any" , "by" , "bye" , "their" };
      
         String []output = { "Not present in trie" , "Present in trie" };
      
      
         root = new TrieNode();
      
         //Construct trie
         int i;
         for (i = 0; i <keys.Length ; i++)
             insert(keys[i]);
      
         //Search for different keys
         if (search( "the" ) == true )
             Console.WriteLine( "the --- " + output[1]);
         else Console.WriteLine( "the --- " + output[0]);
          
         if (search( "these" ) == true )
             Console.WriteLine( "these --- " + output[1]);
         else Console.WriteLine( "these --- " + output[0]);
          
         if (search( "their" ) == true )
             Console.WriteLine( "their --- " + output[1]);
         else Console.WriteLine( "their --- " + output[0]);
          
         if (search( "thaw" ) == true )
             Console.WriteLine( "thaw --- " + output[1]);
         else Console.WriteLine( "thaw --- " + output[0]);
          
     }
}
  
/* This code contributed by PrinciRaj1992 */

输出:

the --- Present in trie
these --- Not present in trie
their --- Present in trie
thaw --- Not present in trie

如果发现任何不正确的地方, 或者想分享有关上述主题的更多信息, 请写评论。

木子山

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: