00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __AHO_CROASICK_HPP__INCLUDED__
00022 #define __AHO_CROASICK_HPP__INCLUDED__
00023
00024 #include <map>
00025 #include <algorithm>
00026 #include <deque>
00027 #include "types.hpp"
00028
00032 template<class Type>
00033 class Aho_Corasick
00034 {
00035
00040 public:
00041 Aho_Corasick() {
00042 root = new TrieNode;
00043 }
00047 ~Aho_Corasick() {
00048 delete root;
00049 }
00053 void clean() {
00054 delete root;
00055 root = new TrieNode;
00056 }
00057 void add_pattern(const t_uid &patternId, int count, Type data[]);
00058 void remove_pattern(int count, Type data[]);
00059 void commit_trie();
00060 bool find_patterns(int count, Type textData[], bool(*CallbackProc)(const t_uid&,int,void*), void* context);
00061
00062 private:
00066 struct TrieNode
00067 {
00068 std::map<Type,TrieNode*> children;
00069 TrieNode* accept;
00070 TrieNode* pi;
00071 t_uid patternId;
00072 int patternSize;
00073 bool accept_copy;
00074
00075 TrieNode() {
00076 accept_copy = true;
00077 accept = 0;
00078 pi = 0;
00079 patternId.length = 0;
00080 patternSize = 0;
00081 }
00082 ~TrieNode() {
00083 typename std::map<Type, TrieNode* >::iterator it = this->children.begin();
00084 while(it != this->children.end()) {
00085 delete it->second;
00086 it++;
00087 }
00088 this->children.clear();
00089 }
00090 };
00091
00092 TrieNode* root;
00093 };
00094
00103 template<class Type>
00104 void Aho_Corasick<Type>::add_pattern(const t_uid &patternId, int count, Type data[]) {
00105 TrieNode* curr = this->root;
00106 for(int x=0;x<count;++x) {
00107 typename std::map<Type, TrieNode* >::iterator it = curr->children.find(data[x]);
00108 if(it == curr->children.end()) {
00109 TrieNode* tmp = new TrieNode;
00110 curr->children[data[x]] = tmp;
00111 curr = tmp;
00112 } else {
00113 curr = it->second;
00114 }
00115 }
00116 curr->accept = curr;
00117 curr->patternId = patternId;
00118 curr->patternSize = count;
00119 curr->accept_copy = false;
00120 }
00121
00129 template<class Type>
00130 void Aho_Corasick<Type>::remove_pattern(int count, Type data[]) {
00131 TrieNode* curr = this->root;
00132
00133 std::deque<TrieNode*> find_stack;
00134
00135 for(int x=0;x<count;++x) {
00136 typename std::map<Type, TrieNode* >::iterator it = curr->children.find(data[x]);
00137 if(it == curr->children.end()) {
00138 return;
00139 } else {
00140 find_stack.push_back(curr);
00141 curr = it->second;
00142 }
00143 }
00144
00145 curr->accept_copy = true;
00146
00147
00148 while(find_stack.size() > 1) {
00149 size_t n = find_stack.size() - 1;
00150 TrieNode* curr = find_stack.back();
00151 find_stack.pop_back();
00152
00153 if(curr->children.size() > 0) {
00154 break;
00155 }
00156 TrieNode* parent = find_stack.back();
00157 parent->children.erase(data[n]);
00158
00159 delete curr;
00160 }
00161 }
00162
00167 template<class Type>
00168 void Aho_Corasick<Type>::commit_trie() {
00169
00170
00171 root->pi = this->root;
00172 std::deque<TrieNode*> Queue;
00173 Queue.push_back(root);
00174
00175 while(Queue.size() != 0) {
00176 TrieNode* v = Queue.front();
00177 Queue.pop_front();
00178
00179 if(v->accept_copy) {
00180 v->accept = 0;
00181 }
00182
00183 for(typename std::map<Type, TrieNode* >::iterator it = v->children.begin(); it != v->children.end(); ++it) {
00184 Queue.push_back(it->second);
00185 }
00186 }
00187
00188 Queue.push_back(root);
00189
00190 while(Queue.size() != 0) {
00191 TrieNode* v = Queue.front();
00192 Queue.pop_front();
00193
00194 typename std::map<Type, TrieNode* >::iterator it = v->children.begin();
00195 while(it != v->children.end()) {
00196 if(v == root) {
00197 it->second->pi = root;
00198 } else {
00199 Type a = it->first;
00200
00201 TrieNode* s = v->pi;
00202 typename std::map<Type, TrieNode* >::iterator it2 = s->children.find(a);
00203 while(s != this->root && it2 == s->children.end()) {
00204 s = s->pi;
00205 it2 = s->children.find(a);
00206 }
00207 if( it2 != s->children.end() ) {
00208 s = it2->second;
00209 }
00210 it->second->pi = s;
00211 if(it->second->accept == 0) {
00212 it->second->accept = s->accept;
00213 it->second->patternId = s->patternId;
00214 it->second->patternSize = s->patternSize;
00215 }
00216 }
00217 Queue.push_back(it->second);
00218 it++;
00219 }
00220 }
00221 }
00222
00233 template<class Type>
00234 bool Aho_Corasick<Type>::find_patterns(int count, Type textData[], bool(*CallbackProc)(const t_uid&,int,void*), void* context) {
00235 bool at_last_one_found = false;
00236
00237 TrieNode* s = this->root;
00238 for(int i=0;i<count;++i) {
00239 typename std::map<Type, TrieNode* >::iterator it = s->children.find(textData[i]);
00240 while( s!=this->root && it == s->children.end()) {
00241 s = s->pi;
00242 it = s->children.find(textData[i]);
00243 }
00244 if( it != s->children.end()) {
00245 s = it->second;
00246 }
00247
00248 TrieNode* tmp = s->accept;
00249 while(tmp != 0) {
00250 at_last_one_found = true;
00251
00252 if(!CallbackProc(tmp->patternId,i-tmp->patternSize+1, context)) {
00253 return true;
00254 }
00255 tmp = tmp->pi->accept;
00256 }
00257 }
00258 return at_last_one_found;
00259 }
00260
00261 #endif