ac自动机——学习
简介
ac自动机是一种有确定限状态自动机
大众的理解就是在字典树上跑kmp
前置知识点
kmp, 字典树
正题
我们知道kmp是模式串与文本串进行匹配的算法,即两个串之间匹配。那我们思考这样一个问题。如果模式串有多个,文本串有一个,如何算出每个模式串在文本串上的匹配。按原有思维,我们需要对每个模式串与文本串进行一次kmp,这样的话,时间复杂度就很高了。
于是乎,树上看毛片 横空出世
ac自动机首先将我们要匹配的所有模式串建成了一个字典树。然后在字典树上连接fail边,也就是fail指针。fail指针指向的是当前点所在模式串的所有后缀与整个字典树的所有前缀的最长的位置。有点绕口。
上图中所有有向曲线为fail指针
上图为fail树,即由fail边构成的树
fail指针跟fail树都可以求解多模式匹配问题
fail指针相对来说暴力一点,直接对每个输入字符暴力往上递归到NULL点
fail树即先对每个输入字符所在位置标记,在进行bfs或者dfs从叶子节点到根节点的处理
获得相应的fail指针或者fail树之后,我们就可以开始多模式匹配了,将文本串的每个字符依次字典树匹配,在失配后跳转至相应的位置(当前点所在模式串的所有后缀与整个字典树的所有前缀的最长的位置)继续匹配。
例题
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
const int N = 1e6 + 5;
struct tree//字典树
{
int fail;//失配指针
int vis[26];//子节点
int end;//统计以该节点结尾的字符串数量
}AC[N];
int cnt = 0;
inline void insert(string s)//向字典树上插入字符串
{
int length = s.length();
int now = 0;
for(int i = 0; i < length; i ++)
{
if(!AC[now].vis[s[i] - 'a'])
AC[now].vis[s[i] - 'a'] = ++ cnt;
now = AC[now].vis[s[i] - 'a'];
}
AC[now].end ++;
}
void get_fail()//构建fail指针
{
queue <int> Q;
for(int i = 0; i < 26; i ++)
{
if(AC[0].vis[i] != 0)
{
AC[AC[0].vis[i]].fail = 0;
Q.push(AC[0].vis[i]);
}
}
while(!Q.empty())
{
int u = Q.front();
Q.pop();
for(int i = 0; i < 26; i ++)
{
if(AC[u].vis[i] != 0)
{
AC[AC[u].vis[i]].fail = AC[AC[u].fail].vis[i];
Q.push(AC[u].vis[i]);
}
else
AC[u].vis[i] = AC[AC[u].fail].vis[i];
}
}
}
int AC_Query(string s)//多模式匹配
{
int length = s.length(), now = 0, ans = 0;
for(int i = 0; i < length; i ++)
{
now = AC[now].vis[s[i] - 'a'];
for(int t = now; t && AC[t].end != -1; t = AC[t].fail)
{
ans += AC[t].end;
AC[t].end = -1;
}
}
return ans;
}
int main()
{
ios::sync_with_stdio(false);
int n;
cin >> n;
for(int i = 0; i < n; i ++)
{
string s;
cin >> s;
insert(s);
}
AC[0].fail = 0;//结束标志
get_fail();
string str;
cin >> str;
cout << AC_Query(str) << endl;
}
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
const int N = 1e6 + 5;
struct Tree
{
int fail;
int vis[26];
int end;
}AC[N];
struct node
{
int id, num;
}pp[N];
bool operator < (node a, node b)
{
if(a.num != b.num)
return a.num > b.num;
else
return a.id < b.id;
}
int cnt;
void clean(int x)
{
memset(AC[x].vis, 0, sizeof(AC[x].vis));
AC[x].end = 0;
AC[x].fail = 0;
}
void insert(string s, int id)
{
int length = s.length(), now = 0;
for(int i = 0; i < length; i ++)
{
if(!AC[now].vis[s[i] - 'a'])
{
AC[now].vis[s[i] - 'a'] = ++ cnt;
clean(cnt);
}
now = AC[now].vis[s[i] - 'a'];
}
AC[now].end = id;
}
void get_fail()
{
queue <int> Q;
for(int i = 0; i < 26; i ++)
{
if(AC[0].vis[i] != 0)
{
AC[AC[0].vis[i]].fail = 0;
Q.push(AC[0].vis[i]);
}
}
while(!Q.empty())
{
int u = Q.front();
Q.pop();
for(int i = 0; i < 26; i ++)
{
if(AC[u].vis[i] != 0)
{
AC[AC[u].vis[i]].fail = AC[AC[u].fail].vis[i];
Q.push(AC[u].vis[i]);
}
else
AC[u].vis[i] = AC[AC[u].fail].vis[i];
}
}
}
void AC_Query(string s)
{
int length = s.length(), now = 0;
for(int i = 0; i < length; i ++)
{
now = AC[now].vis[s[i] - 'a'];
for(int t = now; t; t = AC[t].fail)
{
pp[AC[t].end].num ++;
}
}
}
string str[200];
int main()
{
ios::sync_with_stdio(false);
int n;
while(cin >> n, n)
{
cnt = 0;
clean(0);
for(int i = 1; i <= n; i ++)
{
cin >> str[i];
pp[i].num = 0;
pp[i].id = i;
insert(str[i], i);
}
string s;
cin >> s;
AC[0].fail = 0;
get_fail();
AC_Query(s);
sort(pp + 1, pp + n + 1);
cout << pp[1].num << endl;
cout << str[pp[1].id] << endl;
for(int i = 2; i <= n; i ++)
{
if(pp[i].num == pp[i - 1].num)
cout << str[pp[i].id] << endl;
else
break;
}
}
}
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
const int N = 2e6 + 5;
struct Tree
{
int fail;
int vis[26];
int end;
int ans;
}AC[N];
int cnt, num[N], Mp[N], inq[N];
inline void insert(string s, int id)
{
int length = s.length(), now = 0;
for(int i = 0; i < length; i ++)
{
if(!AC[now].vis[s[i] - 'a'])
AC[now].vis[s[i] - 'a'] = ++ cnt;
now = AC[now].vis[s[i] - 'a'];
}
if(!AC[now].end)
AC[now].end = id;
Mp[id] = AC[now].end;
}
void get_fail()
{
queue <int> Q;
for(int i = 0; i < 26; i ++)
{
if(AC[0].vis[i] != 0)
{
inq[0] ++;
AC[AC[0].vis[i]].fail = 0;
Q.push(AC[0].vis[i]);
}
}
while(!Q.empty())
{
int u = Q.front();
Q.pop();
for(int i = 0; i < 26; i ++)
{
if(AC[u].vis[i] != 0)
{
AC[AC[u].vis[i]].fail = AC[AC[u].fail].vis[i];
inq[AC[AC[u].fail].vis[i]] ++;
Q.push(AC[u].vis[i]);
}
else
AC[u].vis[i] = AC[AC[u].fail].vis[i];
}
}
}
void AC_Query(string s)
{
int length = s.length(), now = 0;
for(int i = 0; i < length; i ++)
{
now = AC[now].vis[s[i] - 'a'];
AC[now].ans ++;
}
}
void topu()
{
queue <int> Q;
for(int i = 0; i <= cnt; i ++)
if(!inq[i])
Q.push(i);
while(!Q.empty())
{
int u = Q.front();
Q.pop();
num[AC[u].end] = AC[u].ans;
int v = AC[u].fail;
inq[v] --;
AC[v].ans += AC[u].ans;
if(!inq[v])
Q.push(v);
}
}
int main()
{
ios::sync_with_stdio(false);
int n;
cin >> n;
cnt = 0;
for(int i = 1; i <= n; i ++)
{
string s;
cin >> s;
insert(s, i);
}
AC[0].fail = 0;
memset(inq, 0, sizeof(inq));
get_fail();
memset(num, 0, sizeof(num));
string str;
cin >> str;
AC_Query(str);
topu();
for(int i = 1; i <= n; i ++)
cout << num[Mp[i]] << endl;
}