字符串
kmp/扩展kmp
kmp:
可以解决的问题:
1)一个串在另一个串中出现的次数
2)最小循环节/使一个串存在两个或两个以上循环增加的最少字符数
3)使一个串变成回文串增加的最小字符数(将原串倒置作为模式串去和原串匹配,len减去匹配到的最大长度即为需要补充的最少字符数,2*len-maxlen为增加字符后的回文长度,代码和kmp一样)
数组 next[i](i从1开始算)代表着,除去第i个数,在一个字符串里面从第一个数到第(i-1)字符串前缀与后缀最长重复的长度。
求nex数组时有两个变量i,j,i指向每个字符,j指向该字符下的nex[i]
求循环节:
l = Len - nex[len]
如果len%l == 0,说明最小循环节长度为l,否则需要曾加l - nex[len] % l.
代码:
#include <bits/stdc++.h> #include <iostream> #include <cstdio> #define ll long long #define inf 0x3f3f3f3f const int mod = 1e9+7; const int msize = 50005; using namespace std; int nex[100005]; char s[100005]; void getnext() { nex[0] = -1; int i,j; i = 0,j = -1; int len = strlen(s); while(i < len){ if(j == -1 || s[i] == s[j]) { i++; j++; nex[i] = j; } else { j = nex[j]; } } } int main() { int t; scanf("%d",&t); while(t--){ scanf("%s",s); getnext(); int len = strlen(s); int l = len - nex[len]; if(len % l == 0 && nex[len] != 0) printf("0\n"); else printf("%d\n",l - nex[len] % l); } return 0; }kmp匹配模板
#include <iostream> #include <algorithm> #include <cstdio> #include <cmath> #include <cstring> const int mod = 1e6+7; using namespace std; int nxt[105]; char s[105],p[105]; void getnext() { int l = strlen(p); int i = 0,j = -1; nxt[0] = -1; while(i < l && j < l){ if(j == -1 || p[i] == p[j]){ i++; j++; nxt[i] = j; } else j = nxt[j]; } } int kmp() { int l1 = strlen(s),l2 = strlen(p); int i = 0, j = 0; while(i < l1 && j < l2){ if(j == -1 || s[i] == p[j]){ i++; j++; } else j = nxt[j]; if(j == l2) return i - l2; } return -1; } int main() { cin >> s >> p; getnext(); cout << kmp(); return 0; }
单hash/双hash
单hash
将每一个字符串按一定的进制转化成数字,这个进制最好选择比最大字符大的素数。
例如12345,按十进制转化
p = 233,133,1331
mod = 1e9+7,998244353,1000001011
公式:
hash[i] = hash[i-1]*p + s[i] (其实就是进制转化公式,只不过合并起来了,到最后一个的时候拆开看就是进制转化公式)
p[i] = p[i - 1]*p
要求[l,r]的hash值
hash[l,r] = hash[r] - hash[l - 1] * p[r - l + 1]
代码:
#include <bits/stdc++.h> #include <iostream> #include <cstdio> #include <stdlib.h> #include <string.h> #define ll long long #define ull unsigned long long #define inf 0x3f3f3f3f const int mod = 1e9+7; int p1 = 133; int p2 = 233; int mod1 = 998244353; int mod2 = 1000001011; const int msize = 50005; using namespace std; ull h1[3050],h2[3050],p[3050]; set<int>q; int main() { int n; char s[1505]; scanf("%d",&n); unsigned ll t,t1,res; for(int i = 1; i <= n; i++){ scanf("%s",s); t = res = 0; int len = strlen(s); for(int i = 0; i < len; i++){ if(s[i] >= '0' && s[i] <= '9'){ t1 = s[i] - '0'; } else if(s[i] >= 'a' && s[i] <= 'z'){ t1 = s[i] - 'a'; } else t1 = s[i] - 'A'; t = t*26 + t1; } q.insert(t); } cout << q.size(); }
代码:
#include <bits/stdc++.h> #include <iostream> #include <cstdio> #include <stdlib.h> #include <string.h> #define ll long long #define ull unsigned long long #define inf 0x3f3f3f3f const int mod = 1e9+7; int p1 = 133; int p2 = 233; int mod1 = 998244353; int mod2 = 1000001011; const int msize = 50005; using namespace std; ull h1[1000005],h2[1000005],p[1000005]; int main() { int n,m; char s[1000005]; scanf("%s",s+1); scanf("%d",&m); int len = strlen(s+1); unsigned ll t,t1,res; h1[1] = s[1]; p[0] = 1; p[1] = p2; for(int i = 2; i <= len; i++){ h1[i] = h1[i - 1] * p2 + s[i]; p[i] = p[i - 1] * p2; } while(m--){ int l1,r1,l2,r2; scanf("%d%d%d%d",&l1,&r1,&l2,&r2); ull val1 = h1[r1] - h1[l1 - 1] * p[r1 - l1 + 1]; ull val2 = h1[r2] - h1[l2 - 1] * p[r2 - l2 + 1]; if(val1 == val2) cout << "Yes\n"; else cout << "No\n"; } }
欧涛爬树:hash+树形dp
用vector存图和直接求字符串会爆内存,所有要用链式前向星存图,字符串转化为hash值。
#include <iostream> #include <cstdio> #include <algorithm> #include <stack> #include <queue> #include <string.h> #include <cmath> #include <bitset> #include <set> #include <map> #include <list> #define ll long long const int inf = 0x3f3f3f3f; const int mod = 1e9; const ll ds = 1e15+7; const double p = 3.141592653589793238462643383; const ll res = 233; using namespace std; struct edge{ int v,next; }edge[5000005*2]; int head[5000005],cnt = 0; string s; //vector<int>a[5000005]; set<ll>st; void add(int u,int v) { edge[cnt].v = v; edge[cnt].next = head[u]; head[u] = cnt++; } void init() { for(int i = 0; i < s.size()+5; i++){ head[i] = -1; } } void dfs(int son,int fa,ll sum) { int t = 0;//记录孩子个数 for(int i = head[son]; ~i; i = edge[i].next){ int v = edge[i].v; if(v == fa) continue; t++; dfs(v,son,sum*res + s[v]); } if(t == 0) st.insert(sum);//没有孩子,则到达叶子节点,也就到了最远处,插入集合中。 } int main() { int n,u,v; while(~scanf("%d",&n)){ cin >> s; cnt = 0; init(); for(int i = 1; i < n; i++){ scanf("%d%d",&u,&v); add(u-1,v-1); add(v-1,u-1); } dfs(0,-1,(ll)(s[0])); printf("%d\n",st.size()); st.clear(); s.clear(); } return 0; }
双hash
为了避免冲突,对字符串按另一种进制转化,如果同时等于两种进制的值,则认为这两个字符串相等。
马拉车
#include <bits/stdc++.h> #include <iostream> #include <cstdio> #include <stdlib.h> #include <string.h> #define ll long long #define ull unsigned long long #define inf 0x3f3f3f3f const int mod = 1e9+7; //998244353; const int msize = 50005; using namespace std; const int MAXN = 110005; char s[MAXN<<1]; int p[MAXN<<1],l,r; int Manacher() { int len = strlen(s),maxp = 0,maxl = 0;//maxp是最长回文串的中心位置 for(int i = len; i >= 0; i--){ s[i*2+2] = s[i]; s[i*2+1] = '#'; } s[0] = '*';//最前面要加一个字符 for(int i = 2; i < 2*len+1; i++){ if(p[maxp] + maxp > i) p[i] = min(p[2*maxp-i],p[maxp]+maxp-i); else p[i]=1; while(s[i-p[i]] == s[i+p[i]]) p[i]++; if(p[maxp] + maxp < i+p[i])maxp = i; if(maxl < p[i]){ l = (i-p[i])/2;//最长回文的左右边界 r = (i+p[i])/2-2; maxl = p[i]; } } //int ans=0; //for(int i=0; i < 2*len+1; i++) ans += p[i]/2; //ans是回文串的个数 return maxl-1;//最长回文串的长度 } int main() { while(~scanf("%s",s)){ printf("%d\n",Manacher()); } return 0; }