后缀数组小结
后缀数组小结
后缀数组也算是拖了好久了,总算是把题单ak了,小结下就得去搞后缀自动机了,争取南京前把字符串都过一遍。
首先,后缀数组算法本身就是对一个字符串的所有后缀进行排序,从而得出两个比较有用的数组,即sa[i]和rak[i],其含义如下
sa[i]:排名第i的后缀的首字母所在下标
rak[i]:下标为i开头的后缀的排名
具体实现方法参考:后缀数组实现
通过后缀数组就引申出一个非常重要的数组,即
height[i]:排名为i的后缀与其前一名的后缀的最大公共前缀长度
具体实现方法和证明也在上面的博客中。
通过以上几个数组,我们就可以求一些常见的问题:
1.求一个字符串中l下标和r下标开始的后缀的最大公共前缀
首先思考,当已知height[2,3],要如何求出排名第一和排名第三的后缀的最大公共前缀?答案是min(height[2],height[3]),具体是为什么可以在图上画画,很简单,那么推广开来,对于任意下标l和r,要求他们的最大公共前缀,只需算出min(height[rak[l]],height[rak[l]+1]...height[rak[r]])(假设rak[l]小于rak[r],如果不是就对换)即可,而快速求出区间中最小值可用rmq算法,具体参考:RMQ算法
2.求多个字符串的公共子串等问题
利用第一个问题的解决方法,就可以很轻易地解决一个字符串内的指定后缀的公共前缀,那么如果是求多个字符串的公共子串要怎么做呢。第一反应自然是把这些字符串接起来,再利用问题1的方法求解,但是问题来了,直接拼接后求出的height数组是否是正确的呢?例如:串a为aab,串b为aaba,那么拼接后就成了aabaaba,此时我们求下标1和下标4的公共前缀长度就是4,然而实际上a串的长度只有3,结果算出了长度为4的公共子串显然是错的,所以为了避免这一点,我们要在a和b中间插入一个没出现过的字符,比如,aab#aaba,这样在就可以避免上述问题,注意,此字符一定是从没出现过的,然而在实际操作中我们发现要从ascll码中找出一些可见的没出现过的字符是不多的,所以在一些实际问题中我们可以把字符串全部转换成整形,这样我们中间插入的就可以随便选取些较大的数字了,毕竟能选的数字显然要远多于字符。
其实做了这么多后缀数组的题也基本逃不开这些,无非是要再加入些二分,dp啥的,接下来上题目。
Musical Theme
题意:给定一串数字,要求算出最长的公共子串长度,使其至少出现两次,且不能相交,并且长度至少为5,其中如果一个子串是另一个子串中所有数字加上某个字符,也算是相同子串。
思路:首先为了让两个差值为一个常数的子串相等,可以在存储的时候直接存储方差,为了避免负数,再全部加100.然后枚举长度二分,在check的时候只要存在有height大于等于mid,且这个height所代表的两个后缀长度为mid的字符串不相交即满足条件。
#include<iostream> #include<cstdio> #include<cstring> #include<cmath> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 1000000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); int s[MAX_N]; int N,M,rak[MAX_N],sa[MAX_N],tax[MAX_N],tp[MAX_N]; int Height[MAX_N]; void Debug() { printf("*****************\n"); printf("下标"); for (int i = 1; i <= N; i++) printf("%d ", i); printf("\n"); printf("sa "); for (int i = 1; i <= N; i++) printf("%d ", sa[i]); printf("\n"); printf("rak "); for (int i = 1; i <= N; i++) printf("%d ", rak[i]); printf("\n"); printf("tp "); for (int i = 1; i <= N; i++) printf("%d ", tp[i]); printf("\n"); } void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i] += tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M=750; for(int i=1;i<=N;i++) { rak[i]=s[i]; tp[i]=i; } Qsort(); //Debug(); for(int w=1,p=0;p<N;M=p,w<<=1) { p=0; for(int i=1;i<=w;i++) { tp[++p]=N-w+i; } for(int i=1;i<=N;i++) { if(sa[i]>w) { tp[++p]=sa[i]-w; } } Qsort(); for(int i=1;i<=N;i++) { swap(tp[i],rak[i]); } rak[sa[1]]=p=1; for (int i = 2; i <= N; i++) rak[sa[i]] = (tp[sa[i - 1]] == tp[sa[i]] && tp[sa[i - 1] + w] == tp[sa[i] + w]) ? p : ++p; //cout<<p<<' '<<N<<endl; //Debug(); } } void GetHeight() { int j, k = 0; int num=0; for(int i = 1; i <= N; i++) { if(k) k--; int j = sa[rak[i] - 1]; while(i+k<=N&&j+k<=N&&s[j + k] == s[i + k]) k++; Height[rak[i]] = k; //printf("%d\n", k); } } int n,k; int H[MAX_N]; bool check(int mid) { int num=1; int mn=sa[1],mx=sa[1]; for(int i=2;i<=n;i++) { if(Height[i]>=mid) { num++; mn=min(mn,sa[i]); mx=max(mx,sa[i]); if(mx-mn>mid) { return true; } } else { mn=mx=sa[i]; num=1; } } return false; } int main() { while(~scanf("%d",&n)&&n) { for(int i=1;i<=n;i++) { scanf("%d",&s[i]); } for(int i=n;i>=1;i--) { s[i]=s[i]-s[i-1]; s[i]+=100; } N=n; SuffixSort(); GetHeight(); /*for(int i=1;i<=n;i++) { cout<<i<<"-----"<<Height[i]<<endl; }*/ int l=0; int r=N; int mid=(l+r)>>1; while(l<=r) { if(check(mid)) { l=mid+1; } else { r=mid-1; } mid=(l+r)>>1; //cout<<mid<<endl; } if(mid>=4) { mid++; cout<<mid<<endl; } else { cout<<0<<endl; } } //system("pause"); }
Milk Patterns
题意:给定一串数字,求出现k次的最长子串长度
思路:和上一题似乎几乎一样,同样是二分长度,在check的时候只要存在连续k个height的最小值大于等于mid即符合条件。
#include<iostream> #include<cstdio> #include<cstring> #include<cmath> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 1000000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); int s[MAX_N]; int N,M,rak[MAX_N],sa[MAX_N],tax[MAX_N],tp[MAX_N]; int Height[MAX_N]; void Debug() { printf("*****************\n"); printf("下标"); for (int i = 1; i <= N; i++) printf("%d ", i); printf("\n"); printf("sa "); for (int i = 1; i <= N; i++) printf("%d ", sa[i]); printf("\n"); printf("rak "); for (int i = 1; i <= N; i++) printf("%d ", rak[i]); printf("\n"); printf("tp "); for (int i = 1; i <= N; i++) printf("%d ", tp[i]); printf("\n"); } void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i] += tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M=75; for(int i=1;i<=N;i++) { rak[i]=s[i]; tp[i]=i; } Qsort(); //Debug(); for(int w=1,p=0;p<N;M=p,w<<=1) { p=0; for(int i=1;i<=w;i++) { tp[++p]=N-w+i; } for(int i=1;i<=N;i++) { if(sa[i]>w) { tp[++p]=sa[i]-w; } } Qsort(); for(int i=1;i<=N;i++) { swap(tp[i],rak[i]); } rak[sa[1]]=p=1; for (int i = 2; i <= N; i++) rak[sa[i]] = (tp[sa[i - 1]] == tp[sa[i]] && tp[sa[i - 1] + w] == tp[sa[i] + w]) ? p : ++p; //cout<<p<<' '<<N<<endl; //Debug(); } } void GetHeight() { int j, k = 0; for(int i = 1; i <= N; i++) { if(k) k--; int j = sa[rak[i] - 1]; while(i+k<=N&&j+k<=N&&s[i + k] == s[j + k]) k++; Height[rak[i]] = k; //printf("%d\n", k); } } int n,k; int H[MAX_N]; int check(int mid) { int num=1; for(int i=1;i<=N;i++) { if(Height[i]>=mid) { num++; if(num>=k) { return 1; } } else { num=1; } } return 0; } int main() { scanf("%d%d",&n,&k); for(int i=1;i<=n;i++) { scanf("%d",&s[i]); } N=n; SuffixSort(); GetHeight(); /*for(int i=1;i<=N;i++) { H[i]=Height[rak[i]]; }*/ int mx=0; int pos=1; int num=1; int l=0; int r=MAX_N; int mid=(l+r)>>1; while(l<=r) { if(check(mid)) { l=mid+1; } else r=mid-1; mid=(l+r)>>1; } cout<<mid<<endl; //system("pause"); }
Distinct Substrings
题意:给一字符串,找出其中不同子串的数量。
思路:首先一个n长度的字符串的子串总数是(1+n)*n/2,然后去掉其中相同的子串个数就是答案了。那么只需要减掉height[1...n]的和就行了。
#include<iostream> #include<cstdio> #include<cstring> #include<cmath> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 1000000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); char s[MAX_N]; int N,M,rak[MAX_N],sa[MAX_N],tax[MAX_N],tp[MAX_N]; int Height[MAX_N]; void Debug() { printf("*****************\n"); printf("下标"); for (int i = 1; i <= N; i++) printf("%d ", i); printf("\n"); printf("sa "); for (int i = 1; i <= N; i++) printf("%d ", sa[i]); printf("\n"); printf("rak "); for (int i = 1; i <= N; i++) printf("%d ", rak[i]); printf("\n"); printf("tp "); for (int i = 1; i <= N; i++) printf("%d ", tp[i]); printf("\n"); } void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i] += tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M=750; for(int i=1;i<=N;i++) { rak[i]=s[i]; tp[i]=i; } Qsort(); //Debug(); for(int w=1,p=0;p<N;M=p,w<<=1) { p=0; for(int i=1;i<=w;i++) { tp[++p]=N-w+i; } for(int i=1;i<=N;i++) { if(sa[i]>w) { tp[++p]=sa[i]-w; } } Qsort(); for(int i=1;i<=N;i++) { swap(tp[i],rak[i]); } rak[sa[1]]=p=1; for (int i = 2; i <= N; i++) rak[sa[i]] = (tp[sa[i - 1]] == tp[sa[i]] && tp[sa[i - 1] + w] == tp[sa[i] + w]) ? p : ++p; //cout<<p<<' '<<N<<endl; //Debug(); } } void GetHeight() { int j, k = 0; int num=0; for(int i = 1; i <= N; i++) { if(k) k--; int j = sa[rak[i] - 1]; while(i+k<=N&&j+k<=N&&s[j + k] == s[i + k]) k++; Height[rak[i]] = k; //printf("%d\n", k); } } int n,k; int H[MAX_N]; bool check(int mid) { int num=1; int mn=sa[1],mx=sa[1]; for(int i=2;i<=n;i++) { if(Height[i]>=mid) { num++; mn=min(mn,sa[i]); mx=max(mx,sa[i]); if(mx-mn>mid) { return true; } } else { mn=mx=sa[i]; num=1; } } return false; } int main() { int t; cin>>t; while(t--) { scanf("%s",s+1); N=strlen(s+1); SuffixSort(); GetHeight(); int ans=(N+1)*N/2; for(int i=2;i<=N;i++) { //cout<<Height[i]<<endl; ans-=Height[i]; } cout<<ans<<endl; } system("pause"); }
New Distinct Substrings
题意:同上。
思路:唯一的区别是数据范围比刚才大了,开long long即可。
#include<iostream> #include<cstdio> #include<cstring> #include<cmath> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 1000000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); char s[MAX_N]; int N,M,rak[MAX_N],sa[MAX_N],tax[MAX_N],tp[MAX_N]; int Height[MAX_N]; void Debug() { printf("*****************\n"); printf("下标"); for (int i = 1; i <= N; i++) printf("%d ", i); printf("\n"); printf("sa "); for (int i = 1; i <= N; i++) printf("%d ", sa[i]); printf("\n"); printf("rak "); for (int i = 1; i <= N; i++) printf("%d ", rak[i]); printf("\n"); printf("tp "); for (int i = 1; i <= N; i++) printf("%d ", tp[i]); printf("\n"); } void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i] += tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M=750; for(int i=1;i<=N;i++) { rak[i]=s[i]; tp[i]=i; } Qsort(); //Debug(); for(int w=1,p=0;p<N;M=p,w<<=1) { p=0; for(int i=1;i<=w;i++) { tp[++p]=N-w+i; } for(int i=1;i<=N;i++) { if(sa[i]>w) { tp[++p]=sa[i]-w; } } Qsort(); for(int i=1;i<=N;i++) { swap(tp[i],rak[i]); } rak[sa[1]]=p=1; for (int i = 2; i <= N; i++) rak[sa[i]] = (tp[sa[i - 1]] == tp[sa[i]] && tp[sa[i - 1] + w] == tp[sa[i] + w]) ? p : ++p; //cout<<p<<' '<<N<<endl; //Debug(); } } void GetHeight() { int j, k = 0; int num=0; for(int i = 1; i <= N; i++) { if(k) k--; int j = sa[rak[i] - 1]; while(i+k<=N&&j+k<=N&&s[j + k] == s[i + k]) k++; Height[rak[i]] = k; //printf("%d\n", k); } } int n,k; int H[MAX_N]; bool check(int mid) { int num=1; int mn=sa[1],mx=sa[1]; for(int i=2;i<=n;i++) { if(Height[i]>=mid) { num++; mn=min(mn,sa[i]); mx=max(mx,sa[i]); if(mx-mn>mid) { return true; } } else { mn=mx=sa[i]; num=1; } } return false; } int main() { int t; cin>>t; while(t--) { scanf("%s",s+1); N=strlen(s+1); SuffixSort(); GetHeight(); ll ans=(ll)(N+1)*N/2; for(int i=2;i<=N;i++) { //cout<<Height[i]<<endl; ans-=(ll)Height[i]; } cout<<ans<<endl; } //system("pause"); }
Repeats
题意:给定一个字符串,令k为一子串中连续出现的公共子串个数,求k的最大值。
思路:出现了,论文题,直接上论文截图吧
所以就是枚举长度和起点,然后通过rmq计算区间height最小值求出向后匹配的距离,然后再判断下向前是否能再匹配一个。
#include <iostream> #include <cstdio> #include <cstring> #include <algorithm> #include <cmath> #include <vector> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 50000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); char s[MAX_N]; int N, M, rak[MAX_N], sa[MAX_N], tax[MAX_N], tp[MAX_N]; int Height[MAX_N]; void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i]+=tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M = 255; for (int i = 1; i <= N; i++) { rak[i] = s[i] - '0' + 1; tp[i] = i; } Qsort(); for (int w = 1, p = 0; p < N; M = p, w <<= 1) { p = 0; for (int i = 1; i <= w; i++) tp[++p] = N - w + i; for (int i = 1; i <= N; i++) if (sa[i] > w) tp[++p] = sa[i] - w; Qsort(); for (int i = 1; i <= N; i++) { swap(tp[i], rak[i]); } rak[sa[1]] = p = 1; for(int i=2;i<=N;i++) { rak[sa[i]]=(tp[sa[i-1]]==tp[sa[i]]&&tp[sa[i-1]+w]==tp[sa[i]+w])?p:++p; } } } void GetHeight() { int j,k=0; for(int i=1;i<=N;i++) { if(k) k--; int j=sa[rak[i]-1]; while(s[i+k]==s[j+k]) k++; Height[rak[i]]=k; //cout<<k<<endl; } } int dp[MAX_N*2][33]; void RMQ_init() { for(int i=1;i<=N;i++) dp[i][0]=Height[i]; for(int j=1;(1<<j)<=N;j++) { for(int i=1;i+(1<<j)-1<=N;i++) { dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]); } } } int query(int l,int r) { int ll=rak[l]; int rr=rak[r]; if(ll>=rr) { swap(ll,rr); } ll++; int k=log2(rr-ll+1); int len=1<<k; return min(dp[ll][k],dp[rr-len+1][k]); } int ans=0; int main() { int t; cin>>t; while(t--) { int n; char c; scanf("%d",&n); for(int i=1;i<=n;i++) { scanf("%s",s+i); } N=n; SuffixSort(); //cout<<1<<endl; GetHeight(); //cout<<2<<endl; RMQ_init(); //cout<<3<<endl; int mx=0; for(int i=1;i<=n;i++) { for(int j=1;j+i<=n;j+=i) { int p=query(j,j+i); int k=j-(i-p%i); int ans=p/i+1; if(k>=0&&query(k,k+i)>=i) { ans++; } //cout<<ans<<endl; mx=max(mx,ans); } } printf("%d\n",mx); } //system("pause"); }
Maximum repetition substring
题意:跟上题是一样的,只是要求输出那个子串了,还要求字典序,这个标记 位置,然后用rak数组比较即可。
#include <iostream> #include <cstdio> #include <cstring> #include <algorithm> #include <cmath> #include <vector> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 100000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); char s[MAX_N]; int N, M, rak[MAX_N], sa[MAX_N], tax[MAX_N], tp[MAX_N]; int Height[MAX_N]; void Qsort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i]+=tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M = 255; for (int i = 1; i <= N; i++) { rak[i] = s[i] - '0' + 1; tp[i] = i; } Qsort(); for (int w = 1, p = 0; p < N; M = p, w <<= 1) { p = 0; for (int i = 1; i <= w; i++) tp[++p] = N - w + i; for (int i = 1; i <= N; i++) if (sa[i] > w) tp[++p] = sa[i] - w; Qsort(); for (int i = 1; i <= N; i++) { swap(tp[i], rak[i]); } rak[sa[1]] = p = 1; for(int i=2;i<=N;i++) { rak[sa[i]]=(tp[sa[i-1]]==tp[sa[i]]&&tp[sa[i-1]+w]==tp[sa[i]+w])?p:++p; } } } void GetHeight() { int j,k=0; for(int i=1;i<=N;i++) { if(k) k--; int j=sa[rak[i]-1]; while(s[i+k]==s[j+k]) k++; Height[rak[i]]=k; //cout<<k<<endl; } } int dp[MAX_N*2][33]; void RMQ_init() { for(int i=1;i<=N;i++) dp[i][0]=Height[i]; for(int j=1;(1<<j)<=N;j++) { for(int i=1;i+(1<<j)-1<=N;i++) { dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]); } } } int query(int l,int r) { int ll=rak[l]; int rr=rak[r]; if(ll>=rr) { swap(ll,rr); } ll++; int k=log2(rr-ll+1); int len=1<<k; return min(dp[ll][k],dp[rr-len+1][k]); } int ans=0; int main() { int T=1; while(~scanf("%s",s+1)&&s[1]!='#') { N=strlen(s+1); SuffixSort(); //cout<<1<<endl; GetHeight(); //cout<<2<<endl; RMQ_init(); //cout<<3<<endl; int mx=0; int pos=1; int len=1; for(int i=1;i<=N;i++) { for(int j=1;j+i<=N;j+=i) { int p=query(j,j+i); int k=j-(i-p%i); int ans=p/i+1; int q=j; int cnt=0; for(int u=j-1;u>j-i&&s[u]==s[u+i]&&u>=0;u--) { cnt++; if(cnt==(i-p%i)) { ans++; q=u; } else if(rak[q]>rak[u]) { q=u; } } //cout<<ans<<endl; if(ans>mx) { mx=ans; pos=q; len=i*ans; } else if(ans==mx) { if(rak[pos]>rak[q]) { //cout<<j<<' '<<u<<' '<<ans<<' '<<i<<endl; pos=q; len=i*ans; } } } } //cout<<pos<<' '<<len<<endl; printf("Case %d: ",T++); if(len==1) { char m=s[1]; for(int i=1;i<=N;i++) { //cout<<m<<endl; m=min(m,s[i]); } cout<<m<<endl; }else{ for(int i=pos;i<pos+len;i++) { cout<<s[i]; } cout<<endl;} } //system("pause"); }
Long Long Message
题意:求两个字符串的最长公共字串
思路:直接用上面问题二的结论,接在一起,然后暴力算都行。
#include <iostream> #include <cstdio> #include <cstring> #include <algorithm> #include <cmath> #include <map> #include <vector> #define fi first #define se second #define pb push_back #define me(a, b) memset(a, b, sizeof(a)) #define INIT() std::ios::sync_with_stdio(false) using namespace std; typedef long long ll; typedef unsigned long long ull; typedef pair<ll, ll> P; const int MAX_N = 200000 + 5; const int MAX_M = 100000 + 5; const int INF = 0x7fffffff; const int inf = 1000000000; const double EPS = 1e-6; const ull base = 123; const ll mod = 1e4 + 7; const double pi = 4 * atan(1.0); char s[MAX_N]; int rak[MAX_N]; int tp[MAX_N]; int sa[MAX_N]; int tax[MAX_N]; int N; int M; int height[MAX_N]; void Sort() { for(int i=0;i<=M;i++) tax[i]=0; for(int i=1;i<=N;i++) tax[rak[i]]++; for(int i=1;i<=M;i++) tax[i]+=tax[i-1]; for(int i=N;i>=1;i--) sa[tax[rak[tp[i]]]--]=tp[i]; } void SuffixSort() { M=30; for(int i=1;i<=N;i++) { rak[i]=s[i]-'a'+1; tp[i]=i; } Sort(); for(int w=1,p=0;p<N;M=p,w<<=1) { p=0; for(int i=1;i<=w;i++) { tp[++p]=N-w+i; } for(int i=1;i<=N;i++) { if(sa[i]>w) tp[++p]=sa[i]-w; } Sort(); for(int i=1;i<=N;i++) { tp[i]=rak[i]; } rak[sa[1]]=p=1; for(int i=2;i<=N;i++) { rak[sa[i]]=(tp[sa[i-1]]==tp[sa[i]]&&tp[sa[i-1]+w]==tp[sa[i]+w])?p:++p; } } } void GetHeight() { int k=0; for(int i=1;i<=N;i++) { if(k) k--; int j=sa[rak[i]-1]; while(i+k<=N&&j+k<=N&&s[i+k]==s[j+k]) k++; height[rak[i]]=k; } } char s1[MAX_N]; char s2[MAX_N]; int main() { scanf("%s",s1+1); scanf("%s",s2+1); int len1=strlen(s1+1); int len2=strlen(s2+1); strcpy(s1+len1+1,s2+1); strcpy(s+1,s1+1); N=len1+len2; SuffixSort(); GetHeight(); int ans=0; //cout<<len1<<' '<<len2<<endl; for(int i=len1+1;i<=N;i++) { int j=rak[i]-1; //cout<<sa[j]<<endl; if(sa[j]<=len1) { ans=max(ans,min(len1-sa[j]+1,height[rak[i]])); //cout<<len1-sa[j]+1<<' '<<height[rak[i]]<<endl; } } for(int i=1;i<=len1;i++) { int j=rak[i]-1; if(sa[j]>len1) { ans=max(ans,height[rak[i]]); } } cout<<ans<<endl; //system("pause"); }
Substrings
题意:给多个字符串,求在所有字符串中都出现的最长子串长度。
思路:首先,自然是把所有字符串连起来,中间插上从未出现过的字符,然而,n最大到一百,也就是最多要插99个不一样的字符!,在以身试发后发现,当ascll码大到一定程度后就会是汉字,而汉字在转回ascll码时居然会变成负数,在re了无数回后,无奈将字符串改成整形,插数字。然后就是喜闻乐见的二分长度,check就是寻找连续的height超过mid并且里面包含了来自每个字符串的子串即可。
#include <iostream> #include <cstdio> #include <cstring> #include <cmath> #include <algorithm> #include <queue> #include <stack> #include <string> #include <map> #include <vector> #define me(a, b) memset(a, b, sizeof(a)) #define pb push_back using namespace std; typedef long long ll; const ll MAX_N = 100000 + 5; const ll MAX_M = 100000 + 5; const ll INF = 0x7fffffff; const ll inf = 1000000000; const ll mod = 1e9 + 7; const double eps = 1e-6; int sa[MAX_N], tp[MAX_N], rak[MAX_N], tax[MAX_N]; int s[MAX_N]; int N, M; void Sort() { for (int i = 0; i <= M; i++) tax[i] = 0; for (int i = 1; i <= N; i++) tax[rak[i]]++; for (int i = 1; i <= M; i++) tax[i] += tax[i - 1]; for (int i = N; i >= 1; i--) sa[tax[rak[tp[i]]]--] = tp[i]; } void SuffixSort() { M = 1000; for (int i = 1; i <= N; i++) { rak[i] = s[i]; tp[i] = i; } Sort(); for (int w = 1, p = 0; p < N; w <<= 1, M = p) { p = 0; for (int i = 1; i <= w; i++) { tp[++p] = N - w + i; } for (int i = 1; i <= N; i++) { if (sa[i] > w) tp[++p] = sa[i] - w; } Sort(); for (int i = 1; i <= N; i++) { tp[i] = rak[i]; } rak[sa[1]] = p = 1; for (int i = 2; i <= N; i++) { rak[sa[i]] = (sa[i] + w <= N && sa[i - 1] + w <= N && tp[sa[i]] == tp[sa[i - 1]] && tp[sa[i] + w] == tp[sa[i - 1] + w]) ? p : ++p; } } } int height[MAX_N]; int vis[210]; void GetHeight() { int k = 0; for (int i = 1; i <= N; i++) { if (k) k--; int j = sa[rak[i] - 1]; while (i + k <= N && j + k <= N && s[i + k] == s[j + k]) k++; height[rak[i]] = k; //cout<<height[rak[i]]<<endl; } } int le[MAX_N]; int n; bool check(int mid) { //cout<<n<<endl; me(vis, 0); int sum = 0; for (int i = 1; i <= N; i++) { // cout << mid << "-----" << i << ' ' << sa[i] << ' ' << sa[i - 1] << ' ' << sum << endl; if (height[i] >= mid) { if (le[sa[i]] >= 1 && le[sa[i]] <= n && vis[le[sa[i]]] == 0) { vis[le[sa[i]]] = 1; sum++; } if (le[sa[i - 1]] >= 1 && le[sa[i - 1]] <= n && vis[le[sa[i - 1]]] == 0) { vis[le[sa[i - 1]]] = 1; sum++; } if (le[sa[i]] >= 1 && le[sa[i]] > n && vis[le[sa[i]] - n] == 0) { vis[le[sa[i]] - n] = 1; sum++; } if (le[sa[i - 1]] >= 1 && le[sa[i - 1]] > n && vis[le[sa[i - 1]] - n] == 0) { vis[le[sa[i - 1]] - n] = 1; sum++; } } else { if (sum >= n) { // cout<<n<<endl; return true; } me(vis, 0); sum = 0; } } if (sum >= n) { // cout<<n<<endl; return true; } return false; } char c[105][105]; int main() { int t; cin >> t; while (t--) { scanf("%d", &n); for (int i = 1; i <= n; i++) { scanf("%s", c[i] + 1); } if(n==1) { printf("%d\n",(int)strlen(c[1]+1)); continue; } int op = 300; int pos = 1; for (int i = 1; i <= n; i++) { int len = strlen(c[i] + 1); for (int j = 1; j <= len; j++) { le[pos] = i; s[pos++] = c[i][j]; } s[pos++] = op++; } for (int i = 1; i <= n; i++) { int len = strlen(c[i] + 1); for (int j = len; j >= 1; j--) { le[pos] = n + i; s[pos++] = c[i][j]; } s[pos++] = op++; } pos -= 2; N = pos; SuffixSort(); GetHeight(); int l = 0; int r = 100; int mid = (l + r) >> 1; //cout<<n<<endl; while (l <= r) { if (check(mid)) { l = mid + 1; } else { r = mid - 1; } mid = (l + r) >> 1; //cout<<mid<<endl; } if (mid < 0) mid = 0; cout << mid << endl; } //system("pause"); }
Checking the Text
题意:给一个字符串,每次可以做插入和查询操作,插入是在指定位置插字符串,查询是查询两指定下标开头的字符串的匹配长度。
思路:这题我能想的用后缀数组的方法就是每次插入的时候再重新排一次序,然后查询就可以O(1)查询height,但是复杂度显然还是过不去,所以后缀数组怎么做我不会!所以我就用哈希了嘿嘿,直接暴力过。
#include <iostream> #include <cstdio> #include <cstring> #include <cmath> #include <algorithm> #include <queue> #include <stack> #include <string> #include <map> #include <vector> #define me(a, b) memset(a, b, sizeof(a)) #define pb push_back using namespace std; typedef long long ll; typedef unsigned long long ull; const ll MAX_N = 1000000 + 5; const ll MAX_M = 100000 + 5; const ll INF = 0x7fffffff; const ll inf = 1000000000; const ll mod = 1e9 + 7; const double eps = 1e-6; const ull base = 123; ull po[MAX_N]; ull has[MAX_N]; char s[MAX_N]; int p[MAX_N]; int len; int query(int x,int y) { int l=0; int r=len-max(x,y)+1; int mid=(l+r)>>1; while(l<=r) { if((has[x+mid-1]-has[x-1]*po[mid]%mod+mod)%mod==(has[y+mid-1]-has[y-1]*po[mid]%mod+mod)%mod) { l=mid+1; } else { r=mid-1; } mid=(l+r)>>1; } return mid; } int main() { scanf("%s", s + 1); len = strlen(s + 1); po[0] = 1; for (int i = 1; i <= 2 * len; i++) { po[i] = po[i - 1] * base % mod; } for (int i = 1; i <= len; i++) { has[i] = (has[i - 1] * base + s[i]) % mod; } //cout<<1<<endl; int n; cin >> n; for (int i = 1; i <= len; i++) { p[i] = i; } int m=len; while (n--) { char op[3]; scanf("%s", op); if (op[0] == 'I') { int x; scanf("%s%d", op, &x); if (x > len) x = len + 1; for (int i = len; i >= x; i--) { s[i + 1] = s[i]; } s[x] = op[0]; for(int i=m;i>=1;i--) { if(p[i]>=x) { p[i]++; } else { break; } } len++; for (int i = x; i <= len; i++) { has[i] = (has[i - 1] * base + s[i]) % mod; } } else { int l,r; scanf("%d%d",&l,&r); cout<<query(p[l],p[r])<<endl; } } //system("pause"); }
kuangbin题单里剩下些题思路都差不多,当然有些搞人心态的细节问题,仔细点就好,后缀数组就暂时告一段落,后缀自动机冲冲冲!
希望若干天后能看到我更后缀自动机。