题解 | #kmp算法#

kmp算法

http://www.nowcoder.com/practice/bb1615c381cc4237919d1aa448083bcc

常规kmp

题目：给你一个非空模板串S，一个文本串T，问T在S中完全匹配的起始下标(从0开始)

分析：

利用match串的next数组加速匹配过程
- next[i]：表示match[0...i-1]位置上前缀和后缀最长匹配长度
- 规定：next[0]=-1.next[1]=0,遍历指针从i=2开始

public static int[] getNextArray(char[] match) {
    // 长度为1的数组,前面没有元素,规定next值为-1
    if (match.length == 1) {
        return new int[]{-1};
    }
    int[] next = new int[match.length];
    // 第一个位置前面没有元素，规定为-1
    next[0] = -1;
    // 第二个位置前面只有0位置的元素,由于任何子串的后缀不能包括第一个字符，规定为0
    next[1] = 0;
    int i = 2;// 遍历指针从第3个元素开始
    // cn两个含义:
    // 1.拿哪个位置的字符跟i-1比,由于i初始化2,i-1是1,所以cn=0
    // 2.i位置的前缀和后缀最大匹配长度=最大匹配下标+1的值
    int cn = 0;
    while (i < next.length) {
        if (match[i - 1] == match[cn]) {// i-1位置和cn相同，next[i]=cn+1,cn和i后移再匹配
            next[i++] = ++cn;
        } else if (cn > 0) {// i-1位置和cn位置不相同：有两种情况
            // 情况1：cn一直往前跳,但cn必须大于数组0位置
            cn = next[cn];
        } else {//  情况2：cn一直往前跳来到0位置，说明match[i]无前后缀匹配，next[i++]=0
            next[i++] = 0;
        }
    }
    return next;
}

kmp：返回match串在s1中完整匹配的起始下标

public class KMP {
    // KMP:字符串s和m，s是否包含m，如果包含返回m在s中开始的位置?再结合NC149_kmp算法做比较
    // 要求整体时间复杂度O(N),N是s1的长度，M是s2的长度
    public static int getIndexOf(String s, String m) {
        if (s == null || m == null || m.length() < 1 || s.length() < m.length()) {
            return -1;
        }
        char[] str1 = s.toCharArray();
        char[] str2 = m.toCharArray();
        int i1 = 0;
        int i2 = 0;
        // 获得待匹配str2的next数组
        int[] next = getNextArray(str2);
        while (i1 < str1.length && i2 < str2.length) {
            if (str1[i1] == str2[i2]) {// i1与i2都匹配成功，与暴力法一样都后移指针
                i1++;
                i2++;
            } else if (next[i2] == -1) {// m串的遍历指针来到next[i2]=-1的位置，表示无法加速匹配，只能暴力移动i1
                i1++;
            } else {// 只要不是-1,i2就加速移动到最长后缀下一个位置,由于数组下标是0开始,所以i2移动到next[i2]下标
                i2 = next[i2];
            }
        }
        // while结束,i1或者i2越界
        // s1:abab,i1越界,匹配失败
        // s2: bab,i2越界,代表匹配完成(i1,i2是同时移动的),匹配的起始位置=i1-i2
        return i2 == str2.length ? i1 - i2 : -1;
    }

    // 获得match串的next数组
    public static int[] getNextArray(char[] match) {
        // 长度为1的数组,前面没有元素,规定next值为-1
        if (match.length == 1) {
            return new int[]{-1};
        }
        int[] next = new int[match.length];
        // 第一个位置前面没有元素，规定为-1
        next[0] = -1;
        // 第二个位置前面只有0位置的元素,由于任何子串的后缀不能包括第一个字符，规定为0
        next[1] = 0;
        int i = 2;// 遍历指针从第3个元素开始
        // cn两个含义:
        // 1.拿哪个位置的字符跟i-1比,由于i初始化2,i-1是1,所以cn=0
        // 2.i位置的前缀和后缀最大匹配长度=最大匹配下标+1的值
        int cn = 0;
        while (i < next.length) {
            if (match[i - 1] == match[cn]) {// i-1位置和cn相同，next[i]=cn+1,cn和i后移再匹配
                next[i++] = ++cn;
            } else if (cn > 0) {// i-1位置和cn位置不相同：有两种情况
                // 情况1：cn一直往前跳,但cn必须大于数组0位置
                cn = next[cn];
            } else {//  情况2：cn一直往前跳来到0位置，说明match[i]无前后缀匹配，next[i++]=0
                next[i++] = 0;
            }
        }
        return next;
    }

    public static void main(String[] args) {
        String str = "abcabcababaccc";
        String match = "ababa";
        // next = [-1, 0, 0, 1, 2]
        // System.out.println(Arrays.toString(getNextArray(match.toCharArray())));
        System.out.println(getIndexOf(str, match));// 返回完全开始匹配的索引
    }
}

牛客NC149

题目：给你一个非空模板串S，一个文本串T，问S在T中出现了多少次

示例：

输入：
"ababab","abababab"
返回值：
2

分析：

由于是问返回S在T中出现了多少次？常规Kmp第一次匹配就返回下标，所以要改造next数组和接口逻辑
改造常规的next

// 获得match串的改造next数组，比常规next多一位数
private int[] getNext(char[] match) {
    if (match.length == 1) {
        return new int[]{-1};
    }
    // next改造1：next多算一个,多出的末尾数组记录整个ms的最长前后缀长度
    int[] next = new int[match.length + 1];
    next[0] = -1;
    next[1] = 0;
    int i = 2;
    int cn = 0;
    // next改造2：i<m.len改成i<=m.len
    while (i <= match.length) {
        if (match[i - 1] == match[cn]) {
            next[i++] = ++cn;
        } else if (cn > 0) {
            cn = next[cn];
        } else {
            next[i++] = 0;
        }
    }
    return next;
}

kmp：
- 改造了next数组，当i2越界,代表匹配上了,res+1；并且i2 = next[i2] 回退到整个s2的最长后缀位置开始二轮匹配

public class Solution {
    // 牛客149kmp:给你一个非空模板串S，一个文本串T，问S在T中出现了多少次
    public int kmp(String S, String T) {
        if (T == null || S == null || S.length() < 1 || T.length() < S.length()) {
            return 0;
        }
        char[] s1 = T.toCharArray();
        char[] s2 = S.toCharArray();
        int i1 = 0;
        int i2 = 0;
        int[] next = getNext(s2);
        int res = 0;
        while (i1 < s1.length && i2 < s2.length) {
            if (s1[i1] == s2[i2]) {
                i1++;
                i2++;
            } else if (next[i2] == -1) {
                i1++;
            } else {
                i2 = next[i2];
            }
            // 以上是常规kmp算法,后续是改造了3点
            // 改造1:i2越界,代表匹配上了,结果+1
            // getNext中改造2:next多算一个,多出的末尾数组记录整个s2的最长前后缀长度
            if (i2 == s2.length) {
                res++;
                // i2再回退到整个s2的最长后缀位置开始二轮匹配
                i2 = next[i2];
            }
        }
        // 改造2:返回个数
        return res;
    }

    // 获得match串的改造next数组，比常规next多一位数
    private int[] getNext(char[] match) {
        if (match.length == 1) {
            return new int[]{-1};
        }
        // next改造1：next多算一个,多出的末尾数组记录整个ms的最长前后缀长度
        int[] next = new int[match.length + 1];
        next[0] = -1;
        next[1] = 0;
        int i = 2;
        int cn = 0;
        // next改造2：i<m.len改成i<=m.len
        while (i <= match.length) {
            if (match[i - 1] == match[cn]) {
                next[i++] = ++cn;
            } else if (cn > 0) {
                cn = next[cn];
            } else {
                next[i++] = 0;
            }
        }
        return next;
    }

    public static void main(String[] args) {
        String S = "abab";
        String T = "abacabab";
        // 返回match子串在str中匹配的起始位置
        Solution solution = new Solution();
        System.out.println(solution.kmp(S, T));
    }
}