KMP

KMP专题(二)

Posted on

专题链接

KMP专题(一)
KMP专题(三)

HUST 1010 The Minimum Length

题意:
找最小循环节长度。

思路:
无。

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <iterator>
#include <map>
#include <queue>
#include <stack>
#include <string>
#include <vector>

using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity shared by the input buffer and the failure table.
const int maxn = 1e6 + 5;
// Length of the string currently held in des.
int len;
// Failure table written by getNext(); entries 0..len are filled.
int nxt[maxn];
// Input string (main() fills it with scanf("%s")).
char des[maxn];

// Builds the KMP failure table for des[0..len-1].
// nxt[0] is the sentinel -1; for 1 <= k <= len, nxt[k] is the length of the
// longest proper prefix of des[0..k-1] that is also a suffix of it.
void getNext()
{
    nxt[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len;) {
        if (border != -1 && des[pos] != des[border]) {
            // Mismatch: retry with the next shorter border.
            border = nxt[border];
            continue;
        }
        // Either no border is left (-1) or the current one can be extended.
        ++pos;
        ++border;
        nxt[pos] = border;
    }
}

// Reads one string per test case until end of input and prints
// len - nxt[len], the length of its shortest repeating unit.
int main()
{
    for (;;) {
        if (scanf("%s", des) == EOF)
            break;
        len = strlen(des);
        getNext();
        const int period = len - nxt[len];
        printf("%d\n", period);
    }
    return 0;
}

POJ 2406 Power Strings

题意:
找最短循环节,并输出整个字符串由该循环节重复的次数(不能整除时输出 1)。

思路:
无。

#include <cstring>
#include <iostream>
#include <cstdio>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the buffers and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// des holds the string being analysed; src is not used by getNext().
char src[maxn], des[maxn];
// tlen is the length of des; slen is not used by getNext().
int slen, tlen;

// Builds the KMP failure table for des[0..tlen-1].
// nxt[0] is the sentinel -1; for 1 <= k <= tlen, nxt[k] is the length of the
// longest proper prefix of des[0..k-1] that is also a suffix of it.
void getNext()
{
    nxt[0] = -1;
    int pos = 0;
    int cand = -1;
    while (pos < tlen) {
        // Short-circuit keeps des[cand] from being read while cand is -1.
        const bool extend = (cand == -1) || (des[pos] == des[cand]);
        if (!extend) {
            cand = nxt[cand];
        } else {
            nxt[pos + 1] = cand + 1;
            ++pos;
            ++cand;
        }
    }
}

// Reads strings until a lone "." (or end of input). For each one prints how
// many times its shortest period repeats to form the whole string, or 1 when
// the period does not divide the length.
int main()
{
    for (;;) {
        if (scanf("%s", des) == EOF)
            break;
        const bool sentinel = des[0] == '.' && des[1] == '\0';
        if (sentinel)
            break;
        tlen = strlen(des);
        getNext();
        const int period = tlen - nxt[tlen];
        if (tlen % period != 0) {
            puts("1");
            continue;
        }
        printf("%d\n", tlen / period);
    }
    return 0;
}

POJ 2752 Seek the Name, Seek the Fame

题意:
求出所有既是前缀又是后缀的子串,按从小到大的顺序输出它们的长度。

思路:
有点意思。这道题需要对 next 数组更深一层的了解。

AC Code

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <queue>

using namespace std;

#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)

typedef long long ll;

// Capacity of the input buffer and of both tables.
const int maxn = 4e5 + 10;
// Input string.
char str[maxn];
// Failure table written by getNext(); entries 0..len are filled.
int nxt[maxn];
// Border lengths collected by main().
int ans[maxn];
// Length of str.
int len;
// Number of entries currently stored in ans.
int num;

// Builds the KMP failure table for str[0..len-1] and resets num to 0.
// nxt[0] is the sentinel -1; for 1 <= k <= len, nxt[k] is the length of the
// longest proper prefix of str[0..k-1] that is also a suffix of it.
void getNext()
{
    num = 0;
    nxt[0] = -1;
    int border = -1;
    int pos = 0;
    while (pos < len) {
        if (border == -1 || str[pos] == str[border]) {
            ++border;
            ++pos;
            nxt[pos] = border;
            continue;
        }
        // Mismatch: fall back to the next shorter border.
        border = nxt[border];
    }
}

// For every input string, prints in increasing order the lengths of all
// prefixes that are also suffixes (the full length included).
int main()
{
    while (scanf("%s", str) != EOF) {
        len = strlen(str);
        getNext(); // also resets num to 0
        // Walk the border chain len -> nxt[len] -> ...; this yields the
        // lengths in decreasing order.
        for (; len > 0; len = nxt[len])
            ans[num++] = len;
        // Emit them reversed; the last value is followed by a newline.
        int k = num - 1;
        while (k >= 0) {
            printf("%d%c", ans[k], k ? ' ' : '\n');
            --k;
        }
    }
    return 0;
}

POJ 3080 Blue Jeans

题意:
给出多个字符串,找出最长的且字典序最小的公共连续子串。

思路:
数据很小,直接枚举第一个字符串的所有连续子串,再对其他 n-1 个字符串尝试匹配即可。

AC Code

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <queue>
#include <string>

using namespace std;

#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)

// Row capacity of mat and capacity of the failure table.
const int maxn = 65;
// Input strings, one per row (filled by main()).
char mat[15][maxn];
// Failure table written by getNext(); entries 0..str.length() are filled,
// so the pattern must be shorter than maxn characters.
int nxt[maxn];

// Builds the KMP failure table for str.
// nxt[0] is the sentinel -1; for k >= 1, nxt[k] is the length of the longest
// proper prefix of str[0..k-1] that is also a suffix of it.
void getNext(const string& str)
{
    int len = str.length(), i = 0, j = -1;
    nxt[0] = -1;
    while (i < len)
        if (j == -1 || str[i] == str[j])
            nxt[++i] = ++j;
        else
            j = nxt[j];
}

// Returns true when des occurs as a contiguous substring of the
// NUL-terminated string src. An empty des always matches.
//
// The text length is taken from src itself instead of being assumed to be
// 60, so shorter texts are never read past their terminator.
bool kmpIndex(const char* src, const string& des)
{
    const int srcLen = strlen(src);
    const int len = des.length();
    // A pattern longer than the text cannot occur; returning here also skips
    // building a table for it.
    if (len > srcLen)
        return false;
    getNext(des);
    int i = 0, j = 0;
    while (i < srcLen && j < len)
        if (j == -1 || src[i] == des[j])
            i++, j++;
        else
            j = nxt[j];
    return j == len;
}

int main()
{
    int T, n;
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        each(i, n) scanf("%s", mat[i]);
        string ans = "";
        range(l, 3, 60) range(s, 0, 60 - l)
        {
            string tmp = "";
            range(i, s, s + l - 1) tmp += mat[0][i];
            if (tmp.length() == ans.length() && tmp > ans)
                continue;
            bool flag = true;
            range(i, 1, n - 1) if (!kmpIndex(mat[i], tmp))
            {
                flag = false;
                break;
            }
            if (flag)
                ans = tmp;
        }
        if (ans.length() == 0)
            puts("no significant commonalities");
        else
            cout << ans << endl;
    }
    return 0;
}

HDU 2594 Simpsons’ Hidden Talents

题意:
给出两个字符串,找出最长的、既是第一个字符串的前缀又是第二个字符串的后缀的子串。

思路:
还是老思路,只要把两个字符串连接起来就可以了。但是有一个问题,就是匹配的时候可能会越界。
比如说 abab 与 ab,如果直接连接成 ababab,那么算法给出的结果将会是 abab(长度 4),超过了第二个字符串的长度。
一个行之有效的方法就是在中间加一个不会出现的字符,比如说 '#'、'&'。加上以后,公共前后缀不可能跨过这个分隔符,其长度最多等于较短字符串的长度(两个字符串相同时恰好取到整个字符串),也就不会再越界。

AC Code

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <queue>
#include <string>

using namespace std;

#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)

// Maximum length of one input string (plus slack).
const int maxn = 5e4 + 5;

// s receives the second input string; des receives the first one and is then
// extended in place by main(), hence the doubled capacity.
char s[maxn], des[maxn * 2];
// Failure table written by getNext(); entries 0..len are filled.
int nxt[maxn * 2];
// Length of des, recomputed by getNext().
int len;

// Measures des with strlen, stores the result in len, and builds the KMP
// failure table for des[0..len-1].
// nxt[0] is the sentinel -1; for 1 <= k <= len, nxt[k] is the length of the
// longest proper prefix of des[0..k-1] that is also a suffix of it.
void getNext()
{
    len = strlen(des);
    nxt[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len;) {
        const bool mismatch = (border != -1) && (des[pos] != des[border]);
        if (mismatch) {
            border = nxt[border];
        } else {
            nxt[pos + 1] = border + 1;
            ++pos;
            ++border;
        }
    }
}

// Reads pairs of strings until end of input. For each pair prints the
// longest prefix of the first string that is also a suffix of the second,
// followed by its length (just "0" when there is none).
int main()
{
    // Both reads are checked: a dangling first string at the end of the
    // input is no longer paired with the previous case's stale `s`.
    while (scanf("%s", des) == 1 && scanf("%s", s) == 1) {
        // Join as des + '#' + s; the separator stops a border from spanning
        // both strings.
        strcat(des, "#");
        strcat(des, s);
        getNext();
        int ans = nxt[len];
        if (ans)
            printf("%.*s ", ans, des);
        printf("%d\n", ans);
    }
    return 0;
}
KMP

KMP专题(一)

Posted on

前言

OK,刷专题终于刷到了自己不会的最小表示法,在学习最小表示法之前,稍微总结一下。

kmp多水题,而且因为挂在hdu和poj上,本身有些数据也非常水,所以打算开个专题来汇总一下这些水题。

KMP专题链接

题解

HDU 1711 Number Sequence

题意:
返回第一个数字序列匹配的位置。

思路:
KMP入门题。

AC Code

#include <cstring>
#include <iostream>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the sequences and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Optimized failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// src is the sequence searched in, des the sequence searched for.
int src[maxn], des[maxn];
// Lengths of src and des.
int slen, tlen;

// Builds the optimized KMP failure table for des[0..tlen-1].
// Each entry starts as the plain border length; when the element after that
// border equals the element at the current position, falling back there
// would fail on the same comparison, so the border's own entry is stored
// instead. The final step compares des[tlen], the element just past the
// pattern.
void getNext()
{
    nxt[0] = -1;
    int pos = 0;
    int border = -1;
    while (pos < tlen) {
        if (border != -1 && des[pos] != des[border]) {
            border = nxt[border];
            continue;
        }
        ++pos;
        ++border;
        nxt[pos] = (des[pos] == des[border]) ? nxt[border] : border;
    }
}

// Returns the 1-based position of the first occurrence of des[0..tlen-1]
// inside src[0..slen-1], or -1 when it does not occur.
int KMPIndex()
{
    getNext();
    int textPos = 0;
    int patPos = 0;
    while (textPos < slen && patPos < tlen) {
        if (patPos != -1 && src[textPos] != des[patPos]) {
            patPos = nxt[patPos];
        } else {
            // patPos == -1 means nothing can be reused: skip this element.
            ++textPos;
            ++patPos;
        }
    }
    return (patPos == tlen) ? textPos - tlen + 1 : -1;
}

int main()
{
    int T_T;
    scanf("%d", &T_T);
    while (T_T--) {
        scanf("%d %d", &slen, &tlen);
        each(i, slen)
            scanf("%d", src + i);
        each(i, tlen)
            scanf("%d", des + i);
        printf("%d\n", KMPIndex());
    }
    return 0;
}

HDU 1686 Oulipo

题意:
返回匹配数量。

思路
KMP入门题。

AC Code

#include <cstring>
#include <iostream>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the buffers and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Optimized failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// src is the text, des the pattern; both are NUL-terminated strings.
char src[maxn], des[maxn];
// Lengths of src and des.
int slen, tlen;

// Builds the optimized KMP failure table for des[0..tlen-1].
// Entries may be -1, meaning "no prefix can be reused; advance the text".
// The final step compares des[tlen]; with a NUL-terminated pattern that is
// '\0', which differs from every pattern character, so nxt[tlen] ends up as
// the plain border length of the whole pattern.
void getNext()
{
    int i = 0, j = -1;
    nxt[0] = -1;
    while (i < tlen)
        if (j == -1 || des[i] == des[j]) {
            if (des[++i] != des[++j])
                nxt[i] = j;
            else
                nxt[i] = nxt[j];
        } else
            j = nxt[j];
}

// Returns the number of (possibly overlapping) occurrences of des[0..tlen-1]
// in src[0..slen-1]. An empty pattern yields 0.
int KMPCount()
{
    if (tlen <= 0)
        return 0;
    getNext();
    int j = 0, ans = 0;
    for (int i = 0; i < slen; i++) {
        // Follow the table down to -1. Stopping at `j > 0` is not enough,
        // because the optimized table can jump from a positive j straight to
        // -1; j would then index des[-1] and never recover.
        while (j != -1 && src[i] != des[j])
            j = nxt[j];
        // Either src[i] matched des[j], or j is -1 and this text character
        // is skipped; in both cases the pattern position advances by one.
        ++j;
        if (j == tlen) {
            ans++;
            // Continue from the longest border so overlaps are counted.
            j = nxt[j];
        }
    }
    return ans;
}

// Reads the number of test cases; each case gives the pattern and then the
// text, and the number of occurrences reported by KMPCount() is printed.
int main()
{
    int T_T;
    scanf("%d", &T_T);
    for (; T_T; --T_T) {
        scanf("%s", des);
        tlen = strlen(des);
        scanf("%s", src);
        slen = strlen(src);
        const int occurrences = KMPCount();
        printf("%d\n", occurrences);
    }
    return 0;
}

HDU 2087 剪花布条

题意:
返回匹配数量,有点不一样的地方就是不能有重叠。比如说 aaaa 只能计作 两个 aa。

思路:
模板改一个小地方就好了。

AC Code

#include <cstring>
#include <iostream>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the buffers and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Optimized failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// src is the text, des the pattern; both are NUL-terminated strings.
char src[maxn], des[maxn];
// Lengths of src and des.
int slen, tlen;

// Builds the optimized KMP failure table for des[0..tlen-1].
// Entries may be -1, meaning "no prefix can be reused; advance the text".
void getNext()
{
    int i = 0, j = -1;
    nxt[0] = -1;
    while (i < tlen)
        if (j == -1 || des[i] == des[j]) {
            if (des[++i] != des[++j])
                nxt[i] = j;
            else
                nxt[i] = nxt[j];
        } else
            j = nxt[j];
}

// Returns the number of non-overlapping occurrences of des[0..tlen-1] in
// src[0..slen-1], scanning left to right. An empty pattern yields 0.
int KMPCount()
{
    if (tlen <= 0)
        return 0;
    getNext();
    int j = 0, ans = 0;
    for (int i = 0; i < slen; i++) {
        // Follow the table down to -1. Stopping at `j > 0` is not enough,
        // because the optimized table can jump from a positive j straight to
        // -1; j would then index des[-1] and never recover.
        while (j != -1 && src[i] != des[j])
            j = nxt[j];
        // Either src[i] matched des[j], or j is -1 and this text character
        // is skipped; in both cases the pattern position advances by one.
        ++j;
        if (j == tlen) {
            ans++;
            // Restart from scratch so the next match cannot overlap this one.
            j = 0;
        }
    }
    return ans;
}

// Reads (text, pattern) pairs until a lone "#" or end of input and prints
// the number of non-overlapping occurrences for each pair.
int main()
{
    // Compare against 1: scanf returns EOF (a non-zero value) at end of
    // input, so testing the result for truthiness would loop forever when
    // the "#" terminator is missing.
    while (scanf("%s", src) == 1) {
        if (src[0] == '#' && src[1] == '\0')
            break;
        // A text without its pattern is a truncated input; stop instead of
        // reusing the previous pattern.
        if (scanf("%s", des) != 1)
            break;
        slen = strlen(src);
        tlen = strlen(des);
        printf("%d\n", KMPCount());
    }
    return 0;
}

HDU 3746 Cyclic Nacklace

当时第一次发现kmp的next数组的应用,简直惊为天人,立马写了一篇博客……认为,嗯,好题!

事实上这类题目,巨tm多……
博文链接

HDU 1358 Period

题意:
题目什么意思来着……

再看了一下题目,原来还是求循环节……对于每个位置都找一下循环节,如果刚好是完整的几个循环节而来,就输出这个位置和循环次数。

思路:
上面那篇懂了的话,这题也是水题。

AC Code

#include <cstring>
#include <iostream>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the buffers and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// des holds the string being analysed; src is not used by getNext().
char src[maxn], des[maxn];
// tlen is the length of des; slen is not used by getNext().
int slen, tlen;

// Builds the KMP failure table for des[0..tlen-1].
// nxt[0] is the sentinel -1; for 1 <= k <= tlen, nxt[k] is the length of the
// longest proper prefix of des[0..k-1] that is also a suffix of it.
void getNext()
{
    nxt[0] = -1;
    int border = -1;
    int pos = 0;
    while (pos < tlen) {
        if (border == -1 || des[pos] == des[border]) {
            ++border;
            ++pos;
            nxt[pos] = border;
            continue;
        }
        // Mismatch: fall back to the next shorter border.
        border = nxt[border];
    }
}

// Reads (length, string) cases until a length of 0 or end of input. For each
// case prints every prefix length i >= 2 whose prefix consists of a block
// repeated more than once, together with the repetition count, followed by a
// blank line.
int main()
{
    int cas = 0;
    // Compare against 1: scanf returns EOF (a non-zero value) at end of
    // input, so testing the result for truthiness would loop forever when
    // the terminating 0 is missing.
    while (scanf("%d", &tlen) == 1 && tlen) {
        // A length without its string is a truncated input; stop instead of
        // analysing the previous case's data.
        if (scanf("%s", des) != 1)
            break;
        getNext();
        printf("Test case #%d\n", ++cas);
        for (int i = 2; i <= tlen; i++) {
            // Shortest period of the prefix of length i.
            int len = i - nxt[i];
            if (i != len && i % len == 0)
                printf("%d %d\n", i, i / len);
        }
        puts("");
    }
    return 0;
}
KMP

HDU3746 Cyclic Nacklace

Posted on

KMP的第一个小应用,求循环节。
昨天开始学字符串,晚上的时候看了很多博客,但都讲的很乱,很复杂,直到看到了这篇 (传送门) 最后在纸上跟着模拟了一下,才算个人意义上的理解了。包括一个小优化。

这道题只有你理解了kmp的next的数组求法才会做。

题意:
一串珠子,要你再加最少的珠子,使整串珠子由同一个循环节重复至少 2 次构成(比如 abcabc)。珠子只能加在左右两端。

思路:
kmp的next数组保存的是以当前位置的前一位置结尾的字符串中最长的相同前后缀长度。
因为字符串是循环的,比如 abcabcab,最长的相同前后缀是 abcab,所以 next[len] = 5,因此 len - next[len] = 3 就是最短循环节的长度了。得到循环节的长度,问题也就随之而解了。

刚看到这道题的时候我还以为是一开始就是个环,可以在任意位置插入,那样的话就稍显麻烦了,虽然原理相同,我现在的水平只能想到循环暴力求。但明显复杂度不满足要求……

AC Code

#include <cstring>
#include <iostream>
using namespace std;

#define ll long long
#define each(i, n) for (int(i) = 0; (i) < (n); (i)++)
#define reach(i, n) for (int(i) = n - 1; (i) >= 0; (i)--)
#define range(i, st, en) for (int(i) = (st); (i) <= (en); (i)++)
#define rrange(i, st, en) for (int(i) = (en); (i) >= (st); (i)--)
#define fill(num, ary) memset((ary), (num), sizeof((ary)))

// Capacity of the buffers and of the failure table.
const int maxn = 1e6 + 5;
const int inf = 0x3f3f3f3f;
// Failure table written by getNext(); entries 0..tlen are filled.
int nxt[maxn];
// des holds the string being analysed; src is not used by getNext().
char src[maxn], des[maxn];
// tlen is the length of des; slen is not used by getNext().
int slen, tlen;

// Builds the KMP failure table for des[0..tlen-1].
// nxt[0] is the sentinel -1; for 1 <= k <= tlen, nxt[k] is the length of the
// longest proper prefix of des[0..k-1] that is also a suffix of it.
void getNext()
{
    nxt[0] = -1;
    int border = -1;
    for (int pos = 0; pos < tlen;) {
        if (border != -1 && des[pos] != des[border]) {
            // Mismatch: retry with the next shorter border.
            border = nxt[border];
            continue;
        }
        nxt[pos + 1] = border + 1;
        ++pos;
        ++border;
    }
}

// Reads the number of test cases. For each string prints how many characters
// must be appended so that it consists of its shortest period repeated at
// least twice (0 when it already does).
int main()
{
    int T_T;
    scanf("%d", &T_T);
    for (; T_T; --T_T) {
        scanf("%s", des);
        tlen = strlen(des);
        getNext();
        const int period = tlen - nxt[tlen];
        const bool complete = (tlen != period) && (tlen % period == 0);
        if (complete) {
            puts("0");
            continue;
        }
        // nxt[tlen] % period is the length of the trailing partial period.
        printf("%d\n", period - nxt[tlen] % period);
    }
    return 0;
}