C++算法之旅、05 基础篇 | 第二章数据结构

2023-09-03 13:15 由小能日记发表于 #后端开发

常用代码模板2——数据结构 - AcWing

笔试用数组模拟而不是结构体

使用结构体指针，new Node() 非常慢，创建10万个节点就超时了，做笔试题不会用这种方式（优化是提前初始化好数组，但这样跟数组模拟没区别了，而且代码量很长）

单链表（数组）

使用两个数组，e存储val，ne存储next。空节点next用-1表示

826 ⭐

826. 单链表 - AcWing题库

第1个插入的点下标为0，第5个插入点下标为4，第k个插入点下标为k-1；

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

// head指向头结点，e[i]表示节点值，ne[i]表示节点next
// idx指向可用空间（当前可以用的最新点下标）
// 算法题不用考虑浪费的空间
int head, e[N], ne[N], idx;

void init() {
    head = -1;
    idx = 0;
}

// x 插到头结点后面
void add_to_head(int x) {
    e[idx] = x;
    ne[idx] = head;
    head = idx++;
}

// x 插到下标k的点后面
void add(int k, int x) {
    e[idx] = x;
    ne[idx] = ne[k];
    ne[k] = idx++;
}

// 将下标 k 后面点删掉
void remove(int k) {
    // if (ne[k] == -1) return; 题目保证合法不考虑边界情况
    ne[k] = ne[ne[k]];
}

int main() {
    int m;
    cin >> m;
    init();
    while (m--) {
        char c;
        int k, x;
        cin >> c;
        if (c == 'H') {
            cin >> x;
            add_to_head(x);
        } else if (c == 'D') {
            cin >> k;
            if (!k)
                head = ne[head];  // 特判删除头结点(链表第一个有效元素)
            else
                remove(k - 1);
        } else if (c == 'I') {
            cin >> k >> x;
            add(k - 1, x);
        }
    }
    for (int i = head; i != -1; i = ne[i]) {
        cout << e[i] << " ";
    }

    return 0;
}

邻接表

本质是一堆单链表，head[i]->x->x->-1 意思第i个点的邻边存起来了

最大用途：存储图、树 （内容在第三章）

双链表（数组）

用于优化某些题，每个节点有两个指针，一个指向前，一个指向后

需要3个数组，l 数组表示before，r 数组表示next，e 数组表示val，idx指向可用空间

下标0为head，指向头结点（左端点）；下标1为tail，指向尾结点（右端点）

827 ⭐

827. 双链表 - AcWing题库

因为提前用掉了数组中的两个，所以第k个插入元素下标是 k - 1 + 2

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int e[N], l[N], r[N], idx;

void init() {
    r[0] = 1;
    l[1] = 0;
    idx = 2;
}

// 在下标k点的右边插入x（可以转化成左边）
void add(int k, int x) {
    e[idx] = x;
    r[idx] = r[k];
    l[idx] = k;
    r[k] = idx;
    l[r[idx]] = idx;
    idx++;
}

// 删除下标k的点
void remove(int k) {
    l[r[k]] = l[k];
    r[l[k]] = r[k];
}

int main() {
    int m;
    cin >> m;
    init();
    while (m--) {
        string s;
        int k, x;
        cin >> s;
        if (s == "L") {
            cin >> x;
            add(0, x);
        } else if (s == "R") {
            cin >> x;
            add(l[1], x);
        } else if (s == "D") {
            cin >> k;
            remove(k - 1 + 2);
        } else if (s == "IL") {
            cin >> k >> x;
            add(l[k - 1 + 2], x);
        } else if (s == "IR") {
            cin >> k >> x;
            add(k - 1 + 2, x);
        }
    }
    for (int i = r[0]; i != 1; i = r[i]) {
        cout << e[i] << " ";
    }
    return 0;
}

栈

830

828. 模拟栈 - AcWing题库

先进后出。数据存储区间为[1,M]，tt为栈顶指针

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int stk[N], tt;

int main() {
    int m;
    cin >> m;
    while (m--) {
        string s;
        int x;
        cin >> s;
        if (s == "push") {
            cin >> x;
            stk[++tt] = x;
        } else if (s == "pop") {
            tt--;
        } else if (s == "empty") {
            if (tt > 0)
                puts("NO");
            else
                puts("YES");
        } else if (s == "query") {
            cout << stk[tt] << endl;
        }
    }
    return 0;
}

3302 ⭐⭐数组

3302. 表达式求值 - AcWing题库

中缀转后缀的重要规则（强行记忆）。转换与计算可以同步进行（各自需要一个栈）

当前运算符优先级<=栈顶元素，则栈顶元素依次输出直到不满足条件，并当前符号进栈
遇到右括号，则栈顶元素依次输出直到左括号
遍历完成后弹出栈内剩余运算符

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <unordered_map>

using namespace std;

const int N = 1e5 + 10;

int op[N], num[N], opt = -1, numt = -1;

unordered_map<char, int> priority;

// 计算后缀表达式
void eval() {
    auto b = num[numt--];
    auto a = num[numt--];
    auto c = op[opt--];
    int res;
    if (c == '-')
        res = a - b;
    else if (c == '+')
        res = a + b;
    else if (c == '*')
        res = a * b;
    else if (c == '/')
        res = a / b;
    num[++numt] = res;
}

int main() {
    priority = {{'-', 1}, {'+', 1}, {'*', 2}, {'/', 2}};
    string str;
    cin >> str;
    for (int i = 0; i < str.size(); i++) {
        if (isdigit(str[i])) {
            int j = i, res = 0;
            while (isdigit(str[j])) res = res * 10 + str[j++] - '0';
            num[++numt] = res;
            i = j - 1;
        } else if (str[i] == '(')
            op[++opt] = str[i];
        else if (str[i] == ')') {
            while (op[opt] != '(') eval();
            opt--;
        } else {
            while (opt >= 0 && priority[str[i]] <= priority[op[opt]]) eval();
            op[++opt] = str[i];
        }
    }
    while (opt >= 0) eval();
    cout << num[numt];
    return 0;
}

3302 ⭐⭐STL

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <stack>
#include <unordered_map>

using namespace std;

stack<int> num;
stack<char> op;

void eval() {
    auto b = num.top();
    num.pop();
    auto a = num.top();
    num.pop();
    auto c = op.top();
    op.pop();
    int x;
    if (c == '+')
        x = a + b;
    else if (c == '-')
        x = a - b;
    else if (c == '*')
        x = a * b;
    else
        x = a / b;
    num.push(x);
}

int main() {
    unordered_map<char, int> pr{{'+', 1}, {'-', 1}, {'*', 2}, {'/', 2}};
    string str;
    cin >> str;
    for (int i = 0; i < str.size(); i++) {
        auto c = str[i];
        if (isdigit(c)) {
            // 第一类双指针
            int x = 0, j = i;
            while (j < str.size() && isdigit(str[j]))
                x = x * 10 + str[j++] - '0';
            i = j - 1;  // 考虑i++
            num.push(x);
        } else if (c == '(')
            op.push(c);
        else if (c == ')') {
            while (op.top() != '(') eval();
            op.pop();
        } else {
            while (op.size() && pr[op.top()] >= pr[c]) eval();
            op.push(c);
        }
    }
    while (op.size()) eval();
    cout << num.top() << endl;

    return 0;
}

队列

829

829. 模拟队列 - AcWing题库队尾插入，队头输出

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int que[N], st = -1, ed = -1;

int main() {
    int m;
    cin >> m;
    while (m--) {
        string str;
        int x;
        cin >> str;
        if (str == "push") {
            cin >> x;
            que[++ed] = x;
        } else if (str == "pop") {
            st++;
        } else if (str == "empty") {
            if (st == ed)
                puts("YES");
            else
                puts("NO");
        } else if (str == "query") {
            cout << que[st + 1] << endl;
        }
    }

    return 0;
}

单调栈

830 ⭐⭐

830. 单调栈 - AcWing题库

朴素做法是两层循环。

使用栈，满足情况：当下标为 i，栈内元素为 \(a_1,a_2...a_i\)

单调栈要求遍历数组过程中，维护栈，满足栈底至栈顶元素呈单调性（依次递增）

栈内 \(a_1\)~\(a_i\) 递增，此时遍历至 \(a_{i+1}\)，将小于 \(a_{i+1}\) 的栈内元素删除，再插入 \(a_{i+1}\)
每个元素一次，出栈一次，\(O(n)\)

scanf 与 cin 速度差了十倍左右，使用 cin.tie(0) 或 ios::sync_with_stdio(false) 加速

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int n;
int stk[N], tt;

int main() {
    cin.tie(0);
    // ios::sync_with_stdio(false);
    int n;
    cin >> n;
    for (int i = 0; i < n; i++) {
        int x;
        cin >> x;
        while (tt && stk[tt] >= x) tt--;
        if (tt)
            cout << stk[tt] << " ";
        else
            cout << -1 << " ";
        stk[++tt] = x;
    }

    return 0;
}

单调队列

154 ⭐⭐

154. 滑动窗口 - AcWing题库

朴素算法是 \(O(n^2)\)

单调队列要求滑动窗口每滑动一次时，将窗口内 >= \(a_i\) 的元素从队尾删除（类似单调栈），\(a_i\) 入队，该队列将保持单调递增，此时对头点为最小值。注意队列里存的是下标

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e6 + 10;

int a[N], q[N], st = 0, ed = -1;

int main() {
    cin.tie(0);
    int n, k;
    cin >> n >> k;
    for (int i = 0; i < n; i++) {
        cin >> a[i];
    }
    for (int i = 0; i < n; i++) {
        // 移动队头
        if (st <= ed && i - k + 1 > q[st]) st++;
        // 移动队尾
        while (st <= ed && a[q[ed]] >= a[i]) ed--;
        // ^ 先插入
        q[++ed] = i;
        // 每次滑动输出
        if (i >= k - 1) printf("%d ", a[q[st]]);
    }
    cout << endl;
    st = 0;
    ed = -1;
    for (int i = 0; i < n; i++) {
        if (st <= ed && i - k + 1 > q[st]) st++;
        while (st <= ed && a[q[ed]] <= a[i]) ed--;
        q[++ed] = i;
        if (i >= k - 1) printf("%d ", a[q[st]]);
    }

    return 0;
}

KMP

一个人能走多远不在于他在顺境时能走的多快，而在于他在逆境时多久能找到曾经的自己

强烈建议看胡凡 算法笔记 P455

KMP字符串 —— 深入浅出 next 数组

假设下标从1开始。字符串 S[1,N]，子串 P[1,M]，S 指针 i-1，P 指针 j

next[j]

子串中，以 j 为终点的后缀与从1开始的前缀相等的最长长度 x
\(P[1,x] = P[j-x+1,j]\)

kmp 建议看算法笔记

i-1 与 j 对应字符相同；i 与 j+1 对应字符不同。此时需要把红颜色子串往后移动，为了移动最少需要 next[j]
让 j = next[j]，从线2变为线3（线1红色部分等于线3红色部分）
继续匹配 i 与 j+1，若发现不匹配，再 j = next[j] （递归进行）
当 j = m，意味着找到子串，然后 j = next[j] 继续寻找

时间复杂度计算

生成next数组 \(O(m)\)
字符串每个字符被遍历一次，\(O(n)\)
j++ 最多 n 次，最多减 n 次，\(O(n)\)

831 ⭐⭐⭐

831. KMP字符串 - AcWing题库

next 在某些头文件里有定义，最好换个变量名；另外KMP算法还可以进一步优化，以下是优化后的算法

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;
const int M = 1e6 + 10;
int next_val[N];

int main() {
    int n, m;
    char p[N], s[M];
    cin >> n >> p + 1 >> m >> s + 1;
    // ^ 生成 next_val 数组
    for (int i = 2, j = 0; i <= n; i++) {
        while (j && p[i] != p[j + 1]) j = next_val[j];
        if (p[i] == p[j + 1]) j++;
        // ^ 继续优化，选择回退的最佳位置
        if (j && p[i + 1] == p[j + 1]) {
            next_val[i] = next_val[j];
        } else
            next_val[i] = j;
    }
    // for (int i = 1; i <= n; i++) {
    //     cout << next_val[i] << endl;
    // }
    // ^ KMP 匹配过程
    for (int i = 1, j = 0; i <= m; i++) {
        while (j && p[j + 1] != s[i]) j = next_val[j];
        if (p[j + 1] == s[i]) j++;
        if (j == n) {
            cout << i - n << " ";
            j = next_val[j];
        }
    }
    return 0;
}

字典树 Trie

用于高效存储和查找字符串集合的数据结构

835

835. Trie字符串统计 - AcWing题库

son[N][26] 存的是每个节点所有的儿子是什么，cnt[N] 存的是单词的数量，idx与数组模拟单链表里的idx相同

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int son[N][26], cnt[N], idx;  // ^ 下标是0的点，既是根节点又是空节点

void insert(char str[]) {
    int p = 0;
    for (int i = 0; str[i]; i++) {
        int u = str[i] - 'a';
        if (!son[p][u]) son[p][u] = ++idx;
        p = son[p][u];
    }
    cnt[p]++;
}

int query(char str[]) {
    int p = 0;
    for (int i = 0; str[i]; i++) {
        int u = str[i] - 'a';
        if (!son[p][u]) return 0;
        p = son[p][u];
    }
    return cnt[p];
}

int main() {
    cin.tie(0);
    int n;
    cin >> n;
    while (n--) {
        char a, b[N];
        cin >> a >> b;
        if (a == 'I')
            insert(b);
        else
            cout << query(b) << endl;
    }
    return 0;
}

143 ⭐⭐

143. 最大异或对 - AcWing题库

朴素算法是两层循环并满足 j<i（避免重复， \(C^2_n = n*(n-1)/2\) ）；时间复杂度 \(O(n^2)\) ，题目给的 n 是 1e5，则 1e10 超时

可以使用 trie tree 优化，每插入一个元素 x，在字典树中查找满足与该元素异或最大值的元素（尽可能反着取，每次查找只要遍历31位），时间复杂度 \(O(n)\)

可以先插入再遍历（少个判断），第一个插入的元素与自身异或为 0

每个节点个数长31，最多1e5个，那么idx最大可以到 31 * 1e5

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10, M = 31 * N;

int son[M][2], idx;

void insert(int x) {
    int p = 0;
    for (int i = 30; i >= 0; i--) {
        int bit = x >> i & 1;
        if (!son[p][bit]) son[p][bit] = ++idx;
        p = son[p][bit];
    }
}

int query(int x) {
    int res = 0, p = 0;
    for (int i = 30; i >= 0; i--) {
        int bit = x >> i & 1;
        if (son[p][!bit]) {
            bit = !bit;
        }
        p = son[p][bit];
        res = (res << 1) + bit;
    }
    return res;
}

int main() {
    int n;
    cin >> n;
    int m = 0;
    while (n--) {
        int x;
        cin >> x;
        insert(x);
        m = max(x ^ query(x), m);
    }
    cout << m;

    return 0;
}

并查集

操作

将两个集合合并
询问两个元素是否在一个集合当中

朴素算法下，合并两个集合需要执行\(O(n+m)\)次；并查集可以近乎\(O(1)\)合并两个集合

基本原理

用树的形式维护所有集合；根节点是集合编号
每个节点存储父节点，p[x] 表示x的父节点

如何判断树根

if (p[x] == x) ，根节点的父节点是它本身

如何求x的集合编号

while(p[x] != x) x = p[x]
路径压缩⭐ ：往上走找到根节点把路径所有点都指向跟节点

如何合并两个集合 ⭐ 记住模板

p[x] = y，将一颗树根节点插到另一棵树的某个位置

int find(int x) {
    if (p[x] != x) p[x] = find(p[x]);
    return p[x];
}

如何维护集合元素个数（携带额外信息）

见837题

836 ⭐

836. 合并集合 - AcWing题库

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int p[N];

// ^ 返回x祖宗节点 + 路径压缩
int find(int x) {
    if (p[x] != x) p[x] = find(p[x]);
    return p[x];
}

int main() {
    cin.tie(0);
    int n, m;
    cin >> n >> m;
    for (int i = 1; i <= n; i++) p[i] = i;

    while (m--) {
        char op;
        int a, b;
        cin >> op >> a >> b;
        if (op == 'M') {
            p[find(a)] = find(b);
        } else {
            if (find(a) == find(b))
                puts("Yes");
            else
                puts("No");
        }
    }
    return 0;
}

837 ⭐⭐

837. 连通块中点的数量 - AcWing题库

保证根节点的nums有意义

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int p[N], nums[N];

int find(int x) {
    if (p[x] != x) p[x] = find(p[x]);
    return p[x];
}

int main() {
    cin.tie(0);
    int n, m;
    cin >> n >> m;
    for (int i = 1; i <= n; i++) {
        p[i] = i;
        nums[i] = 1;
    }
    while (m--) {
        string op;
        int a, b;
        cin >> op;
        if (op == "C") {
            cin >> a >> b;
            // ^ 特判
            if (find(a) == find(b)) continue;
            // ^ 先算元素个数再合并
            nums[find(b)] += nums[find(a)];
            p[find(a)] = find(b);
        } else if (op == "Q1") {
            cin >> a >> b;
            if (find(a) == find(b))
                puts("Yes");
            else
                puts("No");
        } else if (op == "Q2") {
            cin >> a;
            cout << nums[find(a)] << endl;
        }
    }
    return 0;
}

240 ⭐⭐⭐

240. 食物链 - AcWing题库

确定每个动物跟领袖（n对1）的关系，而不是动物跟动物（n对n）的关系

维护每个节点与根节点的距离（x吃y，y到x距离是1），然后 % 3 判断类型。初始化每个节点都是0，各自一类。

余1：可以吃根节点
余2：可以被根节点吃
余0：与根节点是同类

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int p[N], d[N];

int find(int x) {
    if (p[x] != x) {
        // 递归处理所有祖宗节点，并更新到根的距离
        int t = find(p[x]);
        // d[x] 等于 x到父的距离 + 父到根的距离（递归处理完了）
        d[x] += d[p[x]];
        // x 的父修改为根节点，路径优化
        p[x] = t;

        // // 记录旧的父
        // int u = p[x];
        // // 路径压缩，新父根节点
        // p[x] = find(p[x]);
        // // x到根的距离等于x到旧父距离加上旧父到根的距离
        // d[x] += d[u];
    }
    return p[x];
}

int main() {
    cin.tie(0);
    int n, k;
    cin >> n >> k;
    for (int i = 1; i <= n; i++) p[i] = i;

    int count = 0;
    while (k--) {
        char t;
        int x, y;
        cin >> t >> x >> y;
        if (x > n || y > n) {
            count++;
        } else {
            int px = find(x), py = find(y);
            if (t == '1') {
                if (px == py && (d[x] - d[y]) % 3 != 0)
                    count++;
                else if (px != py) {
                    p[px] = py;
                    d[px] = d[y] - d[x];  // IMPORTANT
                }
            } else {
                if (x == y || px == py && (d[x] - d[y] - 1) % 3 != 0) {
                    count++;
                } else if (px != py) {
                    p[px] = py;
                    d[px] = d[y] + 1 - d[x];
                }
            }
        }
    }
    cout << count << endl;

    return 0;
}

堆

小根堆

每个点 <= 左右儿子，根节点就是树的最小值。

完全二叉树用一维数组存：x 的左儿子 2x，x 的右儿子 2x+1；下标从1开始

两个操作 ⭐

down(x) 往下调整（x是坐标 1 ~ size） \(O(log_2n)\)
- 每次找{ x，x左子，x右子 }的最小值，进行交换
up(x) 往上调整 \(O(log_2n)\)
- 每次找{ x，x父 }的最小值，进行交换

堆的功能

插入一个数 \(O(log_2n)\)
- heap[++size] = x 然后不断往上移 up(size)
求集合当中的最小值 \(O(1)\)
- heap[1]
删除最小值 \(O(log_2n)\)
- heap[1] = heap[size--] 堆最后一个元素覆盖堆顶元素，然后 down(1)
- 因为删除头结点非常困难，删除尾结点很easy
删除任意一个元素（STL没直接实现，优先队列） \(O(log_2n)\)
- heap[k] = heap[size--] 有三种情况，可直接 down(k) 再 up(k)，只会执行一个
修改任意一个元素（STL没直接实现，优先队列） \(O(log_2n)\)
- heap[k] = x 再 down(k) 、up(k)

838 ⭐

838. 堆排序 - AcWing题库

一个一个往里插是 \(O(nlog_2n)\) 。可以采用 \(O(n)\) 的方式，先全部读入，除最后一层外反着down操作（倒第2层，倒第3层...第1层）（可用数列推导出，每个节点down的次数总和 < n）（记忆）

    for (int i = n / 2; i; i--) {
        down(i);
    }

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int n, m;
int h[N], idx;

void down(int u) {
    int t = u;
    if (u * 2 <= idx && h[u * 2] < h[t]) t = u * 2;
    if (u * 2 + 1 <= idx && h[u * 2 + 1] < h[t]) t = u * 2 + 1;
    if (u != t) {
        swap(h[t], h[u]);
        down(t);
    }
}

int main() {
    cin.tie(0);
    cin >> n >> m;
    int x;
    for (int i = 1; i <= n; i++) {
        cin >> h[i];
    }
    idx = n;

    for (int i = n / 2; i; i--) {
        down(i);
    }

    while (m--) {
        cout << h[1] << " ";
        h[1] = h[idx--];
        down(1);
    }

    return 0;
}

839 ⭐⭐

839. 模拟堆 - AcWing题库

存储映射：ph 存第k个插入的点在堆里的下标，hp 存堆里点的坐标是第k个插入的，两者互为相反关系。

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

const int N = 1e5 + 10;

int h[N], hp[N], ph[N], idx;

void new_swap(int a, int b) {
    swap(ph[hp[a]], ph[hp[b]]);
    swap(hp[a], hp[b]);
    swap(h[a], h[b]);
}

void up(int x) {
    while (x / 2 && h[x / 2] > h[x]) {
        new_swap(x / 2, x);
        x /= 2;
    }
}

void down(int x) {
    int t = x;
    if (2 * x <= idx && h[2 * x] < h[t]) t = 2 * x;
    if (2 * x + 1 <= idx && h[2 * x + 1] < h[t]) t = 2 * x + 1;
    if (t != x) {
        new_swap(x, t);
        down(t);
    }
}

int main() {
    cin.tie(0);
    int n;
    cin >> n;
    int count = 0;
    while (n--) {
        string s;
        int k, x;
        cin >> s;
        if (s == "I") {
            cin >> x;
            idx++;
            count++;
            h[idx] = x;
            hp[idx] = count;
            ph[count] = idx;
            up(idx);
        } else if (s == "PM") {
            cout << h[1] << endl;
        } else if (s == "DM") {
            // ^ h[1] = h[idx--];  // 这样只交换点,没交换ph hp
            new_swap(1, idx--);
            down(1);
        } else if (s == "D") {
            cin >> k;
            // ^ h[ph[k]] = h[idx--]; // 这样只交换点,没交换ph hp
            k = ph[k];  // 获取第k个插入的数在堆中的坐标
            new_swap(k, idx--);
            down(k), up(k);  // 只会执行其中一个
        } else if (s == "C") {
            cin >> k >> x;
            k = ph[k];
            h[k] = x;
            down(k), up(k);
        }
    }

    return 0;
}

哈希表

情景

把 1e5 个值域 -1e9~1e9 的数（值域大，个数少）映射到 1e5 的范围的哈希函数
\(h(x) \in (0,1e5) = x\ mod\ 1e5\) 模数一般要取质数，离2的整数幂尽可能远
发生冲突：两个不同值域的数映射成了同一个数

存储结构-解决冲突的方式 ⭐

算法题99%一般只有添加、查找，若要实现删除不会真删掉，开一个bool数组标记删除

开放寻址法（常用）
- 开一个一维数组，范围是题目数据范围的2~3倍，即 2e5 ~ 3e5 区间的质数，该数组存储实际的 x 值
- 计算哈希值，如果哈希值已被占用，则移动到下一个位置，从前往后找
- h(11) = 3 在数组[3] 存入 11，h(4) = 3 在数组[4] 存入 4
- 与拉链法不同，查询函数返回 x 所在的位置，如果 x 不存在返回应该存储的位置
- 需要约定一个标志，不在 x 的值域范围内，表示当前位置为空，如 0x3f3f3f3f
拉链法
- 开一个一维数组 \([0,大于1e5的最小质数]\) 存储所有哈希值对应的链表
- h(11) = 3 在数组[3] 开一条链，插入 11
- 若 h(4) = 3 在数组[3] 已开的链插入 4
- 期望情况下，每条链长度 1，时间复杂度 \(O(1)\)

字符串哈希-字符串前缀哈希法（特殊）

应用：可以快速判断一个字符串某两段是否相同。KMP望而却步（KMP擅长循环节），极困难题可用这种方法

先预处理所有前缀的哈希，h[0] = 0, h[1] = "A" 的hash，h[2] = "AB" 的hash...
- 把字符串看成 P 进制的数，然后模 Q
**某个字符不能映射成 0，否则 AA 跟 A 两个哈希都是 0 **
该哈希法假定人品足够好，不考虑冲突
经验值：当 p = 131 或 13331，Q 取 \(2^{64}\) (用 unsinged long long可以不取模，溢出相当于取模)

⭐ 我们可以利用前缀哈希算出任意子段的哈希值

预处理前缀 h(i) = h(i-1) * p + str[i]

与整数离散化的区别

整数离散化需要保序，而且不会发生冲突，每一个元素都有唯一确定的位置（1 ~ n）

840 ⭐拉链法

840. 模拟散列表 - AcWing题库

h 数组维护 N 条链，空节点表示-1，e 数组与 ne 数组维护链上的每一个节点，idx为每个节点分配唯一标识符；插入操作从链表头结点插入

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>

using namespace std;

const int N = 1e5 + 3;  // 质数

int h[N], e[N], ne[N], idx;

void insert(int x) {
    // 第一次模余数可能是负数，第二次模余数绝对是正数
    int k = (x % N + N) % N;
    e[idx] = x;
    ne[idx] = h[k];
    h[k] = idx++;
}

bool find(int x) {
    int k = (x % N + N) % N;
    for (int i = h[k]; i != -1; i = ne[i])
        if (e[i] == x) return true;

    return false;
}

int main() {
    cin.tie(0);
    memset(h, -1, sizeof h);
    int n;
    cin >> n;
    while (n--) {
        string s;
        int x;
        cin >> s >> x;
        if (s == "I")
            insert(x);
        else {
            if (find(x))
                puts("Yes");
            else
                puts("No");
        }
    }
    return 0;
}

840 ⭐开放寻址法

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>

using namespace std;

const int N = 2e5 + 3, null = 0x3f3f3f3f;  // 质数

int h[N];

int find(int x) {
    int k = (x % N + N) % N;
    while (h[k] != null && h[k] != x) {
        k++;
        if (k == N) k = 0;  // 查到数组最后从开头找
    }
    return k;
}

int main() {
    cin.tie(0);
    memset(h, 0x3f, sizeof h);
    int n;
    cin >> n;
    while (n--) {
        string s;
        int x;
        cin >> s >> x;
        int k = find(x);
        if (s == "I")
            h[k] = x;
        else {
            if (h[k] != null)
                puts("Yes");
            else
                puts("No");
        }
    }
    return 0;
}

841 ⭐⭐ 字符串哈希

841. 字符串哈希 - AcWing题库

如下的朴素算法会超时

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

int main() {
    cin.tie(0);
    string s;
    int n, m, l1, l2, r1, r2;
    cin >> n >> m >> s;
    while (m--) {
        cin >> l1 >> r1 >> l2 >> r2;
        l1 -= 1, l2 -= 1, r1 -= 1, r2 -= 1;
        if (s.substr(l1, r1 - l1 + 1) == s.substr(l2, r2 - l2 + 1))
            puts("Yes");
        else
            puts("No");
    }

    return 0;
}

p 数组提前存储预处理的 p 次方的值，减少计算量

#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace std;

typedef unsigned long long ULL;

const int N = 1e5, P = 131;

int n, m;
char str[N];
ULL h[N], p[N];

ULL get(int l, int r) { return h[r] - h[l - 1] * p[r - l + 1]; }

int main() {
    cin.tie(0);
    cin >> n >> m >> str + 1;
    p[0] = 1;
    for (int i = 1; i <= n; i++) {
        p[i] = p[i - 1] * P;
        h[i] = h[i - 1] * P + str[i];
    }

    while (m--) {
        int l1, r1, l2, r2;
        cin >> l1 >> r1 >> l2 >> r2;
        if (get(l1, r1) == get(l2, r2))
            puts("Yes");
        else
            puts("No");
    }
    return 0;
}

STL

size()、empty() 所有容器都有，时间复杂度 \(O(1)\)

clear() 并不是所有容器都有，队列、优先队列、栈；范围遍历可以遍历所有容器

⭐ 系统为某一个程序分配空间时，所需的时间与空间大小无关，与申请次数有关

vector

变长数组，倍增，支持比较运算（字典序（4个3小于3个4））。有erase但用得不多

插入操作\(O(1)\)：插入1e6次，申请空间的次数 \(log_21e6\) ，拷贝的次数均摊是 1 （数组大小从1到1e6，总共拷贝次数是1e6（1+2+4+8+...））

#include <iostream>
#include <vector>

using namespace std;

int main() {
    // ^ 定义一个长度为10的vector，每个数都是3
    vector<int> a(10, 3);
    vector<int> b[10];

    a.push_back(1);

    cout << a.size() << endl;
    cout << a.empty() << endl;
    a.clear();
    cout << a.front() << endl;
    cout << a.back() << endl;
    // a.begin(); // ^ 返回的是迭代器（指针），需要解引用
    // a.end();
    return 0;
}

pair

二元组，支持比较运算（字典序）。适用于某种东西有两个属性，也可以存储三个属性，任意嵌套

相比结构体：pair 帮我们实现了结构体，并实现了比较函数，省了点代码

#include <iostream>

using namespace std;

int main() {
    pair<int, pair<int, string>> a;
    pair<int, string> p;
    p = {20, "abc"};
    p.first;
    p.second;
    return 0;
}

string

字符串，substr(起始位置，长度)、c_str()

#include <cstdio>
#include <iostream>

using namespace std;

int main() {
    string a = "yxc";
    a += "def";
    a += "c";
    // 从哪开始，返回多长（可省略）
    cout << a.substr(1, 2) << endl;
    printf("%s\n", a.c_str());
    return 0;
}

queue

队列，push()、front()、pop()，没有 clear()

int main() {
    queue<int> q;
    // ^ 没有clear的清空方法
    q = queue<int>();
    return 0;
}

deque

双端队列，支持随机访问 []。相当于加强版 vector。效率较低，不常用

front、back、push_back、pop_back、push_front、pop_front

priority_queue

优先队列，默认是大根堆。push()、top()、pop()。头文件queue

int main() {
    priority_queue<int> heap;
    // ^ 定义小根堆
    // 方式 1 每次插入负数
    // heap.push(-x);
    // 方式 2 多加两个参数
    priority_queue<int, vector<int>, greater<int>> heap;
    return 0;
}

stack

栈，push()、top()、pop()；与队列用法类似

set、map、multiset、multimap

基于平衡二叉树（红黑树），动态维护有序序列。begin、end支持++、--操作，返回前驱后继（有序序列的前一个数或后一个数）。增删改查大部分 \(O(log_2n)\)

#include <iostream>
#include <set>

using namespace std;

int main() {
    set<int> s;
    multiset<int> ms;
    s.insert(1);
    // 找不到返回 end 迭代器
    s.find(1);
    // 返回某个数的个数
    s.count(1);
    // 删除所有等于这个数的节点 O(k + logn) k是x的个数
    ms.erase(1);
    // 删除这个迭代器
    ms.erase(ms.find(1));
    // 大于等于 x 的最小的数的迭代器，不存在返回end
    s.lower_bound(0);
    // 大于 x 的最小的数的迭代器，不存在返回end
    s.upper_bound(0);
    return 0;
}

#include <iostream>
#include <map>

using namespace std;

int main() {
    map<string, int> a;
    a.insert({"abc", 123});
    // erase 跟 set 一样
    // 可以像数组一样用 map，时间复杂度 O(logn)
    count << a["abc"] << endl;
    // 也支持 lower_bound、upper_bound
    return 0;
}

unordered_

没有顺序，基于哈希表实现。与上面类似。但增删改查时间复杂度是 \(O(1)\)

不支持 lower_bound、upper_bound；不支持迭代器 ++ --

bitset

压位。C++里bool是1字节，如果要开1024个bool数组需要1024个字节，如果用压位，只需要128B

1e4 * 1e4 布尔矩阵，需要 1e8B，约100MB，题目给的 64MB，用压位可以缩小到12MB

int main() {
    bitset<10000> s;
    // ~ & | ^ >> << == != []
    // count() 返回有多少个 1
    // any() 判断是否至少有1个 1
    // none() 判断是否全为 0
    // set() 把所有位置1
    // set(k,v) 将第k位变成v
    // reset() 把所有位变成0
    // flip() 等价于 ~
    // flip(k) 把第k位取反
    return 0;
}