C++ 在子数组中查找多个查询的不同（唯一）值的数目_C++_Arrays_Algorithm_Data Structures_Wavelet

C++ 在子数组中查找多个查询的不同（唯一）值的数目

c++ arrays algorithm data-structures

C++ 在子数组中查找多个查询的不同（唯一）值的数目,c++,arrays,algorithm,data-structures,wavelet,C++,Arrays,Algorithm,Data Structures,Wavelet,我有一个数组（可以有2X10^5个值）。我想在此数组上执行大量查询。每个查询都属于[L，R]类型，此查询的结果应该是子数组中从索引L开始到索引R结束的唯一值的数量我知道这可以在O（nrootn）时间内使用Mo算法完成。然而，问题是莫的算法是离线算法。我要寻找的是一个在线算法，因为前一个查询的结果决定了我案例中的下一个查询我尝试使用来形成一个段树，其中节点将存储范围中所有不同的元素。然而，这对我来说太慢了。预处理过程占用了太多的时间。 < P>这是我的C++尝试，在一个解决方案（也张贴）中使用

我有一个数组（最多可包含 2×10^5 个值）。我想在此数组上执行大量查询。每个查询的形式为 [L, R]，其结果应为子数组（从索引 L 开始到索引 R 结束）中不同（唯一）值的数量。

我知道这可以使用 Mo 算法（莫队算法）在 O(n√n) 时间内完成。然而，问题在于 Mo 算法是离线算法。我需要的是一个在线算法，因为在我的场景中，前一个查询的结果决定了下一个查询。

我尝试过构建一棵线段树，其中每个节点存储其对应区间内所有不同的元素。然而，这对我来说太慢了：预处理过程占用了太多的时间。

这是我用 C++ 给出的一个解法尝试，使用了小波树（wavelet tree），代码改编自下方注释中注明的 GeeksforGeeks 文章。重新表述问题的思路（如 Photon 给出的链接所述）是：首先构造一个数组，为原始数组中的每个单元记录其右侧下一个相同元素的索引（若不存在，则记为 size + 1）。于是问题就变成：统计区间 [L, R] 中有多少个单元的“下一个索引”大于 R（这些元素在区间内显然没有后续重复项），这可以通过带前缀计数的小波树来查询。请参阅代码底部的查询示例（索引从 1 开始）。

// Adapted from https://www.geeksforgeeks.org/wavelet-trees-introduction

#include <iostream>
#include <vector>
#include <map>
#include <algorithm>
#include <climits>
using namespace std;

// Wavelet tree over a sequence of ints, answering "how many elements at
// positions [l..r] are <= k / >= k" queries.
//
// Positions used by the query functions are 1-based. Each node covers the
// inclusive value range [low, high]; an internal node splits that range at
// its midpoint and sends values <= mid to the left child and the rest to the
// right child. Values are expected to lie inside the range given to the
// constructor.
//
// A node owns its children: they are released in the destructor, and copying
// is disabled so that two trees never share (and double-free) the same nodes.
class wavelet_tree {
public:
    // Inclusive range of values covered by this node.
    int low, high;

    // Left and right children; nullptr for leaves and empty nodes.
    wavelet_tree* l = nullptr;
    wavelet_tree* r = nullptr;

    // Prefix counts over this node's sequence: freq[i] is how many of the
    // first i elements are routed to the left child (for a leaf, freq[i] == i).
    // Empty when the node holds no elements.
    std::vector<int> freq;

    // Builds the tree for the elements in [from, to), whose values are
    // expected to lie in [x, y].
    //
    // NOTE: the input range is reordered in place (it is stable-partitioned
    // at every level of the tree).
    wavelet_tree(int* from, int* to, int x, int y)
        : low(x), high(y)
    {
        // Nothing to store: no elements, or an inverted value range (which
        // could otherwise never shrink to a leaf).
        if (from >= to || low > high)
            return;

        freq.reserve(to - from + 1);
        freq.push_back(0);

        // Leaf: every element has the same value, so each one counts.
        if (high == low) {
            for (auto it = from; it != to; ++it)
                freq.push_back(freq.back() + 1);
            return;
        }

        // Floor midpoint computed in a wider type. "(low + high) / 2" rounds
        // toward zero, which for a range such as [-1, 0] yields mid == high,
        // so the left child would get the same range and recurse forever; the
        // sum can also overflow int.
        const int mid = static_cast<int>(
            low + (static_cast<long long>(high) - low) / 2);

        const auto goesLeft = [mid](int value) { return value <= mid; };

        for (auto it = from; it != to; ++it)
            freq.push_back(freq.back() + (goesLeft(*it) ? 1 : 0));

        // Keep relative order inside each half so that positions computed
        // from freq map correctly into the children.
        int* const pivot = std::stable_partition(from, to, goesLeft);

        l = new wavelet_tree(from, pivot, low, mid);
        r = new wavelet_tree(pivot, to, mid + 1, high);
    }

    // Releases both subtrees (deleting nullptr is a no-op).
    ~wavelet_tree()
    {
        delete l;
        delete r;
    }

    // The tree owns raw child pointers, so a member-wise copy would lead to
    // a double delete.
    wavelet_tree(const wavelet_tree&) = delete;
    wavelet_tree& operator=(const wavelet_tree&) = delete;

    // Returns the number of elements at positions [l..r] (1-based, inclusive)
    // whose value is <= k. Positions outside the stored sequence are ignored.
    int kOrLess(int l, int r, int k) const
    {
        // Empty range, or every value in this node is greater than k.
        if (!clampRange(l, r) || k < low)
            return 0;

        // Every value in this node is <= k.
        if (high <= k)
            return r - l + 1;

        // Elements routed left before position l, and up to position r.
        const int LtCount = freq[l - 1];
        const int RtCount = freq[r];

        // Translate [l..r] into each child's own positions and add up.
        return this->l->kOrLess(LtCount + 1, RtCount, k) +
               this->r->kOrLess(l - LtCount, r - RtCount, k);
    }

    // Returns the number of elements at positions [l..r] (1-based, inclusive)
    // whose value is >= k. Positions outside the stored sequence are ignored.
    int kOrMore(int l, int r, int k) const
    {
        // Empty range, or every value in this node is smaller than k.
        if (!clampRange(l, r) || k > high)
            return 0;

        // Every value in this node is >= k.
        if (low >= k)
            return r - l + 1;

        // Elements routed left before position l, and up to position r.
        const int LtCount = freq[l - 1];
        const int RtCount = freq[r];

        // Answer is (those >= k) in left + (those >= k) in right.
        return this->l->kOrMore(LtCount + 1, RtCount, k) +
               this->r->kOrMore(l - LtCount, r - RtCount, k);
    }

private:
    // Restricts [first..last] to the positions this node actually stores.
    // Returns false when the node is empty or the restricted range is empty,
    // so callers never index freq out of bounds.
    bool clampRange(int& first, int& last) const
    {
        if (freq.empty())
            return false;

        const int count = static_cast<int>(freq.size()) - 1;
        first = std::max(first, 1);
        last = std::min(last, count);
        return first <= last;
    }
};


int main() 
{ 
    int size = 7, high = INT_MIN;
    int arr[] = {1, 2, 3, 2, 4, 3, 1};
    int next[size];
    std::map<int, int> next_idx;

    for (int i=size-1; i>=0; i--){
        if (next_idx.find(arr[i]) == next_idx.end())
            next[i] = size + 1;
        else
            next[i] = next_idx[arr[i]];
        next_idx[arr[i]] = i + 1;
        high = max(high, next[i]);
    } 

    // Object of class wavelet tree 
    wavelet_tree obj(next, next + size, 1, high);

    // Queries are NON-zero-based
    //
    //  1  2  3  4  5  6  7
    // {1, 2, 3, 2, 4, 3, 1};
    // query([3, 6]) = 3;
    cout << obj.kOrMore(3, 6, 7) << '\n';
    // query([1, 4]) = 3;
    cout << obj.kOrMore(1, 4, 5) << '\n';
    // query([1, 7]) = 4;
    cout << obj.kOrMore(1, 7, 8) << '\n';

    return 0; 
}

//改编自https://www.geeksforgeeks.org/wavelet-trees-introduction
#包括
#包括
#包括
#包括
#包括
使用名称空间std；
//小波树类
类小波树{
公众：
//元素范围
int低，高；
//左右儿童
小波树*l，*r；
std：：矢量频率；
//默认构造函数
//数组在范围[x，y]内
//指数的范围为[从，到]
小波树（int*from，int*to，int x，int y）
{ 
//初始化低和高
低=x，高=y；
//数组的长度为0
如果（从>=到）
返回；
//数组是同质的
//示例：1
如果（高==低）{
//将存储分配给freq数组
频率储备（至-自+1）；
//初始化Freq数组
频率推回（0）；
//赋值
for（自动it=from；it！=to；it++）
//频率将随着时间的推移而增加
//不再是子树
频率回推（频率回推（）+1）；
返回；
} 
//计算媒体
int mid=（低+高）/2；
//Lambda函数，用于检查数字
//小于或等于中间值
自动lessThanMid=[mid]（int x）{
返回x r或k<低）
返回0；
//范围内的所有元素都小于k
if（高r->kOrLess（l-LtCount，r-RtCount，k））；
} 
//范围[L..R]中大于的数字计数
//或等于k
内特科莫尔（内特l，内特r，内特k）
{ 
//没有元素int范围大于k
如果（l>r或k>high）
返回0；
//范围内的所有元素都大于k
如果（低>=k）
返回r-l+1；
//计算LtCount和RtCount
int LtCount=freq[l-1]；
int RtCount=频率[r]；
//答案是（元素kOrMore的编号（LtCount+1，RtCount，k）+
这个->r->科尔莫（l-LtCount，r-RtCount，k））；
}
}; 
int main（）
{ 
int size=7，high=int_MIN；
int arr[]={1,2,3,2,4,3,1}；
int next[大小]；
std:：map next_idx；
对于（int i=size-1；i>=0；i--）{
if（next_idx.find（arr[i]）==next_idx.end（））
下一个[i]=大小+1；
其他的
next[i]=next_idx[arr[i]]；
下一个_idx[arr[i]]=i+1；
高=最大值（高，下一个[i]）；
} 
//类对象小波树
小波树obj（下一个，下一个+大小，1，高）；
//查询不是基于零的
//
//  1  2  3  4  5  6  7
// {1, 2, 3, 2, 4, 3, 1};
//查询（[3,6]）=3；
难道不能把子数组复制到一个集合（例如 std::set）中，然后获取其大小吗？——我知道这是最显而易见的做法。但我有多达 10^5 个查询，这样会使总复杂度达到 O(n^2)。我想知道是否有办法以更低的复杂度完成。——你执行这么多“查询”的时间范围是多长？每秒？每分钟？还是整个程序的生命周期？这听起来可能不太理想，但如果这些查询分布在数小时的运行时间里，它可能并不是问题。在没有实际测量和性能分析之前，不要否定这些显而易见的（也许“不好”的）解决方案。“足够好”通常就已经足够好了。
——“上一个查询的结果决定下一个查询”——如何决定？是否有任何规则定义它？如果不知道这一点，就很难进行优化。