Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/156.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 使用指数级系统ram的CUDA程序_C++_Cuda_Sfml - Fatal编程技术网

C++ 使用指数级系统ram的CUDA程序

C++ 使用指数级系统ram的CUDA程序,c++,cuda,sfml,C++,Cuda,Sfml,我的粒子模拟占用了太多内存。我关心的不是数量本身,而是事实,当我有理由相信它不应该增长时,它会呈指数增长。我正在使用CUDA,这是我最近添加的组件,因此我怀疑是导致问题的原因。我已经确定这不仅仅是内核内部的问题,因为即使内核没有运行,ram的使用也会增加。我怀疑这与我分配内存的方式有关,但我不明白我哪里出错了。我为这样一个微不足道的问题道歉,我是CUDA的新手(如果这不是显而易见的话)。这是有问题的意大利面,谢谢你抽出时间 int main() { std::srand(time(0)

我的粒子模拟占用了太多内存。我关心的不是数量本身,而是事实,当我有理由相信它不应该增长时,它会呈指数增长。我正在使用CUDA,这是我最近添加的组件,因此我怀疑是导致问题的原因。我已经确定这不仅仅是内核内部的问题,因为即使内核没有运行,ram的使用也会增加。我怀疑这与我分配内存的方式有关,但我不明白我哪里出错了。我为这样一个微不足道的问题道歉,我是CUDA的新手(如果这不是显而易见的话)。这是有问题的意大利面,谢谢你抽出时间


int main() {
    std::srand(time(0));
    window.setFramerateLimit(limit);
    window.setVerticalSyncEnabled(true);
    sf::Clock clock;
    
    while (window.isOpen()) {
        sf::Event evnt;
        while (window.pollEvent(evnt)) {
            switch (evnt.type) {
            case sf::Event::Closed:
                window.close();
                break;
            case sf::Event::TextEntered:
                if (evnt.text.unicode < 128) {
                    //printf("%c", evnt.text.unicode);
                }
            }
            
        }


        if (sf::Keyboard::isKeyPressed(sf::Keyboard::Key::Space)) {
            spawnParticle();
            
        }
        if (sf::Keyboard::isKeyPressed(sf::Keyboard::Key::R)) {
            for (auto particle : particleList) {
                delete particle;
            }
            particleList.clear();
        }

        window.clear(sf::Color::Color::Black);
        background.setFillColor(sf::Color::Color(25, 25, 25, 255));
        background.setPosition(-8, -8);
        window.draw(background);

        for (int i = 0; i < particleList.size(); i++) {
            particleList[i]->write(i);

        }


        int Num = particleList.size();

        // Vectors for holding the host-side (CPU-side) data
        float* h_big_algo, * h_big_relationships, * h_location,  * h_destinations, * h_energies, * h_frequencies;
        int* h_N;
        cudaMallocHost(&h_big_algo, Num * Num * 8 * sizeof(float));
        cudaMallocHost(&h_big_relationships, Num * Num * 3 * sizeof(float));
        cudaMallocHost(&h_location, Num * 2 * sizeof(float));
        cudaMallocHost(&h_N, sizeof(int));
        cudaMallocHost(&h_destinations, Num * 2 * sizeof(float));
        cudaMallocHost(&h_energies, Num * sizeof(float));
        cudaMallocHost(&h_frequencies, Num * sizeof(float));

        h_big_algo = big_algo.data();
        h_big_relationships = big_relationships.data();
        h_location = location_list.data();
        h_N = &Num;
        h_frequencies = frequencies.data();

        // Allocate device memory
        float* d_big_algo, * d_big_relationships, * d_location, *d_destinations,  *d_energies, *d_frequencies;
        int* d_N, * d_influence_N;
        cudaMalloc(&d_big_algo, Num * Num *8*sizeof(float));
        cudaMalloc(&d_big_relationships, Num * Num *3*sizeof(float));
        cudaMalloc(&d_location, Num *2*sizeof(float));
        cudaMalloc(&d_N, sizeof(int));
        cudaMalloc(&d_destinations, Num * 2 * sizeof(float));
        cudaMalloc(&d_influence_N, Num *sizeof(int));
        cudaMalloc(&d_energies, Num * sizeof(float));
        cudaMalloc(&d_frequencies, Num * sizeof(float));

        // Copy data to the device
        cudaMemcpy(d_big_algo, h_big_algo, Num * 8 * Num * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_big_relationships, h_big_relationships, Num * Num * 3 * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_location, h_location, Num * 2 * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_N, h_N, sizeof(int), cudaMemcpyHostToDevice);
        cudaMemcpy(d_energies, energies.data(), Num * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_frequencies, h_frequencies, Num * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_destinations, h_location, Num * 2 * sizeof(float), cudaMemcpyHostToDevice);

        cudaMemset(d_influence_N, 0, Num * sizeof(int));

        int NUM_THREADS = 1024;

        int NUM_BLOCKS = (pow(Num,2) + NUM_THREADS - 1) / NUM_THREADS;

        move <<<NUM_BLOCKS, NUM_THREADS>>> (d_big_algo, d_big_relationships, d_location, d_N, 
                                                d_destinations, d_influence_N, d_energies, d_frequencies);
         
        // Copy back to the host
        cudaMemcpy(h_destinations, d_destinations, Num * 2 * sizeof(float), cudaMemcpyDeviceToHost);
        cudaMemcpy(h_energies, d_energies, Num * sizeof(float), cudaMemcpyDeviceToHost);

        // Free memory on device
        cudaFree(d_big_algo);
        cudaFree(d_big_relationships);
        cudaFree(d_location);
        cudaFree(d_N);
        cudaFree(d_destinations);
        cudaFree(d_influence_N);
        cudaFree(d_energies);
        cudaFree(d_frequencies);

        big_algo.clear();
        big_relationships.clear();

        location_list.clear();
        energies.clear();
        frequencies.clear();

        //read from h_locations and h_energies

        cudaFreeHost(h_big_algo);
        cudaFreeHost(h_big_relationships);
        cudaFreeHost(h_N);
        cudaFreeHost(h_frequencies);
        cudaFreeHost(h_location);
        apply_all(h_destinations, h_energies);
        cudaFreeHost(h_energies);
        cudaFreeHost(h_destinations);

        for (int i = 0; i < particleList.size(); i++) {
            particleList[i]->draw_self();

            /*if (particleList[i]->energy < 0) {
                cout << "particle died" << endl;
                particleList[i]->seppuku();
                //doomed_particles.push_back({ i, particleList[i] });
            }
            if (particleList[i]->energy > 10) {
                particleList[i]->reproduce();
                particleList[i]->energy -= reproduction_cost;
            }*/
            
        }
        

        window.display();
        }
        
    return 0;
}


int main(){
标准:srand(时间(0));
设置帧率限制(限制);
window.setVerticalSyncEnabled(真);
sf:时钟;
while(window.isOpen()){
sf::事件evnt;
while(window.pollEvent(evnt)){
开关(evnt.type){
案例sf::事件::已结束:
window.close();
打破
案例sf::事件::文本输入:
if(evnt.text.unicode<128){
//printf(“%c”,evnt.text.unicode);
}
}
}
如果(sf::Keyboard::isKeyPressed(sf::Keyboard::Key::Space)){
粒子();
}
如果(sf::Keyboard::isKeyPressed(sf::Keyboard::Key::R)){
用于(自动粒子:particleList){
删除粒子;
}
particleList.clear();
}
窗口。清除(sf::颜色::颜色::黑色);
setFillColor(sf::Color::Color(25,25,25255));
背景设置位置(-8,-8);
窗口。绘制(背景);
对于(int i=0;i写(i);
}
int Num=particleList.size();
//用于保存主机端(CPU端)数据的向量
浮动*h_大算法,*h_大关系,*h_位置,*h_目的地,*h_能量,*h_频率;
int*hn;
cudamalochost(&h_big_algo,Num*Num*8*sizeof(float));
cudaMallocHost(&h_big_relationships,Num*Num*3*sizeof(float));
cudaMallocHost(&h_位置,Num*2*sizeof(float));
库达马洛霍斯特(h_N,sizeof(int));
cudaMallocHost(和h_目的地,数量*2*sizeof(浮动));
Cudamalochost(&h_能量,Num*sizeof(float));
cudaMallocHost(&h_频率,Num*sizeof(float));
h_big_algo=big_algo.data();
h_big_relationships=big_relationships.data();
h_location=location_list.data();
h_N=&Num;
h_频率=频率。数据();
//分配设备内存
浮动*d_大算法、*d_大关系、*d_位置、*d_目的地、*d_能量、*d_频率;
int*d_N,*d_影响;
cudaMalloc(&d_big_algo,Num*Num*8*sizeof(float));
cudamaloc(和d_big_关系,Num*Num*3*sizeof(float));
cudaMalloc(&d_位置,数量*2*sizeof(浮动));
库达马洛克(d_N,sizeof(int)),;
Cudamaloc(和d_目的地,数量*2*sizeof(浮动));
cudamaloc(&d_influence_N,Num*sizeof(int));
Cudamaloc(&d_能量,Num*sizeof(float));
cudaMalloc(&d_频率,Num*sizeof(float));
//将数据复制到设备
cudaMemcpy(d_big_algo,h_big_algo,Num*8*Num*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(d_big_关系、h_big_关系、Num*Num*3*sizeof(float)、cudaMemcpyHostToDevice);
cudaMemcpy(d_位置、h_位置、Num*2*sizeof(float)、cudaMemcpyHostToDevice);
cudaMemcpy(d_N,h_N,sizeof(int),cudamemcpyhostodevice);
cudaMemcpy(d_energies,energies.data(),Num*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(d_频率、h_频率、Num*sizeof(float)、cudaMemcpyHostToDevice);
cudaMemcpy(d_目的地、h_位置、Num*2*sizeof(float)、cudaMemcpyHostToDevice);
cudaMemset(d_influence_N,0,Num*sizeof(int));
int NUM_线程=1024;
int NUM_块=(pow(NUM,2)+NUM_线程-1)/NUM_线程;
移动(d_big_algo,d_big_关系,d_位置,d_N,
d_目的地、d_影响、d_能量、d_频率);
//复制回主机
cudaMemcpy(h_目的地、d_目的地、Num*2*sizeof(float)、cudamemcpydevicetoost);
cudaMemcpy(h_能量,d_能量,Num*sizeof(float),cudamemcpydevicetoost);
//设备上的可用内存
cudaFree(d_big_algo);
cudaFree(d_big_关系);
cudaFree(d_位置);
库达弗里(d_N);
cudaFree(迪乌目的地);
cudaFree(d_影响N);
cudaFree(d_能量);
cudaFree(d_频率);
大算法清除();
大关系。清除();
位置_list.clear();
能量;
频率。清除();
//从h_位置和h_能量读取
cudaFreeHost(h_big_algo);
cudaFreeHost(h_big_关系);
cudaFreeHost(h_N);
cudaFreeHost(h_频率);
cudaFreeHost(h_位置);
全部应用(目的地、能量);
cudaFreeHost(h_能量);
cudaFreeHost(h_目的地);
对于(int i=0;i绘制自身();
/*if(粒子列表[i]->能量<0){
库特能量>10){
分词列表[i]->repearch();
粒子列表[i]>能量-=复制成本;
}*/
}
window.display();
}
返回0;
}
以及内核的良好措施:

__global__ void move(float* d_big_algo, float* d_big_relationships, float* d_location, 
                int* N,  float* d_destinations, int * d_influence_N, float *d_energies, float*d_frequencies) {
    
    int id = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (id < (*N)*(*N)) {
        //printf("%i ", *N);

        int subject = (id-((id+*N)%*N))/ *N;
        int object = (id + *N) % *N;
        
        float distance = sqrt(powf((d_location[object*2] - d_location[subject*2]), 2.0f) 
                    + powf((d_location[object * 2 +1] - d_location[subject * 2+ 1]), 2.0f));

        float relative_maximum = d_big_relationships[(object * 3) + (subject * *N * 3) +2];
        
        if ((distance < relative_maximum)&&(distance > 0)) {
            float relative_minimum = d_big_relationships[(object * 3) + (subject * *N * 3)];
            float relative_medium = d_big_relationships[(object * 3) + (subject * *N * 3) + 1];

            /*if (distance < 12) {
                if (abs(d_frequencies[subject] - d_frequencies[object]) > 0.1) {
                    if (d_energies[subject] > d_energies[object]) {
                        d_energies[subject]+=0.1;
                        d_energies[object]-=0.1;
                    }
                    if (d_energies[subject] < d_energies[object]) {
                        d_energies[subject]-=0.1;
                        d_energies[object]+=0.1;
                    }
                }
                //else {
                //  d_energies[subject]+= (d_energies[subject] - d_energies[object])/100;
                //}
            }*/

            if ((distance < 8) && (distance > 0)) {
                //printf("%i moving\n", id);
                float force = 2.0f * (-distance / powf(distance, 2));
                d_influence_N[subject] += 1;
                //printf("%i ready to sync\n", id);
                //__syncthreads();
                //printf("%i INFLUENCE ", d_influence_N[subject]);
                d_destinations[subject*2] += force * (d_location[object * 2] - d_location[subject * 2]);
                d_destinations[subject * 2+1] += force * (d_location[object * 2 + 1] - d_location[subject * 2 + 1]);
                //delete& force;

            }
            else if ((distance < relative_medium) && (distance > relative_minimum)) {
                //printf("%i moving\n", id);
                float force = d_big_algo[(object * 8) + (subject * *N * 8) + 4] * abs((d_big_algo[(object * 8) + (subject * *N * 8) + 5] * distance)
                                                - d_big_algo[(object * 8) + (subject * *N * 8) + 6]) + d_big_algo[(object * 8) + (subject * *N * 8) + 7];
                d_influence_N[subject] += 1;
                //printf("%i ready to sync\n", id);
                //__syncthreads();
                //printf("%i INFLUENCE ", d_influence_N[subject]);
                float destination_mod = (2.0f * d_influence_N[subject]) / powf(d_influence_N[subject], 2.0f);
                d_destinations[subject * 2] += force * (d_location[object * 2] - d_location[subject * 2]) * destination_mod;
                d_destinations[subject * 2 + 1] += force * (d_location[object * 2 + 1] - d_location[subject * 2 + 1]) * destination_mod;
                //delete& force;
                //delete& destination_mod;
            }

            else if (distance > relative_medium) {
                //printf("%i moving\n", id);
                float force = d_big_algo[(object * 8) + (subject * *N * 8)] * abs((d_big_algo[(object * 8) + (subject * *N * 8) +1] * distance)
                                                - d_big_algo[(object * 8) + (subject * *N * 8) + 2]) + d_big_algo[(object * 8) + (subject * *N * 8) + 3];
                d_influence_N[subject] += 1;
                //printf("%i ready to sync\n", id);
                //__syncthreads();
                //printf("%i INFLUENCE ", d_influence_N[subject]);
                float destination_mod = (2.0f * d_influence_N[subject]) / powf(d_influence_N[subject], 2.0f);
                d_destinations[subject * 2] += force * (d_location[object * 2] - d_location[subject * 2]) * destination_mod;
                d_destinations[subject * 2 + 1] += force * (d_location[object * 2 + 1] - d_location[subject * 2 + 1]) * destination_mod;
                //delete& force;
                //delete& destination_mod;
                
            }
            //delete& relative_minimum;
            //delete& relative_medium;
        }
        //delete& subject;
        //delete& object;
        //delete& distance;
        //delete& relative_maximum;
    }

    //__syncthreads();
    if (id < *N) {
        if (d_location[id * 2+1] < 2 || d_location[id * 2 + 1] > HEIGHT - 2) {
            d_destinations[id * 2+1] = HEIGHT / 2;
        }
        if (d_location[id * 2]<2 || d_location[id * 2] > WIDTH - 2) {
            d_destinations[id * 2] = WIDTH / 2;
        }

        if (d_location[id * 2] >= WIDTH - 10) {
            d_destinations[id * 2] = abs(d_location[id * 2]) - ((d_location[id * 2] - (WIDTH - 10)) / 2) / (WIDTH / abs(d_location[id * 2]));
        }
        if (d_location[id * 2] < 10) {
            d_destinations[id * 2] = abs(d_location[id * 2]) + ((d_location[id * 2] + 10) / 2) / (abs(d_location[id * 2]) + 0.1);
        }
        if (d_location[id * 2 + 1] >= HEIGHT - 10) {
            d_destinations[id * 2 + 1] = abs(d_location[id * 2 + 1]) - ((d_location[id * 2 + 1] - (HEIGHT - 10)) / 2) / (HEIGHT / abs(d_location[id * 2 + 1]));
        }
        if (d_location[id * 2 + 1] < 10) {
            d_destinations[id * 2 + 1] = abs(d_location[id * 2 + 1]) + ((d_location[id * 2 + 1] + 10) / 2) / (abs(d_location[id * 2 + 1]) + 0.1);
        }

        if (2.0f * (sqrt(powf(d_location[id * 2] - WIDTH / 2.0f, 2) + powf(d_location[id * 2 + 1] - WIDTH / 2.0f, 2))) > WIDTH) {
            if (d_location[id * 2 + 1] >= HEIGHT / 2.0f) {
                d_destinations[id * 2 + 1] = sqrt(abs(powf(HEIGHT / 2.0f, 2) - powf(d_location[id * 2] - HEIGHT / 2.0f, 2))) + HEIGHT / 2.0f;
            }
            if (d_location[id * 2 + 1] <= HEIGHT / 2.0f) {
                d_destinations[id * 2 + 1] = -sqrt(abs(powf(HEIGHT / 2.0f, 2) - powf(d_location[id * 2] - HEIGHT / 2.0f, 2))) + HEIGHT / 2.0f;
            }
            if (d_location[id * 2] <= WIDTH / 2.0f) {
                d_destinations[id * 2] = -sqrt(abs(powf(WIDTH / 2.0f, 2) - powf(d_location[id * 2 + 1] - WIDTH / 2.0f, 2))) + WIDTH / 2.0f;
            }
            if (d_location[id * 2] >= WIDTH / 2.0f) {
                d_destinations[id * 2] = sqrt(abs(powf(WIDTH / 2.0f, 2) - powf(d_location[id * 2 + 1] - WIDTH / 2.0f, 2))) + WIDTH / 2.0f;

                //send out
            }
        }
    }
    delete &id;
    
    
}
\uuuu全局\uuuu无效移动(浮点*d\u大\u算法,浮点*d\u大\u关系,浮点*d\u低)