Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
183 views
in Technique[技术] by (71.8m points)

cuda - thrust operations empty host array

I want to do some thrust operations but I am not sure how exactly.

Right now , I am receiving am array full of zeros ( the h_a array)

I have :

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


template <typename T>
struct square
{
    __host__ __device__
    T operator()( const T& x ) const
    {
        return x * x;
    }

};


int
main(
             int argc,
    const char * argv[] )
{
    const size_t NbOfPoints  = 256;

    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;

    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;

    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // create arrays for holding the number of threads per block in each dimension
    int * X , * Y;
    cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );

    // use device_ptr in Thrust algorithms
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );

    // setup arguments
    square<float> square_op;

    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );


    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

    //square distances
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp =  distX + distY
    thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );


    for ( int i = 0; i < 5; i ++ )
        printf("
 temp = %u",h_a[ i ] );


return 0;
}

UPDATE:

Apart the edits from Robert Crovella , you must edit to integers:

square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

You've got several instances of doing zero-length transforms:

thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

and:

thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );

Since the first two parameters to each of the above transforms is the same, the work being done is zero. Presumably you want the corresponding .end() iterators in the second position rather than .begin()

When I make those changes, I got non-zero values printed out. They are quite large, but you appear to be squaring large values, so I'm not sure what your intent is.


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...