// Timing demo: fills two pinned host arrays A and B, has a CUDA kernel compute
// C[i] = A[i] * A[i] + B[i], and prints a timestamp after each phase.
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "timestamp.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Aborts with file/line and the CUDA error string when a runtime call fails.
// Without this, a failed allocation or kernel fault would go unnoticed and
// later code would operate on invalid pointers or report bogus timings.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_check_err_ = (call);                              \
        if (cuda_check_err_ != cudaSuccess) {                              \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cuda_check_err_));        \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Element type of the three work arrays.
typedef long arrayelt;

// Work arrays; allocated by getarray(), released by freearray().
arrayelt *A, *B, *C;

// Number of elements per array (overridable via argv[1]) and the matching
// size in bytes, computed by getparms().
long arraydim = 12345678, arraysize;

// Reads the optional element count from argv[1], validates it, and derives
// arraysize. Exits with a diagnostic when the argument is not a positive
// integer or when the byte size would not fit in a long.
void getparms(int argc, char* argv[]) {
    if (argc > 1) {
        char* end = NULL;
        errno = 0;
        const long requested = strtol(argv[1], &end, 10);
        if (errno != 0 || end == argv[1] || *end != '\0' || requested <= 0) {
            fprintf(stderr, "invalid array dimension '%s': expected a positive integer\n",
                    argv[1]);
            exit(EXIT_FAILURE);
        }
        arraydim = requested;
    }

    // Guard the multiplication below against signed overflow.
    if (arraydim > LONG_MAX / (long)sizeof(arrayelt)) {
        fprintf(stderr, "array dimension %ld is too large\n", arraydim);
        exit(EXIT_FAILURE);
    }

    arraysize = arraydim * sizeof(arrayelt);
    printf("arraysize = %.0fMB\n", (float)arraysize / 1024 / 1024);
}

// Allocates arraysize bytes with cudaMallocHost and stores the pointer in a.
// Terminates the program if the allocation fails.
void getarray(arrayelt*& a) {
    CUDA_CHECK(cudaMallocHost(&a, (size_t)arraysize));
}

// Releases an array obtained from getarray().
void freearray(arrayelt* a) {
    CUDA_CHECK(cudaFreeHost(a));
}

// Host-side fill: A[i] = i and B[i] = i + 1 for every index below arraydim.
void initarrays(void) {
    for (long ai = 0; ai < arraydim; ai++) {
        A[ai] = ai;
        B[ai] = ai + 1;
    }
}

// Per-element operation: a squared plus b.
// NOTE(review): a * a is evaluated in arrayelt (long); on platforms where long
// is 32 bits this overflows for the default array dimension -- confirm the
// intended target.
__device__ inline arrayelt mathalgo(const arrayelt a, const arrayelt b) {
    return (a * a + b);
}

// Computes C[i] = mathalgo(A[i], B[i]) for every i in [0, arraydim).
//
// Written as a grid-stride loop over a 1-D launch: each thread starts at its
// global index and advances by the total thread count, so every element is
// handled exactly once for any grid/block size. With a <<<1, 1>>> launch this
// degenerates to a plain sequential loop over the whole array.
// The parameters intentionally shadow the host globals of the same names.
__global__ void domath(long arraydim, arrayelt* A, arrayelt* B, arrayelt* C) {
    // 64-bit arithmetic so that index + stride cannot wrap even where long is
    // only 32 bits wide.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    for (long long ai = first; ai < arraydim; ai += stride)
        C[ai] = mathalgo(A[ai], B[ai]);
}

// Runs the phases in order (parse arguments, allocate, initialise, compute,
// free) and calls timestamp() after each one.
int main(int argc, char* argv[]) {
    printf("hello!  - using GPU\n");
    getparms(argc, argv);
    timestamp(NULL);

    getarray(A);
    getarray(B);
    getarray(C);
    timestamp("... arrays allocated");

    initarrays();
    timestamp("... arrays initialized");

    // The kernel receives the cudaMallocHost pointers directly.
    // NOTE(review): this relies on the pinned allocations being addressable
    // from device code (unified virtual addressing) -- confirm for the target.
    // The launch is kept at a single thread, as in the original program; the
    // kernel itself works with any 1-D configuration.
    domath <<< 1, 1 >>> (arraydim, A, B, C);
    CUDA_CHECK(cudaGetLastError());        // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());   // wait for the kernel; surfaces faults
    timestamp("... computation done");

    freearray(A);
    freearray(B);
    freearray(C);
    timestamp("... arrays deallocated");

    printf("goodbye\n");
    return (0);
}