#include #include #include "config.h" #include "ipedma.h" __global__ void null(uint32_t *data) { } __global__ void ipedma(volatile void *bar, uintptr_t bus_addr, volatile uint64_t *desc, uint32_t *data) { int i; clock_t sum = 0, sumk = 0, t1, t2, t3; for (i = 0; i < GPU_ITERS; i++) { long wait = 0; desc[1] = 0; // Reset counter #ifdef USE_HW_CONTER WR32 (REG_DMA, 0); WR32 (REG_PERF_COUNTER, 0); WR64 (REG_DESCRIPTOR_ADDRESS, bus_addr); t1 = clock64(); WR32 (REG_DMA, 1); #else t1 = clock64(); WR64 (REG_DESCRIPTOR_ADDRESS, bus_addr); #endif do { if (++wait > 0x10000) break; } while (desc[1] == 0); t2 = clock64(); #ifdef USE_HW_CONTER WR32 (REG_PERF_COUNTER, 1); #endif null<<<1,1>>>(data); cudaDeviceSynchronize(); t3 = clock64(); sum += t2 - t1; sumk += t3 - t1; } data[0] = sum / GPU_ITERS; data[1] = sumk / GPU_ITERS; } /* __global__ void do_leet (int *rin) { *rin = 0x1337; } */