From 7ec4581ad37b88bbb300ac00850603433a8cdfe9 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Thu, 20 Jun 2013 19:15:55 +0200
Subject: Multipage DMA tests for Xilinx

---
 .bzrignore            |   2 +
 apps/CMakeLists.txt   |   3 +
 apps/load.sh          |   3 +
 apps/xilinx2.c        | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++
 driver/kmem.c         |  19 ++++--
 ipecamera/ipecamera.c |   3 +-
 ipecamera/model.h     |   2 +-
 7 files changed, 202 insertions(+), 9 deletions(-)
 create mode 100755 apps/load.sh
 create mode 100644 apps/xilinx2.c

diff --git a/.bzrignore b/.bzrignore
index f584400..17e5a11 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -19,3 +19,5 @@ Makefile
 *.so.*
 install_manifest.txt
 ./xilinx
+apps/xilinx
+apps/xilinx2
diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
index 2f882a3..80a506f 100644
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -6,3 +6,6 @@ link_directories(${UFODECODE_LIBRARY_DIRS})
 
 add_executable(xilinx xilinx.c)
 target_link_libraries(xilinx pcilib rt)
+
+add_executable(xilinx2 xilinx2.c)
+target_link_libraries(xilinx2 pcilib rt)
diff --git a/apps/load.sh b/apps/load.sh
new file mode 100755
index 0000000..6456c63
--- /dev/null
+++ b/apps/load.sh
@@ -0,0 +1,3 @@
+#! /bin/bash
+
+echo "10ee 6028" >   /sys/bus/pci/drivers/pciDriver/new_id
diff --git a/apps/xilinx2.c b/apps/xilinx2.c
new file mode 100644
index 0000000..6dd1be3
--- /dev/null
+++ b/apps/xilinx2.c
@@ -0,0 +1,179 @@
+#define _BSD_SOURCE
+#define _POSIX_C_SOURCE 199309L
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <time.h>
+#include <sched.h>
+#include <sys/time.h>
+
+#include "pcilib.h"
+#include "irq.h"
+#include "kmem.h"
+
+#define DEVICE "/dev/fpga0"
+#define BAR PCILIB_BAR0
+#define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
+#define BUFFERS 1
+#define ITERATIONS 16384
+#define HUGE_PAGE 128	// number of pages per huge page
+#define PAGE_SIZE 4096	// other values are not supported in the kernel
+#define TIMEOUT 100000
+
+/* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help,
+otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite 
+much extra time */
+//#define USE_IRQ
+//#define CHECK_READY
+//#define REALTIME
+//#define ADD_DELAYS
+
+//#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
+//#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
+#define WR(addr, value) { *(uint32_t*)(bar + addr) = value; }
+#define RD(addr, value) { value = *(uint32_t*)(bar + addr); }
+
+static void fail(const char *msg, ...) {
+    va_list va;
+    
+    va_start(va, msg);
+    vprintf(msg, va);
+    va_end(va);
+    printf("\n");
+
+    exit(-1);
+}
+
+void hpsleep(size_t ns) {
+    struct timespec wait, tv;	/* wait: absolute deadline; tv: most recent clock sample */
+    /* Busy-wait (spin on CLOCK_REALTIME, never yields the CPU) until at least ns nanoseconds have passed. */
+    clock_gettime(CLOCK_REALTIME, &wait);
+
+    wait.tv_nsec += ns;		/* deadline = now + ns; tv_nsec may now exceed one second */
+    if (wait.tv_nsec > 999999999) {
+	wait.tv_sec += wait.tv_nsec / 1000000000;	/* carry every whole second, not just one */
+	wait.tv_nsec %= 1000000000;			/* keep the remainder; the old "1000000000 - tv_nsec" produced a value <= 0 */
+    }
+
+    do {
+	clock_gettime(CLOCK_REALTIME, &tv);
+    } while ((wait.tv_sec > tv.tv_sec)||((wait.tv_sec == tv.tv_sec)&&(wait.tv_nsec > tv.tv_nsec)));
+}
+
+
+int main() {
+    int err;
+    int i, j;
+    pcilib_t *pci;
+    pcilib_kmem_handle_t *kbuf;
+    uint32_t status;
+    struct timeval start, end;
+    size_t size, run_time;
+    void* volatile bar;
+    uintptr_t bus_addr[BUFFERS];
+
+    pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE;
+
+#ifdef ADD_DELAYS
+    long rpt = 0, rpt2 = 0;
+    size_t best_time;
+    best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024);
+#endif /* ADD_DELAYS */
+
+#ifdef REALTIME
+    pid_t pid;
+    struct sched_param sched = {0};
+
+    pid = getpid();
+    sched.sched_priority = sched_get_priority_min(SCHED_FIFO);
+    if (sched_setscheduler(pid, SCHED_FIFO, &sched))
+	printf("Warning: not able to get real-time priority\n");
+#endif /* REALTIME */
+
+    pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
+    if (!pci) fail("pcilib_open");
+
+    bar = pcilib_map_bar(pci, BAR);
+    if (!bar) {
+	pcilib_close(pci);
+	fail("map bar");
+    }
+
+	// Reset
+    WR(0x00, 1)
+    usleep(1000);
+    WR(0x00, 0)
+
+    pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0);
+    pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT);
+
+    pcilib_clean_kernel_memory(pci, USE, clean_flags);
+
+    kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0);
+
+    WR(0x04, 0)
+    WR(0x0C, 0x20)
+    WR(0x10, (HUGE_PAGE * (PAGE_SIZE / 0x80)))
+    WR(0x14, 0x13131313)
+
+    for (j = 0; j < BUFFERS; j++ ) {
+        bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j);
+    }
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < ITERATIONS; i++) {
+	for (j = 0; j < BUFFERS; j++ ) {
+//	    uintptr_t ba = pcilib_kmem_get_block_ba(pci, kbuf, j);
+//	    WR(0x08, ba)
+	    WR(0x08, bus_addr[j]);
+	    WR(0x04, 0x01)
+
+#ifdef USE_IRQ
+	    err = pcilib_wait_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT, TIMEOUT, NULL);
+	    if (err) printf("Timeout waiting for IRQ, err: %i\n", err);
+
+	    RD(0x04, status);
+	    if ((status&0xFFFF) != 0x101) printf("Invalid status %x\n", status);
+//	    WR(0x04, 0x00);
+#else /* USE_IRQ */
+# ifdef ADD_DELAYS
+//	    hpsleep(best_time);
+	    do {
+		rpt++;
+		RD(0x04, status);
+	    } while (status != 0x101);
+# else /* ADD_DELAYS */
+	    do {
+		RD(0x04, status);
+	    } while (status != 0x101);
+# endif /* ADD_DELAYS */
+#endif /* USE_IRQ */
+
+	    WR(0x00, 1)
+#ifdef CHECK_READY
+	    do {
+		rpt2++;
+		RD(0x04, status);
+	    } while (status != 0);
+#endif /* CHECK_READY */
+	    WR(0x00, 0)
+	}
+    }
+    gettimeofday(&end, NULL);
+
+    pcilib_free_kernel_memory(pci, kbuf,  0);
+    pcilib_disable_irq(pci, 0);
+    pcilib_unmap_bar(pci, BAR, bar);
+    pcilib_close(pci);
+
+    run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
+    size = (long long int)ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE;
+
+    printf("%.3lf GB/s: transfered %zu bytes in %zu us using %u buffers\n", 1000000. * size / run_time / 1024 / 1024 / 1024, size, run_time, BUFFERS);
+
+# ifdef ADD_DELAYS
+    printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS));
+#endif /* USE_IRQ */	    
+}
diff --git a/driver/kmem.c b/driver/kmem.c
index 37a7368..ee64a78 100644
--- a/driver/kmem.c
+++ b/driver/kmem.c
@@ -130,23 +130,28 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han
 	    retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) );
 	    break;
 	 case PCILIB_KMEM_TYPE_PAGE:
-	    retptr = (void*)__get_free_pages(GFP_KERNEL, get_order(PAGE_SIZE));
+	    if (kmem_handle->size == 0)
+		kmem_handle->size = PAGE_SIZE;
+	    else if (kmem_handle->size%PAGE_SIZE)
+		goto kmem_alloc_mem_fail;
+		
+	    retptr = (void*)__get_free_pages(GFP_KERNEL|__GFP_DMA, get_order(kmem_handle->size));
 	    kmem_entry->dma_handle = 0;
-	    kmem_handle->size = PAGE_SIZE;
+	    kmem_handle->size = kmem_handle->size;
 	    
 	    if (retptr) {
 	        if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) {
 		    kmem_entry->direction = PCI_DMA_TODEVICE;
-    		    kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, PAGE_SIZE, PCI_DMA_TODEVICE);
+    		    kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, kmem_handle->size, PCI_DMA_TODEVICE);
 		    if (pci_dma_mapping_error(privdata->pdev, kmem_entry->dma_handle)) {
-			free_page((unsigned long)retptr);
+			free_pages((unsigned long)retptr, get_order(kmem_handle->size));
 			goto kmem_alloc_mem_fail;
 		    }
 		} else if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_C2S_PAGE) {
 		    kmem_entry->direction = PCI_DMA_FROMDEVICE;
-    		    kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+    		    kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, kmem_handle->size, PCI_DMA_FROMDEVICE);
 		    if (pci_dma_mapping_error(privdata->pdev, kmem_entry->dma_handle)) {
-			free_page((unsigned long)retptr);
+			free_pages((unsigned long)retptr, get_order(kmem_handle->size));
 			goto kmem_alloc_mem_fail;
 		    
 		    }
@@ -435,7 +440,7 @@ int pcidriver_kmem_free_entry(pcidriver_privdata_t *privdata, pcidriver_kmem_ent
 		    pci_unmap_single(privdata->pdev, kmem_entry->dma_handle, kmem_entry->size, PCI_DMA_FROMDEVICE);
 		}
 	    }
-	    free_page((unsigned long)kmem_entry->cpua);
+	    free_pages((unsigned long)kmem_entry->cpua, get_order(kmem_entry->size));
 	    break;
 	}
 
diff --git a/ipecamera/ipecamera.c b/ipecamera/ipecamera.c
index 27bea77..b4bbeb8 100644
--- a/ipecamera/ipecamera.c
+++ b/ipecamera/ipecamera.c
@@ -123,7 +123,8 @@ pcilib_context_t *ipecamera_init(pcilib_t *pcilib) {
 	    ctx->firmware = value;
 	    break;
 	 default:
-    	    pcilib_error("Unsupported version of firmware (%lu)", value);
+//    	    pcilib_error("Unsupported version of firmware (%lu)", value);
+	    ;
 	}
 
 #ifdef IPECAMERA_BUG_POSTPONED_READ
diff --git a/ipecamera/model.h b/ipecamera/model.h
index 4c527df..37f9096 100644
--- a/ipecamera/model.h
+++ b/ipecamera/model.h
@@ -8,7 +8,7 @@
 
 //#define IPECAMERA_DEBUG
 
-#define IPECAMERA_DMA_R3
+//#define IPECAMERA_DMA_R3
 #define IPECAMERA_DMA_ADDRESS 1
 #define IPECAMERA_DMA_PACKET_LENGTH 4096
 
-- 
cgit v1.2.3


From f000eb0d43193ef8225f947226d9cd3deb00115b Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Sun, 14 Jul 2013 05:49:19 +0200
Subject: Support pre-allocated memory with memmap in KMem

---
 apps/xilinx2.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
 driver/kmem.c  | 30 +++++++++++++++++++++++++++---
 kmem.c         | 10 ++++++++--
 pcilib_types.h |  5 ++++-
 4 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/apps/xilinx2.c b/apps/xilinx2.c
index 6dd1be3..90a2b6a 100644
--- a/apps/xilinx2.c
+++ b/apps/xilinx2.c
@@ -2,6 +2,7 @@
 #define _POSIX_C_SOURCE 199309L
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <stdarg.h>
 #include <time.h>
@@ -15,9 +16,10 @@
 #define DEVICE "/dev/fpga0"
 #define BAR PCILIB_BAR0
 #define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
+#define STATIC_REGION 0x80000000 //  to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters
 #define BUFFERS 1
-#define ITERATIONS 16384
-#define HUGE_PAGE 128	// number of pages per huge page
+#define ITERATIONS 100
+#define HUGE_PAGE 4096	// number of pages per huge page
 #define PAGE_SIZE 4096	// other values are not supported in the kernel
 #define TIMEOUT 100000
 
@@ -28,6 +30,7 @@ much extra time */
 //#define CHECK_READY
 //#define REALTIME
 //#define ADD_DELAYS
+#define CHECK_RESULT
 
 //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
 //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
@@ -64,7 +67,7 @@ void hpsleep(size_t ns) {
 
 int main() {
     int err;
-    int i, j;
+    long i, j;
     pcilib_t *pci;
     pcilib_kmem_handle_t *kbuf;
     uint32_t status;
@@ -110,7 +113,30 @@ int main() {
 
     pcilib_clean_kernel_memory(pci, USE, clean_flags);
 
+#ifdef STATIC_REGION
+    kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, 0);
+#else /* STATIC_REGION */
     kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0);
+#endif /* STATIC_REGION */
+
+    if (!kbuf) {
+	printf("KMem allocation failed\n");
+	exit(-1);	/* non-zero status, same as fail(): exit(0) reported success to the calling shell */
+    }
+
+
+#ifdef CHECK_RESULT    
+    volatile uint32_t *ptr0 = pcilib_kmem_get_block_ua(pci, kbuf, 0);
+
+    memset((void*)ptr0, 0, (HUGE_PAGE * PAGE_SIZE));
+    
+    for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
+	if (ptr0[i] != 0) break;
+    }
+    if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
+	printf("Initialization error in position %lu, value = %x\n", i * 4, ptr0[i]);
+    }
+#endif /* CHECK_RESULT */
 
     WR(0x04, 0)
     WR(0x0C, 0x20)
@@ -163,6 +189,18 @@ int main() {
     }
     gettimeofday(&end, NULL);
 
+
+#ifdef CHECK_RESULT    
+    pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0);
+
+    for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
+	if (ptr0[i] != 0x13131313) break;
+    }
+    if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
+	printf("Error in position %lu, value = %x\n", i * 4, ptr0[i]);
+    }
+#endif /* CHECK_RESULT */
+
     pcilib_free_kernel_memory(pci, kbuf,  0);
     pcilib_disable_irq(pci, 0);
     pcilib_unmap_bar(pci, BAR, bar);
@@ -176,4 +214,6 @@ int main() {
 # ifdef ADD_DELAYS
     printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS));
 #endif /* USE_IRQ */	    
+
+
 }
diff --git a/driver/kmem.c b/driver/kmem.c
index ee64a78..f36ff8c 100644
--- a/driver/kmem.c
+++ b/driver/kmem.c
@@ -129,15 +129,23 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han
 	 case PCILIB_KMEM_TYPE_CONSISTENT:
 	    retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) );
 	    break;
+	 case PCILIB_KMEM_TYPE_REGION:
+	    retptr = ioremap(kmem_handle->pa,  kmem_handle->size);
+	    kmem_entry->dma_handle = kmem_handle->pa;
+	    if (kmem_entry->type == PCILIB_KMEM_TYPE_REGION_S2C) {
+		kmem_entry->direction = PCI_DMA_TODEVICE;
+	    } else if (kmem_entry->type == PCILIB_KMEM_TYPE_REGION_C2S) {
+		kmem_entry->direction = PCI_DMA_FROMDEVICE;
+	    }
+	    break;
 	 case PCILIB_KMEM_TYPE_PAGE:
 	    if (kmem_handle->size == 0)
 		kmem_handle->size = PAGE_SIZE;
 	    else if (kmem_handle->size%PAGE_SIZE)
 		goto kmem_alloc_mem_fail;
-		
+	
 	    retptr = (void*)__get_free_pages(GFP_KERNEL|__GFP_DMA, get_order(kmem_handle->size));
 	    kmem_entry->dma_handle = 0;
-	    kmem_handle->size = kmem_handle->size;
 	    
 	    if (retptr) {
 	        if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) {
@@ -432,6 +440,9 @@ int pcidriver_kmem_free_entry(pcidriver_privdata_t *privdata, pcidriver_kmem_ent
 	 case PCILIB_KMEM_TYPE_CONSISTENT:
 	    pci_free_consistent( privdata->pdev, kmem_entry->size, (void *)(kmem_entry->cpua), kmem_entry->dma_handle );
 	    break;
+	 case PCILIB_KMEM_TYPE_REGION:
+	    iounmap((void *)(kmem_entry->cpua));
+	    break;
 	 case PCILIB_KMEM_TYPE_PAGE:
 	    if (kmem_entry->dma_handle) {
 		if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) {
@@ -609,12 +620,25 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v
 			virt_to_phys((void*)kmem_entry->cpua),
 			page_to_pfn(virt_to_page((void*)kmem_entry->cpua)));
 
-	ret = remap_pfn_range_cpua_compat(
+	 if ((kmem_entry->type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) {
+	    mod_info("Mapping address %08lx / Size %08lx\n",
+			kmem_entry->dma_handle,
+			(vma_size < kmem_entry->size)?vma_size:kmem_entry->size)
+
+		ret = remap_pfn_range_compat(
+					vma,
+					vma->vm_start,
+					kmem_entry->dma_handle,
+					(vma_size < kmem_entry->size)?vma_size:kmem_entry->size,
+					vma->vm_page_prot);
+	 } else {
+		ret = remap_pfn_range_cpua_compat(
 					vma,
 					vma->vm_start,
 					kmem_entry->cpua,
 					(vma_size < kmem_entry->size)?vma_size:kmem_entry->size,
 					vma->vm_page_prot );
+	}
 
 	if (ret) {
 		mod_info("kmem remap failed: %d (%lx)\n", ret,kmem_entry->cpua);
diff --git a/kmem.c b/kmem.c
index d693b60..64f593a 100644
--- a/kmem.c
+++ b/kmem.c
@@ -91,13 +91,19 @@ pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type
     kh.align = alignment;
     kh.use = use;
 
-    if ((type&PCILIB_KMEM_TYPE_MASK) != PCILIB_KMEM_TYPE_PAGE) {
+    if ((type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) {
+	kh.align = 0;
+    } else if ((type&PCILIB_KMEM_TYPE_MASK) != PCILIB_KMEM_TYPE_PAGE) {
 	kh.size += alignment;
     }
-    
+
     for ( i = 0; i < nmemb; i++) {
 	kh.item = i;
 	kh.flags = flags;
+
+	if ((type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) {
+	    kh.pa = alignment + i * size;
+	}
 	
         ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_ALLOC, &kh);
 	if (ret) {
diff --git a/pcilib_types.h b/pcilib_types.h
index f4f8f20..52c0879 100644
--- a/pcilib_types.h
+++ b/pcilib_types.h
@@ -15,7 +15,10 @@ typedef enum {
     PCILIB_KMEM_TYPE_CONSISTENT = 0x00000,
     PCILIB_KMEM_TYPE_PAGE = 0x10000,
     PCILIB_KMEM_TYPE_DMA_S2C_PAGE = 0x10001,
-    PCILIB_KMEM_TYPE_DMA_C2S_PAGE = 0x10002
+    PCILIB_KMEM_TYPE_DMA_C2S_PAGE = 0x10002,
+    PCILIB_KMEM_TYPE_REGION = 0x20000,
+    PCILIB_KMEM_TYPE_REGION_S2C = 0x20001,
+    PCILIB_KMEM_TYPE_REGION_C2S = 0x20002
 } pcilib_kmem_type_t;
 
 typedef enum {
-- 
cgit v1.2.3


From 57fe574e058bc34c95a71995060ac45f71300ab6 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Fri, 26 Jul 2013 17:02:23 +0200
Subject: Allow setting of TLP size

---
 apps/xilinx2.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/apps/xilinx2.c b/apps/xilinx2.c
index 90a2b6a..30e0f94 100644
--- a/apps/xilinx2.c
+++ b/apps/xilinx2.c
@@ -19,6 +19,7 @@
 #define STATIC_REGION 0x80000000 //  to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters
 #define BUFFERS 1
 #define ITERATIONS 100
+#define TLP_SIZE 64
 #define HUGE_PAGE 4096	// number of pages per huge page
 #define PAGE_SIZE 4096	// other values are not supported in the kernel
 #define TIMEOUT 100000
@@ -26,7 +27,7 @@
 /* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help,
 otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite 
 much extra time */
-//#define USE_IRQ
+#define USE_IRQ
 //#define CHECK_READY
 //#define REALTIME
 //#define ADD_DELAYS
@@ -139,8 +140,8 @@ int main() {
 #endif /* CHECK_RESULT */
 
     WR(0x04, 0)
-    WR(0x0C, 0x20)
-    WR(0x10, (HUGE_PAGE * (PAGE_SIZE / 0x80)))
+    WR(0x0C, TLP_SIZE)
+    WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE))))
     WR(0x14, 0x13131313)
 
     for (j = 0; j < BUFFERS; j++ ) {
@@ -194,6 +195,7 @@ int main() {
     pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0);
 
     for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
+//	printf("%lx ", ptr0[i]);
 	if (ptr0[i] != 0x13131313) break;
     }
     if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
-- 
cgit v1.2.3


From f5b4c23cc79affe851016c7ef7970b3e3489fad3 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Fri, 26 Jul 2013 18:30:01 +0200
Subject: Add missing semicolon in the driver

---
 driver/kmem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/driver/kmem.c b/driver/kmem.c
index f36ff8c..b8ce0d0 100644
--- a/driver/kmem.c
+++ b/driver/kmem.c
@@ -621,9 +621,9 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v
 			page_to_pfn(virt_to_page((void*)kmem_entry->cpua)));
 
 	 if ((kmem_entry->type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) {
-	    mod_info("Mapping address %08lx / Size %08lx\n",
-			kmem_entry->dma_handle,
-			(vma_size < kmem_entry->size)?vma_size:kmem_entry->size)
+		mod_info("Mapping address %08lx / Size %08lx\n",
+			(unsigned long)kmem_entry->dma_handle,
+			(vma_size < kmem_entry->size)?vma_size:kmem_entry->size);
 
 		ret = remap_pfn_range_compat(
 					vma,
-- 
cgit v1.2.3


From 7d1a222eaa8cb0965446ad0e745271c070521e78 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Fri, 26 Jul 2013 18:30:47 +0200
Subject: Support offset BARs in the xilinx2 test app

---
 apps/xilinx2.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/apps/xilinx2.c b/apps/xilinx2.c
index 30e0f94..757c388 100644
--- a/apps/xilinx2.c
+++ b/apps/xilinx2.c
@@ -35,8 +35,8 @@ much extra time */
 
 //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
 //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
-#define WR(addr, value) { *(uint32_t*)(bar + addr) = value; }
-#define RD(addr, value) { value = *(uint32_t*)(bar + addr); }
+#define WR(addr, value) { *(uint32_t*)(bar + addr + offset) = value; }
+#define RD(addr, value) { value = *(uint32_t*)(bar + addr + offset); }
 
 static void fail(const char *msg, ...) {
     va_list va;
@@ -77,6 +77,9 @@ int main() {
     void* volatile bar;
     uintptr_t bus_addr[BUFFERS];
 
+    pcilib_bar_t bar_tmp = BAR; 
+    uintptr_t offset = 0;
+
     pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE;
 
 #ifdef ADD_DELAYS
@@ -104,6 +107,8 @@ int main() {
 	fail("map bar");
     }
 
+    pcilib_detect_address(pci, &bar_tmp, &offset, 1);
+
 	// Reset
     WR(0x00, 1)
     usleep(1000);
@@ -190,7 +195,6 @@ int main() {
     }
     gettimeofday(&end, NULL);
 
-
 #ifdef CHECK_RESULT    
     pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0);
 
-- 
cgit v1.2.3


From c57db04f528e671040256d322bb8f21a8d8e9ac1 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Fri, 26 Jul 2013 19:10:37 +0200
Subject: Read out performance counters for Xilinx DMA with big buffers

---
 apps/counters.sh | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100755 apps/counters.sh

diff --git a/apps/counters.sh b/apps/counters.sh
new file mode 100755
index 0000000..e3ba0f5
--- /dev/null
+++ b/apps/counters.sh
@@ -0,0 +1,117 @@
+#! /bin/bash
+
+BAR=0
+USE=1
+ITERATIONS=1
+TLP_SIZE=32
+BUFFER_SIZE=8
+
+function pci {		# wrapper: runs the pci executable located in the parent of the current directory
+    PCILIB_PATH=`pwd`/..
+    LD_LIBRARY_PATH="$PCILIB_PATH" $PCILIB_PATH/pci $*	# same directory is used as library path; $* is unquoted, so arguments are word-split again
+}
+
+
+function reset {		# pulses address 0 of $BAR (1, pause, 0) and then writes 0 to address 4; presumably `-w ADDR VALUE` -- confirm
+    pci -b $BAR -w 0 1
+    usleep 1000			# external `usleep` command -- NOTE(review): confirm it is installed on the target system
+    pci -b $BAR -w 0 0
+    pci -b $BAR -w 4 0
+}
+
+function read_cfg {		# prints the 2nd whitespace-separated field of `pci -a config -r 0x$1`; $1 is hex digits without the 0x prefix
+#    echo $1 1>&2
+    pci -a config -r 0x$1 | awk '{ print $2; }'
+}
+
+function parse_config {	# prints a model/link/TLP summary; leaves link_*, dev_link_*, max_tlp and speed set as globals
+    info=0x`pci -b $BAR -r 0 | awk '{ print $2; }'`
+    model=`printf "%X" $((info>>24))`
+    if [ "$model" = "14" ]; then	# model is hex text, so compare it as a string (-eq errors out on digits A-F)
+	model="Xilinx Virtex-6"
+    else
+	model="Xilinx $model"
+    fi
+    version=$(((info >> 8) & 0xFF))
+    data_width=$((16 * (2 ** ((info >> 16) & 0xF))))
+    
+    echo "$model, build $version, $data_width bits"
+
+    # Walk the capability list: the pointer to the first entry is taken from config offset 0x34
+    next=`read_cfg 34 | cut -c 7-8`
+
+    while [[ $next =~ ^[0-9A-Fa-f]+$ ]] && [ $((0x$next)) -ne 0 ]; do	# next is hex text (e.g. A0); stop on 0 or on unusable output
+	cap=`read_cfg $next`
+	capid=`echo $cap | cut -c 7-8`
+	if [ "$capid" = "10" ]; then	# capability ID 0x10, compared as hex text; presumably PCI Express (see pcie_link* below)
+	    addr=`printf "%X" $((0x$next + 12))`
+	    pcie_link1=`read_cfg $addr`
+	    addr=`printf "%X" $((0x$next + 16))`
+	    pcie_link2=`read_cfg $addr`
+
+	    link_speed=$((((0x$pcie_link2 & 0xF0000) >> 16)))
+	    link_width=$((((0x$pcie_link2 & 0x3F00000) >> 20)))
+
+	    dev_link_speed=$((((0x$pcie_link1 & 0xF))))
+	    dev_link_width=$((((0x$pcie_link1 & 0x3F0) >> 4)))
+	fi
+	next=`echo $cap | cut -c 5-6`
+    done
+
+    echo "Link: PCIe gen$link_speed x$link_width"
+    if [ $link_speed -ne $dev_link_speed -o $link_width -ne $dev_link_width ]; then
+	echo " * But device capable of gen$dev_link_speed x$dev_link_width"
+    fi
+    
+    info=0x`read_cfg 40`
+    max_tlp=$((2 ** (5 + ((info & 0xE0) >> 5))))
+    echo "TLP: 32 dwords (transfering 32 TLP per request)"	# NOTE(review): fixed text, it does not follow $TLP_SIZE
+    if [ $max_tlp -ne $TLP_SIZE ]; then
+	echo " * But device is able to transfer TLP up to $max_tlp bytes"
+    fi
+    
+    # 2500 MT/s per lane and generation step; PCIe gen1 and gen2 use 10-bit encoding, hence the division by 10
+    speed=$((link_width * link_speed * 2500 / 10))
+}
+
+reset
+parse_config
+
+pci --enable-irq
+pci --acknowledge-irq
+
+# TLP size
+pci -b $BAR -w 0x0C 0x`echo "obase=16; $TLP_SIZE" | bc`
+# TLP count
+pci -b $BAR -w 0x10 0x`echo "obase=16; $BUFFER_SIZE * 1024 * 1024 / $TLP_SIZE / 4" | bc`
+# Data
+pci -b $BAR -w 0x14 0x13131313
+
+bus="80000000"
+dmaperf=0
+for i in `seq 1 $ITERATIONS`; do
+  for addr in $bus; do 
+    pci -b $BAR -w 0x08 0x$addr
+
+#Trigger
+    pci -b $BAR -w 0x04 0x01
+    pci --wait-irq
+
+    status=`pci -b $BAR -r 0x04 | awk '{print $2; }' | cut -c 5-8`
+    if [ "$status" != "0101" ]; then	# quoted: an empty status (failed read) must be reported instead of breaking the test itself
+	echo "Read failed, invalid status: $status"
+    fi
+
+    dmaperf=$((dmaperf + 0x`pci -b $BAR -r 0x28 | awk '{print $2}'`))	# accumulate the counter read from address 0x28
+    reset
+  done
+done
+
+pci --free-kernel-memory $USE
+pci --disable-irq
+
+echo
+# Don't ask me about this formula
+echo "Performance reported by FPGA: $(($BUFFER_SIZE * 1024 * 1024 * ITERATIONS * $speed / $dmaperf / 8)) MB/s"
+
+#pci -b $BAR  -r 0 -s 32
-- 
cgit v1.2.3