From d252fcf6889855bb276cf6f9bf516e61910c064f Mon Sep 17 00:00:00 2001
From: dkazanc <dkazanc@hotmail.com>
Date: Fri, 30 Nov 2018 16:18:59 +0000
Subject: matlab version tested, cython bit started

---
 Wrappers/Matlab/demos/demoMatlab_denoise.m         |  15 ++
 Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m |   6 +
 .../regularisers_CPU/NeighbSearch2D_test.c         | 207 --------------
 .../mex_compile/regularisers_CPU/NeighbSelect.c    | 297 ---------------------
 .../mex_compile/regularisers_CPU/Nonlocal_TV.c     | 111 ++------
 .../regularisers_CPU/Nonlocal_TV_core.c            | 169 ++++++++++++
 .../regularisers_CPU/Nonlocal_TV_core.h            |  61 +++++
 .../mex_compile/regularisers_CPU/PatchSelect.c     |  92 +++++++
 .../regularisers_CPU/PatchSelect_core.c            | 254 ++++++++++++++++++
 .../regularisers_CPU/PatchSelect_core.h            |  62 +++++
 10 files changed, 678 insertions(+), 596 deletions(-)
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSearch2D_test.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSelect.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.h

(limited to 'Wrappers/Matlab')

diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index d11bc63..2cbdb56 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -135,6 +135,21 @@ figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
 % tic; u_diff4_g = Diffusion_4thO_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
 % figure; imshow(u_diff4_g, [0 1]); title('Diffusion 4thO denoised image (GPU)');
 %%
+fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
+SearchingWindow = 9;
+PatchWindow = 3;
+NeighboursNumber = 15; % the number of neibours to include
+h = 0.25; % edge related parameter for NLM
+[H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h);
+%%
+fprintf('Denoise using Non-local Total Variation (CPU) \n');
+iter_nltv = 3; % number of nltv iterations
+lambda_nltv = 0.085; % regularisation parameter for nltv
+tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; 
+rmse_nltv = (RMSE(u_nltv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
+figure; imshow(u_nltv, [0 1]); title('Non-local Total Variation denoised image (CPU)');
+%%
 
 %>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
 
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m b/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
index 767d811..49b5dfd 100644
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
+++ b/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
@@ -52,6 +52,12 @@ fprintf('%s \n', 'Compiling ROF-LLT...');
 mex LLT_ROF.c LLT_ROF_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile('LLT_ROF.mex*',Pathmove);
 
+fprintf('%s \n', 'Compiling NonLocal-TV...');
+mex PatchSelect.c PatchSelect_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('Nonlocal_TV.mex*',Pathmove);
+movefile('PatchSelect.mex*',Pathmove);
+
 fprintf('%s \n', 'Compiling additional tools...');
 mex TV_energy.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile('TV_energy.mex*',Pathmove);
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSearch2D_test.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSearch2D_test.c
deleted file mode 100644
index d94b521..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSearch2D_test.c
+++ /dev/null
@@ -1,207 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-#define EPS 1.0000e-12
-
-/* C implementation of the spatial-dependent histogram
- * currently not optimal memory-wise
- *
- *
- * Input Parameters:
- * 1. 2D grayscale image (N x N)
- * 2. Number of histogram bins (M)
- * 4. Similarity window (half-size)
- *
- * Output:
- * 1. Filtered Image (N x N)
- *
- *
- * compile from Matlab with:
- * mex NLTV_SB_fast.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- *
- * Im = double(imread('barb.bmp'))/255; % loading image
- * u0 = Im + .05*randn(size(Im)); u0(u0<0) = 0; % adding noise
- * [Filtered, theta, I1, J1] = NLTV_SB_fast(single(u0), 7, 7, 20, 0.1);
- * D. Kazantsev
- */
-
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-
-/*2D functions */
-float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimY, int dimX, int NumNeighb, int SearchWindow,  int SimilarWin, float h2);
-float NLM_ST_H1(float *Aorig, float *Output,  unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float beta, int IterNumb);
-
-
-
-float denoise2D(float *Aorig, float *Output, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimY, int dimX, int NumNeighb);
-/**************************************************/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int number_of_dims, i, j, k, dimX, dimY, dimZ, SearchWindow, SimilarWin, NumNeighb,kk;
-    unsigned short *H_i=NULL, *H_j=NULL;
-    const int  *dim_array;
-    float *A, *Output, *Weights, h, h2, lambda;
-    int dim_array2[3];
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */
-    SearchWindow = (int) mxGetScalar(prhs[1]);    /* Large Searching window to find and cluster intensities */
-    SimilarWin = (int) mxGetScalar(prhs[2]);    /* Similarity window */
-    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
-    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
-                  
-    h2 = h*h;    
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb;  /* 2D case */ 
-    
-    /*****2D INPUT *****/
-    if (number_of_dims == 2) {
-        dimZ = 0;
-        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL));
-        Output = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));        
-       
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-#pragma omp parallel for shared (A, Output, Weights, H_i, H_j) private(i,j)
-       for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                Indeces2D(A, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb, SearchWindow, SimilarWin, h2); 
-                // denoise2D(A, Output, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb);
-                NLM_ST_H1(A, Output, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb, 0.01f, 1);               
-            }}          
-    }
-    /*****3D INPUT *****/
-    /****************************************************/
-    if (number_of_dims == 3) { 
-	}
-}
-
-
-float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimY, int dimX, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    int i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, k, counter, x, y;
-    float *Weight_Vec, normsum, temp;
-    unsigned short *ind_i, *ind_j, temp_i, temp_j;
-    
-   
-    Weight_Vec = (float*) calloc((2*SearchWindow + 1)*(2*SearchWindow + 1), sizeof(float));
-    ind_i = (unsigned short*) calloc((2*SearchWindow + 1)*(2*SearchWindow + 1), sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc((2*SearchWindow + 1)*(2*SearchWindow + 1), sizeof(unsigned short));    
-    
-         counter = 0;
-        for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-            for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-                i1 = i+i_m;
-                j1 = j+j_m;
-                if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                        normsum = 0;
-                        for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                            for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                                i2 = i1 + i_c;
-                                j2 = j1 + j_c;
-                                i3 = i + i_c;
-                                j3 = j + j_c;
-                                if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
-                                    if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
-                                        normsum += pow(Aorig[i3*dimY+j3] - Aorig[i2*dimY+j2], 2);
-                                    }}
-                            }}
-                         /* writing temporarily into vectors */ 
-                         if (normsum > EPS) Weight_Vec[counter] = exp(-normsum/h2);                      
-                         ind_i[counter] = i1;
-                         ind_j[counter] = j1;
-                        counter ++;                    
-                }
-            }}
-        /* do sorting to choose the most prominent weights [LOW -> HIGH]*/
-        /* and re-arrange indeces accordingly */
-         for(x=0; x < counter; x++)	{
-             for(y=0; y < counter - 1; y++)		{
-                 if(Weight_Vec[y] < Weight_Vec[y+1]) {
-                     temp = Weight_Vec[y+1];
-                     temp_i = ind_i[y+1];
-                     temp_j = ind_j[y+1];
-                     Weight_Vec[y+1] = Weight_Vec[y];
-                     Weight_Vec[y] = temp;
-                     ind_i[y+1] = ind_i[y];
-                     ind_i[y] = temp_i;
-                     ind_j[y+1] = ind_j[y];
-                     ind_j[y] = temp_j;
-                 }}} /*sorting loop end*/
-         
- //       printf("%f %i %i \n", Weight_Vec[10], ind_i[10], ind_j[10]);
-	 /*now select NumNeighb more prominent weights */
-         for(x=0; x < NumNeighb; x++) {
-             H_i[(dimX*dimY*x) + i*dimY+j] = ind_i[x];
-             H_j[(dimX*dimY*x) + i*dimY+j] = ind_j[x];
-             Weights[(dimX*dimY*x) + i*dimY+j] = Weight_Vec[x];            
-         }
-             
-    free(ind_i);
-    free(ind_j);
-    free(Weight_Vec);
-    return 1;
-}
-
-/* a test if NLM denoising works */
-float denoise2D(float *Aorig, float *Output, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimY, int dimX, int NumNeighb)
-{
-	int x, i1, j1; 
-	float value = 0.0f, normweight  = 0.0f;
-	
-	for(x=0; x < NumNeighb; x++) {
-		i1 = (H_i[(dimX*dimY*x) + i*dimY+j]);
-		j1 = (H_j[(dimX*dimY*x) + i*dimY+j]);	
-		value += Aorig[i1*dimY+j1]*Weights[(dimX*dimY*x) + i*dimY+j];
-		normweight += Weights[(dimX*dimY*x) + i*dimY+j];
-	}
-	if (normweight != 0) Output[i*dimY+j] = value/normweight;
-	else Output[i*dimY+j] = 0.0f;
-
-	return *Output;
-}
-
-/***********<<<<Main Function for ST NLM - H1 penalty>>>>**********/
-float NLM_ST_H1(float *Aorig, float *Output,  unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float beta, int IterNumb)
-{
-	int x, i1, j1; 
-	float value = 0.0f, normweight  = 0.0f;
-	
-	for(x=0; x < NumNeighb; x++) {
-		i1 = (H_i[(dimX*dimY*x) + i*dimY+j]);
-		j1 = (H_j[(dimX*dimY*x) + i*dimY+j]);	
-		value += Aorig[i1*dimY+j1]*Weights[(dimX*dimY*x) + i*dimY+j];
-		normweight += Weights[(dimX*dimY*x) + i*dimY+j];
-	}
-	
-//	if (normweight != 0) Output[i*dimY+j] = value/normweight;
-//	else Output[i*dimY+j] = 0.0f;
-
-    Output[i*dimY+j] = (beta*Aorig[i*dimY+j] + value)/(beta + normweight);
-    return *Output;
-}
-
-
-
-/* General Functions */
-/*****************************************************************/
-/* Copy Image */
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
-{
-    int j;
-#pragma omp parallel for shared(A, B) private(j)
-    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
-    return *B;
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSelect.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSelect.c
deleted file mode 100644
index d7c56ec..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/NeighbSelect.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-#define EPS 1.0000e-12
-
-/* C-OMP implementation of non-local weight pre-calculation for non-local priors
- * Weights and associated indices are stored into pre-allocated arrays and passed
- * to the regulariser
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
- * 3. Similarity window (half-size of the patch window, e.g. 2)
- * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
- * 5. noise-related parameter to calculate non-local weights
- *
- * Output [2D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. Weights_ij - associated weights
- *
- * Output [3D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. AR_k - indeces of j neighbours
- * 4. Weights_ijk - associated weights
- *
- * compile from Matlab with:
- * mex NeighbSearch2D_test.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- */
-
-float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimY, long dimX, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-
-/**************************************************/
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int number_of_dims,  SearchWindow, SimilarWin, NumNeighb, counterG;
-    long i, j, k, dimX, dimY, dimZ;
-    unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
-    const int  *dim_array;
-    float *A, *Weights, *Eucl_Vec, h, h2;
-    int dim_array2[3]; /* for 2D data */
-    int dim_array3[4]; /* for 3D data */
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
-    SearchWindow = (int) mxGetScalar(prhs[1]);    /* Large Searching window */
-    SimilarWin = (int) mxGetScalar(prhs[2]);    /* Similarity window (patch-search)*/
-    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
-    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
-    h2 = h*h;
-    
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb;  /* 2D case */
-    dim_array3[0] = dimX; dim_array3[1] = dimY; dim_array3[2] = dimZ; dim_array3[3] = NumNeighb;  /* 3D case */
-    
-    /****************2D INPUT ***************/
-    if (number_of_dims == 2) {
-        dimZ = 0;
-        
-        /* generate a 2D Gaussian kernel for NLM procedure */
-        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
-        counterG = 0;
-        for(i=-SimilarWin; i<=SimilarWin; i++) {
-            for(j=-SimilarWin; j<=SimilarWin; j++) {
-                Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2*SimilarWin*SimilarWin));
-                counterG++;
-            }} /*main neighb loop */
-        
-        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL));
-        
-        /* for each pixel store indeces of the most similar neighbours (patches) */
-#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                Indeces2D(A, H_i, H_j, Weights, i, j, dimX, dimY, Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-            }}
-    }
-    /****************3D INPUT ***************/
-    if (number_of_dims == 3) {
-        
-        /* generate a 3D Gaussian kernel for NLM procedure */
-        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
-        counterG = 0;
-        for(i=-SimilarWin; i<=SimilarWin; i++) {
-            for(j=-SimilarWin; j<=SimilarWin; j++) {
-                for(k=-SimilarWin; k<=SimilarWin; k++) {
-                    Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
-                    counterG++;
-                }}} /*main neighb loop */
-        
-        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        H_k = (unsigned short*)mxGetPr(plhs[2] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL));
-        
-        /* for each voxel store indeces of the most similar neighbours (patches) */
-#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                for(k=0; k<dimZ; k++) {
-                    Indeces3D(A, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-                }}}
-    }
-    free(Eucl_Vec);
-}
-
-float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimY, long dimX, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
-    float *Weight_Vec, normsum, temp;
-    unsigned short *ind_i, *ind_j, temp_i, temp_j;
-    
-    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-    
-    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
-    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
-                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
-                                normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
-                                counterG++;
-                            }}
-                        
-                    }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-            }
-        }}
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter; x++)  {
-        for (y = 0; y < counter; y++)  {
-            if (Weight_Vec[y] < Weight_Vec[x]) {
-                temp = Weight_Vec[y+1];
-                temp_i = ind_i[y+1];
-                temp_j = ind_j[y+1];
-                Weight_Vec[y+1] = Weight_Vec[y];
-                Weight_Vec[y] = temp;
-                ind_i[y+1] = ind_i[y];
-                ind_i[y] = temp_i;
-                ind_j[y+1] = ind_j[y];
-                ind_j[y] = temp_j;
-            }}}
-    /*sorting loop finished*/
-    
-    /*now select the NumNeighb more prominent weights and store into arrays */
-    for(x=0; x < NumNeighb; x++) {
-        index = (dimX*dimY*x) + j*dimX+i;
-        H_i[index] = ind_i[x];
-        H_j[index] = ind_j[x];
-        Weights[index] = Weight_Vec[x];
-    }
-    
-    free(ind_i);
-    free(ind_j);
-    free(Weight_Vec);
-    return 1;
-}
-
-float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
-    float *Weight_Vec, normsum, temp;
-    unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
-    
-    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
-    
-    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
-    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    
-    counter = 0l;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            for(k_m=-SearchWindow; k_m<=SearchWindow; k_m++) {
-                k1 = k+k_m;
-                i1 = i+i_m;
-                j1 = j+j_m;
-                if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY)) && ((k1 >= 0) && (k1 < dimZ))) {
-                    normsum = 0.0f; counterG = 0l;
-                    for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                        for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                            for(k_c=-SimilarWin; k_c<=SimilarWin; k_c++) {
-                                i2 = i1 + i_c;
-                                j2 = j1 + j_c;
-                                k2 = k1 + k_c;
-                                i3 = i + i_c;
-                                j3 = j + j_c;
-                                k3 = k + k_c;
-                                if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY)) && ((k2 >= 0) && (k2 < dimZ))) {
-                                    if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY)) && ((k3 >= 0) && (k3 < dimZ))) {
-                                        normsum += Eucl_Vec[counterG]*pow(Aorig[(dimX*dimY*k3) + j3*dimX + (i3)] - Aorig[(dimX*dimY*k2) + j2*dimX + (i2)], 2);
-                                        counterG++;
-                                    }}
-                            }}}
-                    /* writing temporarily into vectors */
-                    if (normsum > EPS) {
-                        Weight_Vec[counter] = expf(-normsum/h2);
-                        ind_i[counter] = i1;
-                        ind_j[counter] = j1;
-                        ind_k[counter] = k1;
-                        counter ++;
-                    }
-                }
-            }}}
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter; x++)  {
-        for (y = 0; y < counter; y++)  {
-            if (Weight_Vec[y] < Weight_Vec[x]) {
-                temp = Weight_Vec[y+1];
-                temp_i = ind_i[y+1];
-                temp_j = ind_j[y+1];
-                temp_k = ind_k[y+1];
-                Weight_Vec[y+1] = Weight_Vec[y];
-                Weight_Vec[y] = temp;
-                ind_i[y+1] = ind_i[y];
-                ind_i[y] = temp_i;
-                ind_j[y+1] = ind_j[y];
-                ind_j[y] = temp_j;
-                ind_k[y+1] = ind_k[y];
-                ind_k[y] = temp_k;
-            }}}
-    /*sorting loop finished*/
-    
-    /*now select the NumNeighb more prominent weights and store into arrays */
-    for(x=0; x < NumNeighb; x++) {
-        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-        
-        H_i[index] = ind_i[x];
-        H_j[index] = ind_j[x];
-        H_k[index] = ind_k[x];
-        
-        Weights[index] = Weight_Vec[x];
-    }
-    
-    free(ind_i);
-    free(ind_j);
-    free(ind_k);
-    free(Weight_Vec);
-    return 1;
-}
-
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
index ee529ea..dea343c 100644
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
@@ -1,10 +1,11 @@
 /*
  * This work is part of the Core Imaging Library developed by
  * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
  *
  * Copyright 2017 Daniil Kazantsev
  * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,18 +18,16 @@
  * limitations under the License.
  */
 
+#include "matrix.h"
 #include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
+#include "Nonlocal_TV_core.h"
 
 #define EPS 1.0000e-9
 
-/* C-OMP implementation of non-local regulariser
- * Weights and associated indices must be given as an input 
+/* Matlab wrapper for C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
  *
  *
  * Input Parameters:
@@ -43,24 +42,18 @@
  * Output:
  * 1. denoised image/volume 	
  * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
- 
  */
 
-float copyIm(float *A, float *U, long dimX, long dimY, long dimZ);
-float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float lambda);
-float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float lambda);
-/**************************************************/
-
 void mexFunction(
         int nlhs, mxArray *plhs[],
         int nrhs, const mxArray *prhs[])
 {
-    long number_of_dims,  i, j, k, dimX, dimY, dimZ, NumNeighb;
-    int IterNumb, iter;
+    long number_of_dims,  dimX, dimY, dimZ;
+    int IterNumb, NumNeighb = 0;
     unsigned short *H_i, *H_j, *H_k;
     const int  *dim_array;
     const int  *dim_array2;
-    float *A_orig, *Output, *Weights, lambda;
+    float *A_orig, *Output=NULL, *Weights, lambda;
     
     dim_array = mxGetDimensions(prhs[0]);
     dim_array2 = mxGetDimensions(prhs[1]);
@@ -80,83 +73,17 @@ void mexFunction(
          
     /*****2D INPUT *****/
     if (number_of_dims == 2) {
-        dimZ = 0;
-      
+        dimZ = 0;   
         NumNeighb = dim_array2[2];
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));        
-        copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
-       
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-     for(iter=0; iter<IterNumb; iter++) {    
-#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
-      for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {              
-            //NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb, lambda);
-            NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb, lambda);                              
-           }}          
-          }
-    }
+        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));  
+        }
     /*****3D INPUT *****/
     /****************************************************/
     if (number_of_dims == 3) {
-    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));         
+        NumNeighb = dim_array2[3];
+        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));               
     }
+    
+    /* run the main function here */
+    Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb);        
 }
-
-/***********<<<<Main Function for ST NLM - H1 penalty>>>>**********/
-float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float lambda)
-{
-	long x, i1, j1, index; 
-	float value = 0.0f, normweight  = 0.0f;
-	
-	for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		value += A[j1*dimX+i1]*Weights[index];
-		normweight += Weights[index];
-	}	
-    A[j*dimX+i] = (lambda*A_orig[j*dimX+i] + value)/(lambda + normweight);
-    return *A;
-}
-
-/***********<<<<Main Function for ST NLM - TV penalty>>>>**********/
-float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, int i, int j, int dimX, int dimY, int NumNeighb, float lambda)
-{
-	long x, i1, j1, index; 
-	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-	
-	for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i;
-		index =  (dimX*dimY*x) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-	        NLgrad_magn += powf((A[j1*dimX+i1] - A[j*dimX+i]),2)*Weights[index];
-	}
-  
-    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
-    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-    		
-    for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i;
-	i1 = H_i[index];
-	j1 = H_j[index];
-        value += A[j1*dimX+i1]*NLCoeff*Weights[index];
-        normweight += Weights[index]*NLCoeff;
-    }   		
-    A[j*dimX+i] = (lambda*A_orig[j*dimX+i] + value)/(lambda + normweight);
-    return *A;
-}
-
-
-
-/* Copy Image (float) */
-float copyIm(float *A, float *U, long dimX, long dimY, long dimZ)
-{
-	long j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
-
-
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.c
new file mode 100644
index 0000000..d327dd5
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.c
@@ -0,0 +1,169 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Nonlocal_TV_core.h"
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indeces of i neighbours
+ * 3. AR_j - indeces of j neighbours
+ * 4. AR_k - indeces of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights 
+ * 6. regularisation parameter
+ * 7. iterations number 
+ 
+ * Output:
+ * 1. denoised image/volume 	
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ 
+ */
+/*****************************************************************************/
+
+float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambda, int IterNumb)
+{
+
+    long i, j, k;
+    int iter;
+         
+    /*****2D INPUT *****/
+    if (dimZ == 0) {
+	  copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);       
+    /* for each pixel store indeces of the most similar neighbours (patches) */
+     for(iter=0; iter<IterNumb; iter++) {    
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
+      for(i=0; i<(long)(dimX); i++) {
+            for(j=0; j<(long)(dimY); j++) {              
+            /* NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, dimX, dimY, NumNeighb, lambda); */ /* NLM - H1 penalty */
+            NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambda);   /* NLM - TV penalty */     
+           }}          
+          }
+    }  
+    else {
+     /*****3D INPUT *****/
+        copyIm(A_orig, Output, (long)(dimX), (long)(dimY), (long)(dimZ));       
+    /* for each pixel store indeces of the most similar neighbours (patches) */
+     for(iter=0; iter<IterNumb; iter++) {    
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, iter) private(i,j,k)
+      for(i=0; i<(long)(dimX); i++) {
+            for(j=0; j<(long)(dimY); j++) {              
+               for(k=0; k<(long)(dimZ); k++) {
+            /* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambda); */ /* NLM - H1 penalty */
+            NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambda);   /* NLM - TV penalty */     
+           }}}          
+          }          
+    }
+    return *Output;
+}
+
+/***********<<<<Main Function for NLM - H1 penalty>>>>**********/
+float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambda)
+{
+	long x, i1, j1, index; 
+	float value = 0.0f, normweight  = 0.0f;
+	
+	for(x=0; x < NumNeighb; x++) {
+	index =  (dimX*dimY*x) + j*dimX+i;
+		i1 = H_i[index];
+		j1 = H_j[index];
+		value += A[j1*dimX+i1]*Weights[index];
+		normweight += Weights[index];
+	}	
+    A[j*dimX+i] = (lambda*A_orig[j*dimX+i] + value)/(lambda + normweight);
+    return *A;
+}
+/*3D version*/
+float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambda)
+{
+	long x, i1, j1, k1, index; 
+	float value = 0.0f, normweight  = 0.0f;
+	
+	for(x=0; x < NumNeighb; x++) {
+	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+		i1 = H_i[index];
+		j1 = H_j[index];
+		k1 = H_k[index];
+		value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index];
+		normweight += Weights[index];
+	}	
+    A[(dimX*dimY*k) + j*dimX+i] = (lambda*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambda + normweight);
+    return *A;
+}
+
+
+/***********<<<<Main Function for NLM - TV penalty>>>>**********/
+float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambda)
+{
+	long x, i1, j1, index; 
+	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+	
+	for(x=0; x < NumNeighb; x++) {
+		index =  (dimX*dimY*x) + j*dimX+i;
+		i1 = H_i[index];
+		j1 = H_j[index];
+	        NLgrad_magn += powf((A[j1*dimX+i1] - A[j*dimX+i]),2)*Weights[index];
+	}
+  
+    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
+    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
+    		
+    for(x=0; x < NumNeighb; x++) {
+	index =  (dimX*dimY*x) + j*dimX+i;
+	i1 = H_i[index];
+	j1 = H_j[index];
+        value += A[j1*dimX+i1]*NLCoeff*Weights[index];
+        normweight += Weights[index]*NLCoeff;
+    }   		
+    A[j*dimX+i] = (lambda*A_orig[j*dimX+i] + value)/(lambda + normweight);
+    return *A;
+}
+/*3D version*/
+float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambda)
+{
+	long x, i1, j1, k1, index; 
+	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+	
+	for(x=0; x < NumNeighb; x++) {
+	index =  dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+		i1 = H_i[index];
+		j1 = H_j[index];
+		k1 = H_k[index];
+	        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index];
+	}
+  
+    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
+    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
+    		
+    for(x=0; x < NumNeighb; x++) {
+	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+	i1 = H_i[index];
+	j1 = H_j[index];
+	k1 = H_k[index];
+        value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
+        normweight += Weights[index]*NLCoeff;
+    }   		
+    A[(dimX*dimY*k) + j*dimX+i] = (lambda*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambda + normweight);
+    return *A;
+}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.h b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.h
new file mode 100644
index 0000000..5b6963e
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV_core.h
@@ -0,0 +1,61 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+#define EPS 1.0000e-9
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indeces of i neighbours
+ * 3. AR_j - indeces of j neighbours
+ * 4. AR_k - indeces of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights 
+ * 6. regularisation parameter
+ * 7. iterations number 
+ 
+ * Output:
+ * 1. denoised image/volume 	
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans.   Image Processing 17, no. 7 (2008): 1047-1060. 
+ */
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambda, int IterNumb);
+CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambda);
+CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambda);
+CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambda);
+CCPI_EXPORT float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambda);
+#ifdef __cplusplus
+}
+#endif
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
new file mode 100644
index 0000000..fdd9a97
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
@@ -0,0 +1,92 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "matrix.h"
+#include "mex.h"
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. AR_k - indeces of j neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/**************************************************/
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims,  SearchWindow, SimilarWin, NumNeighb;
+    mwSize dimX, dimY, dimZ;
+    unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
+    const int  *dim_array;
+    float *A, *Weights = NULL, h;
+    int dim_array2[3]; /* for 2D data */
+    int dim_array3[4]; /* for 3D data */
+    
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
+    SearchWindow = (int) mxGetScalar(prhs[1]);    /* Large Searching window */
+    SimilarWin = (int) mxGetScalar(prhs[2]);    /* Similarity window (patch-search)*/
+    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
+    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
+
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb;  /* 2D case */
+    dim_array3[0] = dimX; dim_array3[1] = dimY; dim_array3[2] = dimZ; dim_array3[3] = NumNeighb;  /* 3D case */
+    
+    /****************2D INPUT ***************/
+    if (number_of_dims == 2) {
+        dimZ = 0;               
+        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
+        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
+        Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL));
+        }
+    /****************3D INPUT ***************/
+    if (number_of_dims == 3) {        
+        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
+        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
+        H_k = (unsigned short*)mxGetPr(plhs[2] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
+        Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL));        
+    }
+    
+    PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h); 
+    
+ }
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.c
new file mode 100644
index 0000000..efc5498
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.c
@@ -0,0 +1,254 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. AR_k - indeces of j neighbours
+ * 4. Weights_ijk - associated weights
+ */
+
+/**************************************************/
+
+float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long dimX, long dimY, long dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+    int counterG;
+    long i, j, k;
+    float *Eucl_Vec, h2;
+    h2 = h*h;   
+   
+    /****************2D INPUT ***************/
+    if (dimZ == 0) {       
+        /* generate a 2D Gaussian kernel for NLM procedure */
+        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2*SimilarWin*SimilarWin));
+                counterG++;
+            }} /*main neighb loop */
+              
+        /* for each pixel store indeces of the most similar neighbours (patches) */
+#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                Indeces2D(A, H_i, H_j, Weights, (i), (j), (dimX), (dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+            }}
+    }
+    else {
+    /****************3D INPUT ***************/       
+        /* generate a 3D Gaussian kernel for NLM procedure */
+        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                for(k=-SimilarWin; k<=SimilarWin; k++) {
+                    Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
+                    counterG++;
+                }}} /*main neighb loop */     
+        
+        /* for each voxel store indeces of the most similar neighbours (patches) */
+#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                for(k=0; k<dimZ; k++) {
+                    Indeces3D(A, H_i, H_j, H_k, Weights, (i), (j), (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+                }}}
+    }
+    free(Eucl_Vec);
+    return 1;
+}
+
+float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimY, long dimX, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum, temp;
+    unsigned short *ind_i, *ind_j, temp_i, temp_j;
+    
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
+    
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
+                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
+                                normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
+                                counterG++;
+                            }}
+                        
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indeces accordingly */
+    for (x = 0; x < counter; x++)  {
+        for (y = 0; y < counter; y++)  {
+            if (Weight_Vec[y] < Weight_Vec[x]) {
+                temp = Weight_Vec[y+1];
+                temp_i = ind_i[y+1];
+                temp_j = ind_j[y+1];
+                Weight_Vec[y+1] = Weight_Vec[y];
+                Weight_Vec[y] = temp;
+                ind_i[y+1] = ind_i[y];
+                ind_i[y] = temp_i;
+                ind_j[y+1] = ind_j[y];
+                ind_j[y] = temp_j;
+            }}}
+    /*sorting loop finished*/
+    
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index = (dimX*dimY*x) + j*dimX+i;
+        H_i[index] = ind_i[x];
+        H_j[index] = ind_j[x];
+        Weights[index] = Weight_Vec[x];
+    }
+    
+    free(ind_i);
+    free(ind_j);
+    free(Weight_Vec);
+    return 1;
+}
+
+float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum, temp;
+    unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
+    
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
+    
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    
+    counter = 0l;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            for(k_m=-SearchWindow; k_m<=SearchWindow; k_m++) {
+                k1 = k+k_m;
+                i1 = i+i_m;
+                j1 = j+j_m;
+                if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY)) && ((k1 >= 0) && (k1 < dimZ))) {
+                    normsum = 0.0f; counterG = 0l;
+                    for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                        for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                            for(k_c=-SimilarWin; k_c<=SimilarWin; k_c++) {
+                                i2 = i1 + i_c;
+                                j2 = j1 + j_c;
+                                k2 = k1 + k_c;
+                                i3 = i + i_c;
+                                j3 = j + j_c;
+                                k3 = k + k_c;
+                                if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY)) && ((k2 >= 0) && (k2 < dimZ))) {
+                                    if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY)) && ((k3 >= 0) && (k3 < dimZ))) {
+                                        normsum += Eucl_Vec[counterG]*pow(Aorig[(dimX*dimY*k3) + j3*dimX + (i3)] - Aorig[(dimX*dimY*k2) + j2*dimX + (i2)], 2);
+                                        counterG++;
+                                    }}
+                            }}}
+                    /* writing temporarily into vectors */
+                    if (normsum > EPS) {
+                        Weight_Vec[counter] = expf(-normsum/h2);
+                        ind_i[counter] = i1;
+                        ind_j[counter] = j1;
+                        ind_k[counter] = k1;
+                        counter ++;
+                    }
+                }
+            }}}
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indeces accordingly */
+    for (x = 0; x < counter; x++)  {
+        for (y = 0; y < counter; y++)  {
+            if (Weight_Vec[y] < Weight_Vec[x]) {
+                temp = Weight_Vec[y+1];
+                temp_i = ind_i[y+1];
+                temp_j = ind_j[y+1];
+                temp_k = ind_k[y+1];
+                Weight_Vec[y+1] = Weight_Vec[y];
+                Weight_Vec[y] = temp;
+                ind_i[y+1] = ind_i[y];
+                ind_i[y] = temp_i;
+                ind_j[y+1] = ind_j[y];
+                ind_j[y] = temp_j;
+                ind_k[y+1] = ind_k[y];
+                ind_k[y] = temp_k;
+            }}}
+    /*sorting loop finished*/
+    
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        
+        H_i[index] = ind_i[x];
+        H_j[index] = ind_j[x];
+        H_k[index] = ind_k[x];
+        
+        Weights[index] = Weight_Vec[x];
+    }
+    
+    free(ind_i);
+    free(ind_j);
+    free(ind_k);
+    free(Weight_Vec);
+    return 1;
+}
+
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.h b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.h
new file mode 100644
index 0000000..43fce87
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect_core.h
@@ -0,0 +1,62 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+#define EPS 1.0000e-12
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indeces of i neighbours
+ * 2. AR_j - indeces of j neighbours
+ * 3. AR_k - indeces of j neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/*****************************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long dimX, long dimY, long dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimY, long dimX, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
+CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
+#ifdef __cplusplus
+}
+#endif
-- 
cgit v1.2.3