From 5d4464a78d1807565a75c9430cfe5e6857fe9232 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Mon, 31 Jul 2017 23:57:08 +0100
Subject: New regularizers for FISTA

---
 main_func/FGP_TV.c                            | 400 ------------------------
 main_func/FISTA_REC.m                         | 100 +++---
 main_func/LLT_model.c                         | 431 --------------------------
 main_func/SplitBregman_TV.c                   | 399 ------------------------
 main_func/compile_mex.m                       |   8 +-
 main_func/regularizers_CPU/FGP_TV.c           | 400 ++++++++++++++++++++++++
 main_func/regularizers_CPU/LLT_model.c        | 431 ++++++++++++++++++++++++++
 main_func/regularizers_CPU/PatchBased_Regul.c | 295 ++++++++++++++++++
 main_func/regularizers_CPU/SplitBregman_TV.c  | 399 ++++++++++++++++++++++++
 main_func/regularizers_CPU/TGV_PD.c           | 353 +++++++++++++++++++++
 10 files changed, 1943 insertions(+), 1273 deletions(-)
 delete mode 100644 main_func/FGP_TV.c
 delete mode 100644 main_func/LLT_model.c
 delete mode 100644 main_func/SplitBregman_TV.c
 create mode 100644 main_func/regularizers_CPU/FGP_TV.c
 create mode 100644 main_func/regularizers_CPU/LLT_model.c
 create mode 100644 main_func/regularizers_CPU/PatchBased_Regul.c
 create mode 100644 main_func/regularizers_CPU/SplitBregman_TV.c
 create mode 100644 main_func/regularizers_CPU/TGV_PD.c

(limited to 'main_func')

diff --git a/main_func/FGP_TV.c b/main_func/FGP_TV.c
deleted file mode 100644
index 1a1fd13..0000000
--- a/main_func/FGP_TV.c
+++ /dev/null
@@ -1,400 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * [1] Filtered/regularized image
- * [2] last function value 
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
- *
- * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
- */
-
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 50; /* default iterations number */
-    epsil = 0.001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    /*output function value (last iteration) */
-    funcval = 0.0f;
-    plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
-    float *funcvalA = (float *) mxGetData(plhs[1]);
-        
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    tk = 1.0f;
-    tkp1=1.0f;
-    count = 1;
-    re_old = 0.0f;
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
-            
-            /* projection step */
-            Proj_func2D(P1, P2, methTV, dimX, dimY);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
-            
-            /* calculate norm */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 3) {
-                Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);     
-                break; }
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                    Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-                    funcval = 0.0f; 
-                    for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                    funcvalA[0] = sqrt(funcval);                     
-                    break; }}            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            /* calculating the objective function value */
-            if (ll == (iter-1)) {
-            Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-            funcval = 0.0f; 
-            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-            funcvalA[0] = sqrt(funcval);            
-            }
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-    if (number_of_dims == 3) {
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-            
-            /* projection step */
-            Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-            
-            /* calculate norm - stopping rules*/
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) {
-                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);                  
-                break;}
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);  
-                break; }}
-            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            copyIm(P3, P3_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            if (ll == (iter-1)) {
-            Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-            funcval = 0.0f; 
-            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-            funcvalA[0] = sqrt(funcval);            
-            }
-            
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-}
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-    float val1, val2;
-    int i,j;
-#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* boundary conditions  */
-            if (i == 0) {val1 = 0.0f;} else {val1 = R1[(i-1)*dimY + (j)];}
-            if (j == 0) {val2 = 0.0f;} else {val2 = R2[(i)*dimY + (j-1)];}
-            D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
-        }}
-    return *D;
-}
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-    float val1, val2, multip;
-    int i,j;
-    multip = (1.0f/(8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* boundary conditions */
-            if (i == dimX-1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i+1)*dimY + (j)];
-            if (j == dimY-1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j+1)];
-            P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
-            P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
-        }}
-    return 1;
-}
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
-{
-    float val1, val2, denom;
-    int i,j;
-    if (methTV == 0) {
-        /* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,denom)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                denom = pow(P1[(i)*dimY + (j)],2) +  pow(P2[(i)*dimY + (j)],2);
-                if (denom > 1) {
-                    P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)]/sqrt(denom);
-                    P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)]/sqrt(denom);
-                }
-            }}
-    }
-    else {
-        /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                val1 = fabs(P1[(i)*dimY + (j)]);
-                val2 = fabs(P2[(i)*dimY + (j)]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)]/val1;
-                P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)]/val2;
-            }}
-    }
-    return 1;
-}
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
-{
-    int i,j;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
-            R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
-        }}
-    return 1;
-}
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-    float val1, val2, val3;
-    int i,j,k;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* boundary conditions */
-                if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + (i-1)*dimY + (j)];}
-                if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (i)*dimY + (j-1)];}
-                if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + (i)*dimY + (j)];}
-                D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
-            }}}
-    return *D;
-}
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-    float val1, val2, val3, multip;
-    int i,j,k;
-    multip = (1.0f/(8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* boundary conditions */
-                if (i == dimX-1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i+1)*dimY + (j)];
-                if (j == dimY-1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j+1)];
-                if (k == dimZ-1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k+1) + (i)*dimY + (j)];
-                P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
-                P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
-                P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
-            }}}
-    return 1;
-}
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
-{
-    float val1, val2, val3;
-    int i,j,k;
-#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
-                val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
-                val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}
-                
-                P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)]/val1;
-                P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)]/val2;
-                P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)]/val3;
-            }}}
-    return 1;
-}
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
-{
-    int i,j,k;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-                R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-                R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-            }}}
-    return 1;
-}
-
-/* General Functions */
-/*****************************************************************/
-/* Copy Image */
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
-{
-    int j;
-#pragma omp parallel for shared(A, B) private(j)
-    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
-    return *B;
-}
diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index db2b581..2823691 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -15,7 +15,6 @@ function [X,  output] = FISTA_REC(params)
 %----------------Regularization choices------------------------
 %       - .Regul_Lambda_FGPTV (FGP-TV regularization parameter)
 %       - .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
-%       - .Regul_Lambda_L1 (L1 regularization by soft-thresholding)
 %       - .Regul_Lambda_TVLLT (Higher order SB-LLT regularization parameter)
 %       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
 %       - .Regul_Iterations (iterations for the selected penalty, default 25)
@@ -28,7 +27,7 @@ function [X,  output] = FISTA_REC(params)
 %       - .slice (for 3D volumes - slice number to imshow)
 % ___Output___:
 % 1. X - reconstructed image/volume
-% 2. output - structure with
+% 2. output - a structure with
 %    - .Resid_error - residual error (if X_ideal is given)
 %    - .objective: value of the objective function
 %    - .L_const: Lipshitz constant to avoid recalculations
@@ -74,21 +73,50 @@ if (isfield(params,'L_const'))
     L_const = params.L_const;
 else
     % using Power method (PM) to establish L constant
-    niter = 8; % number of iteration for PM
-    x = rand(N,N,SlicesZ);
-    sqweight = sqrt(weights);
-    [sino_id, y] = astra_create_sino3d_cuda(x, proj_geom, vol_geom);
-    y = sqweight.*y;
-    astra_mex_data3d('delete', sino_id);
-    
-    for i = 1:niter
-        [id,x] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geom, vol_geom);
-        s = norm(x(:));
-        x = x/s;
-        [sino_id, y] = astra_create_sino3d_cuda(x, proj_geom, vol_geom);
+    if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
+        % for parallel geometry we can do just one slice
+        fprintf('%s \n', 'Calculating Lipshitz constant for parallel beam geometry...');
+        niter = 15; % number of iteration for the PM
+        x1 = rand(N,N,1);
+        sqweight = sqrt(weights(:,:,1));
+        proj_geomT = proj_geom;
+        proj_geomT.DetectorRowCount = 1;
+        vol_geomT = vol_geom;
+        vol_geomT.GridSliceCount = 1;
+        [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT);
         y = sqweight.*y;
         astra_mex_data3d('delete', sino_id);
-        astra_mex_data3d('delete', id);
+        
+        for i = 1:niter
+            [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geomT, vol_geomT);
+            s = norm(x1(:));
+            x1 = x1/s;
+            [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT);
+            y = sqweight.*y;
+            astra_mex_data3d('delete', sino_id);
+            astra_mex_data3d('delete', id);
+        end
+        clear proj_geomT vol_geomT
+    else
+        % divergen beam geometry
+        fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry...');
+        niter = 8; % number of iteration for PM
+        x1 = rand(N,N,SlicesZ);
+        sqweight = sqrt(weights);
+        [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
+        y = sqweight.*y;
+        astra_mex_data3d('delete', sino_id);
+        
+        for i = 1:niter
+            [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geom, vol_geom);
+            s = norm(x1(:));
+            x1 = x1/s;
+            [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
+            y = sqweight.*y;
+            astra_mex_data3d('delete', sino_id);
+            astra_mex_data3d('delete', id);
+        end
+        clear x1
     end
     L_const = s;
 end
@@ -112,11 +140,6 @@ if (isfield(params,'Regul_Lambda_SBTV'))
 else
     lambdaSB_TV = 0;
 end
-if (isfield(params,'Regul_Lambda_L1'))
-    lambdaL1 = params.Regul_Lambda_L1;
-else
-    lambdaL1 = 0;
-end
 if (isfield(params,'Regul_tol'))
     tol = params.Regul_tol;
 else
@@ -194,19 +217,18 @@ else
     X = zeros(N,N,SlicesZ, 'single'); % storage for the solution
 end
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-Resid_error = zeros(iterFISTA,1); % error vector
-objective = zeros(iterFISTA,1); % obhective vector
+%----------------Reconstruction part------------------------
+Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given)
+objective = zeros(iterFISTA,1); % objective function values vector
 
 t = 1;
 X_t = X;
 
-% add_ring = zeros(size(sino),'single'); % size of sinogram array
 r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
 r_x = r; % another ring variable
 residual = zeros(size(sino),'single');
 
-% Outer iterations loop
+% Outer FISTA iterations loop
 for i = 1:iterFISTA
     
     X_old = X;
@@ -216,12 +238,10 @@ for i = 1:iterFISTA
     [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
     
     if (lambdaR_L1 > 0)
-        % add ring removal part (Group-Huber fidelity)
+        % ring removal part (Group-Huber fidelity)
         for kkk = 1:anglesNumb
-            % add_ring(:,kkk,:) =  squeeze(sino(:,kkk,:)) - alpha_ring.*r_x;
             residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
-        end
-        
+        end        
         vec = sum(residual,2);
         if (SlicesZ > 1)
             vec = squeeze(vec(:,1,:));
@@ -229,9 +249,10 @@ for i = 1:iterFISTA
         r = r_x - (1./L_const).*vec;
     else
         % no ring removal
-        residual = weights.*(sino_updt - sino);
+        residual = weights.*(sino_updt - sino);        
     end
-    % residual =  weights.*(sino_updt - add_ring);
+    
+    objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
     
     [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
     
@@ -242,27 +263,22 @@ for i = 1:iterFISTA
     if (lambdaFGP_TV > 0)
         % FGP-TV regularization
         [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
-        objective(i) = 0.5.*norm(residual(:))^2 + f_val;
+        objective(i) = objective(i) + f_val;
     end
     if (lambdaSB_TV > 0)
         % Split Bregman regularization
         X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
-        objective(i) = 0.5.*norm(residual(:))^2;
-    end
-    if (lambdaL1 > 0)
-        % L1 soft-threhsolding regularization
-        X = max(abs(X)-lambdaL1, 0).*sign(X);
-        objective(i) = 0.5.*norm(residual(:))^2;
     end
     if (lambdaHO > 0)
         % Higher Order (LLT) regularization
         X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
         X = 0.5.*(X + X2); % averaged combination of two solutions
-        objective(i) = 0.5.*norm(residual(:))^2;
     end
     
+    
+    
     if (lambdaR_L1 > 0)
-        r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator
+        r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
     end
     
     t = (1 + sqrt(1 + 4*t^2))/2; % updating t
@@ -281,9 +297,9 @@ for i = 1:iterFISTA
     end
     if (strcmp(X_ideal, 'none' ) == 0)
         Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
-        fprintf('%s %i %s %s %.4f  %s %s %.4f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
+        fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
     else
-        fprintf('%s %i  %s %s %.4f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
+        fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
     end
 end
 output.Resid_error = Resid_error;
diff --git a/main_func/LLT_model.c b/main_func/LLT_model.c
deleted file mode 100644
index 0aed31e..0000000
--- a/main_func/LLT_model.c
+++ /dev/null
@@ -1,431 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-#define EPS 0.01
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
- *
- * Input Parameters:
- * 1. U0 - origanal noise image/volume
- * 2. lambda - regularization parameter
- * 3. tau - time-step  for explicit scheme 
- * 4. iter - iterations number
- * 5. epsil  - tolerance constant (to terminate earlier) 
- * 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
- *
- * Output:
- * Filtered/regularized image
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .03*randn(size(Im)); % adding noise
- * [Den] = LLT_model(single(u0), 10, 0.1, 1);
- *
- *
- * to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
- *
- * 28.11.16/Harwell
- */
-/* 2D functions */
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ);
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3,  unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
-
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher;
-    const int  *dim_array;
-    float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old;
-    unsigned short *Map=NULL;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    U0  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
-    tau =  (float) mxGetScalar(prhs[2]); /* time-step */
-    iter =  (int) mxGetScalar(prhs[3]); /*iterations number*/
-    epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
-    switcher =  (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/
-     
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];  dimZ = 1;
-    
-    if (number_of_dims == 2) {
-        /*2D case*/       
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    else if (number_of_dims == 3) {
-        /*3D case*/
-        dimZ = dim_array[2];
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        if (switcher != 0) {
-        Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL));       
-        }
-    }
-    else {mexErrMsgTxt("The input data should be 2D or 3D");}
-    
-    /*Copy U0 to U*/
-    copyIm(U0, U, dimX, dimY, dimZ);
-    
-    count = 1;
-    re_old = 0.0f; 
-    if (number_of_dims == 2) {
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der2D(U, D1, D2, dimX, dimY, dimZ);
-            /* calculate div^2 and update */
-            div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;         
-        
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);          
-    }
-    /*3D version*/
-    if (number_of_dims == 3) {
-        
-        if (switcher == 1) {
-            /* apply restrictive smoothing */            
-            calcMap(U, Map, dimX, dimY, dimZ);
-            /*clear outliers */
-            cleanMap(Map, dimX, dimY, dimZ);           
-        }
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der3D(U, D1, D2, D3, dimX, dimY, dimZ);          
-            /* calculate div^2 and update */
-            div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau);                 
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;     
-            
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);       
-    }
-}
-
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
-{
-    int i, j, i_p, i_m, j_m, j_p;
-    float dxx, dyy, denom_xx, denom_yy;
-#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-            i_m = i - 1; if (i_m < 0) i_m = i + 1;
-            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-            j_m = j - 1; if (j_m < 0) j_m = j + 1;
-            
-            dxx = U[i_p*dimY + j] - 2.0f*U[i*dimY + j] + U[i_m*dimY + j];
-            dyy = U[i*dimY + j_p] - 2.0f*U[i*dimY + j] + U[i*dimY + j_m];
-                        
-            denom_xx = fabs(dxx) + EPS;
-            denom_yy = fabs(dyy) + EPS;
-            
-            D1[i*dimY + j] = dxx/denom_xx;
-            D2[i*dimY + j] = dyy/denom_yy;
-        }}
-    return 1;
-}
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-    int i, j, i_p, i_m, j_m, j_p;
-    float div, dxx, dyy;
-#pragma omp parallel for shared(U,U0,D1,D2) private(i, j, i_p, i_m, j_m, j_p, div, dxx, dyy)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-            i_m = i - 1; if (i_m < 0) i_m = i + 1;
-            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-            j_m = j - 1; if (j_m < 0) j_m = j + 1;
-            
-            dxx = D1[i_p*dimY + j] - 2.0f*D1[i*dimY + j] + D1[i_m*dimY + j];
-            dyy = D2[i*dimY + j_p] - 2.0f*D2[i*dimY + j] + D2[i*dimY + j_m];
-            
-            div = dxx + dyy;
-            
-            U[i*dimY + j] = U[i*dimY + j] - tau*div - tau*lambda*(U[i*dimY + j] - U0[i*dimY + j]);
-        }}
-    return *U0;
-}
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
-{
-    int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-    float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
-#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-        i_m = i - 1; if (i_m < 0) i_m = i + 1;
-        for(j=0; j<dimY; j++) {
-            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-            j_m = j - 1; if (j_m < 0) j_m = j + 1;
-            for(k=0; k<dimZ; k++) {
-                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-                k_m = k - 1; if (k_m < 0) k_m = k + 1;
-                
-                dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-                dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-                dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
-                
-                denom_xx = fabs(dxx) + EPS;
-                denom_yy = fabs(dyy) + EPS;
-                denom_zz = fabs(dzz) + EPS;
-                
-                D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
-                D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
-                D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
-                
-            }}}
-    return 1;
-}
-
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3,  unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-    int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-    float div, dxx, dyy, dzz;
-#pragma omp parallel for shared(U,U0,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, div, dxx, dyy, dzz)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-        i_m = i - 1; if (i_m < 0) i_m = i + 1;
-        for(j=0; j<dimY; j++) {
-            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-            j_m = j - 1; if (j_m < 0) j_m = j + 1;
-            for(k=0; k<dimZ; k++) {
-                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-                k_m = k - 1; if (k_m < 0) k_m = k + 1;
-//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;   
-                
-                dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j];
-                dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m];               
-                dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-                
-                if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0;                
-                div = dxx + dyy + dzz;
-                
-//                 if (switcher == 1) {                    
-                    // if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0;
-                    //else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];
-//                     div = dzz + dzz2;
-//                 }
-                  
-//                 dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-//                 dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];  
-//                 div = dzz + dzz2;
-                                
-                U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]);
-            }}}
-        return *U0;
- }   
-
-// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ)
-// {
-//     int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1;
-//     float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2;
-// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1)
-//     for(i=0; i<dimX; i++) {
-//         /* symmetric boundary conditions (Neuman) */
-//         i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-//         i_m = i - 1; if (i_m < 0) i_m = i + 1;
-//         for(j=0; j<dimY; j++) {
-//             j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-//             j_m = j - 1; if (j_m < 0) j_m = j + 1;
-//             for(k=0; k<dimZ; k++) {
-//                 k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-//                 k_m = k - 1; if (k_m < 0) k_m = k + 1;
-//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;                
-//                 
-//                 dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-//                 dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-//                 dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
-//                 dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j];                
-//                 
-//                 denom_xx = fabs(dxx) + EPS;
-//                 denom_yy = fabs(dyy) + EPS;
-//                 denom_zz = fabs(dzz) + EPS;
-//                 denom_zz2 = fabs(dzz2) + EPS;
-//                 
-//                 D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
-//                 D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
-//                 D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
-//                 D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2;                               
-//             }}}
-//     return 1;
-// }
-
-float calcMap(float *U, unsigned short *Map,  int dimX, int dimY, int dimZ)
-{  
-    int i,j,k,i1,j1,i2,j2,windowSize;
-    float val1, val2,thresh_val,maxval; 
-    windowSize = 1;
-    thresh_val = 0.0001; /*thresh_val = 0.0035;*/
-    
-    /* normalize volume first */
-    maxval = 0.0f;
-     for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {  
-             for(k=0; k<dimZ; k++) {  
-                if (U[dimX*dimY*k + i*dimY + j] > maxval) maxval = U[dimX*dimY*k + i*dimY + j];
-             }}}
-    
-    if (maxval != 0.0f) {
-     for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {  
-             for(k=0; k<dimZ; k++) {  
-               U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j]/maxval;
-             }}}
-    }
-    else {
-       printf("%s \n", "Maximum value is zero!");       
-       return 0;
-    }
-    
-    #pragma omp parallel for shared(U,Map) private(i, j, k, i1, j1, i2, j2, val1, val2)
-     for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {  
-             for(k=0; k<dimZ; k++) {         
-                
-                Map[dimX*dimY*k + i*dimY + j] = 0; 
-//                 Map2[dimX*dimY*k + i*dimY + j] = 0; 
-                
-                val1 = 0.0f; val2 = 0.0f; 
-                for(i1=-windowSize; i1<=windowSize; i1++) {
-                    for(j1=-windowSize; j1<=windowSize; j1++) {
-                    i2 = i+i1;
-                    j2 = j+j1;
-                    
-                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-                      if (k == 0) {
-                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);                        
-//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                                                  
-                      }
-                      else if (k == dimZ-1) {
-                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-                      }
-//                       else if (k == 1) {
-//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);  
-//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                           
-//                       }
-//                       else if (k == dimZ-2) {
-//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);      
-//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-//                       }                      
-                      else {
-                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-                          val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);                           
-//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); 
-//                           val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);  
-                      }
-                    }                    
-                    }}
-                
-                 val1 = 0.111f*val1; val2 = 0.111f*val2;
-//                  val3 = 0.111f*val3; val4 = 0.111f*val4;
-                 if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1;                        
-//                  if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1;                        
-             }}}
-     return 1;
-}
-
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
-{  
-    int i, j, k, i1, j1, i2, j2, counter; 
-     #pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter)
-     for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {  
-             for(k=0; k<dimZ; k++) {   
-                    
-                 counter=0;
-                 for(i1=-3; i1<=3; i1++) {
-                    for(j1=-3; j1<=3; j1++) {
-                    i2 = i+i1;
-                    j2 = j+j1;
-                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-                    if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++;                                       
-                    }
-                    }}
-                 if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1;
-             }}}
-    return *Map;
-}
-
-    /* Copy Image */
-    float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
-    {
-        int j;
-#pragma omp parallel for shared(A, U) private(j)
-        for(j=0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-        return *U;
-    }
-    /*********************3D *********************/
\ No newline at end of file
diff --git a/main_func/SplitBregman_TV.c b/main_func/SplitBregman_TV.c
deleted file mode 100644
index f143aa6..0000000
--- a/main_func/SplitBregman_TV.c
+++ /dev/null
@@ -1,399 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambda - regularization parameter
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon - tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * Filtered/regularized image
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
- * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
- *
- * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References:
- * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
- * D. Kazantsev, 2016*
- */
-
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float gauss_seidel2D(float *U,  float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
-
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    mu =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 35; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    lambda = 2.0f*mu;
-    count = 1;
-    re_old = 0.0f;
-    /*Handling Matlab output data*/
-    dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel2D(U, A, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
-            
-            if (methTV == 1)  updDxDy_shrinkAniso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            else updDxDy_shrinkIso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            
-            updBxBy2D(U, Dx, Dy, Bx, By, dimX, dimY);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break;
-            }
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*copyIm(U_old, U, dimX, dimY, dimZ); */
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-    if (number_of_dims == 3) {
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel3D(U, A, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
-            
-            if (methTV == 1) updDxDyDz_shrinkAniso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            else updDxDyDz_shrinkIso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            
-            updBxByBz3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U[j] - U_old[j],2);
-                re1 += pow(U[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; }
-            /*printf("%f %i %i \n", re, ll, count); */
-            re_old = re;
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-}
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
-{
-    float sum, normConst;
-    int i,j,i1,i2,j1,j2;
-    normConst = 1.0f/(mu + 4.0f*lambda);
-    
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            
-            sum = Dx[(i2)*dimY + (j)] - Dx[(i)*dimY + (j)] + Dy[(i)*dimY + (j2)] - Dy[(i)*dimY + (j)] - Bx[(i2)*dimY + (j)] + Bx[(i)*dimY + (j)] - By[(i)*dimY + (j2)] + By[(i)*dimY + (j)];
-            sum += (U[(i1)*dimY + (j)] + U[(i2)*dimY + (j)] + U[(i)*dimY + (j1)] + U[(i)*dimY + (j2)]);
-            sum *= lambda;
-            sum += mu*A[(i)*dimY + (j)];
-            U[(i)*dimY + (j)] = normConst*sum;
-        }}
-    return *U;
-}
-
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, val22, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,val22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-            
-            if (val1 !=0) Dx[(i)*dimY + (j)] = (val1/fabs(val1))*val11; else Dx[(i)*dimY + (j)] = 0;
-            if (val2 !=0) Dy[(i)*dimY + (j)] = (val2/fabs(val2))*val22; else Dy[(i)*dimY + (j)] = 0;
-            
-        }}
-    return 1;
-}
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-    
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,denom)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            denom = sqrt(val1*val1 + val2*val2);
-            
-            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[(i)*dimY + (j)] = val11*(val1/denom);
-                Dy[(i)*dimY + (j)] = val11*(val2/denom);
-            }
-            else {
-                Dx[(i)*dimY + (j)] = 0;
-                Dy[(i)*dimY + (j)] = 0;
-            }
-        }}
-    return 1;
-}
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
-{
-    int i,j,i1,j1;
-#pragma omp parallel for shared(U) private(i,j,i1,j1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            Bx[(i)*dimY + (j)] = Bx[(i)*dimY + (j)] + ((U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) - Dx[(i)*dimY + (j)]);
-            By[(i)*dimY + (j)] = By[(i)*dimY + (j)] + ((U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) - Dy[(i)*dimY + (j)]);
-        }}
-    return 1;
-}
-
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
-{
-    float normConst, d_val, b_val, sum;
-    int i,j,i1,i2,j1,j2,k,k1,k2;
-    normConst = 1.0f/(mu + 6.0f*lambda);
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                i2 = i-1; if (i2 < 0) i2 = i+1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                j2 = j-1; if (j2 < 0) j2 = j+1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                k2 = k-1; if (k2 < 0) k2 = k+1;
-                
-                d_val = Dx[(dimX*dimY)*k + (i2)*dimY + (j)] - Dx[(dimX*dimY)*k + (i)*dimY + (j)] + Dy[(dimX*dimY)*k + (i)*dimY + (j2)] - Dy[(dimX*dimY)*k + (i)*dimY + (j)] + Dz[(dimX*dimY)*k2 + (i)*dimY + (j)] - Dz[(dimX*dimY)*k + (i)*dimY + (j)];
-                b_val = -Bx[(dimX*dimY)*k + (i2)*dimY + (j)] + Bx[(dimX*dimY)*k + (i)*dimY + (j)] - By[(dimX*dimY)*k + (i)*dimY + (j2)] + By[(dimX*dimY)*k + (i)*dimY + (j)] - Bz[(dimX*dimY)*k2 + (i)*dimY + (j)] + Bz[(dimX*dimY)*k + (i)*dimY + (j)];
-                sum =  d_val + b_val;
-                sum += U[(dimX*dimY)*k + (i1)*dimY + (j)] + U[(dimX*dimY)*k + (i2)*dimY + (j)] + U[(dimX*dimY)*k + (i)*dimY + (j1)] + U[(dimX*dimY)*k + (i)*dimY + (j2)] + U[(dimX*dimY)*k1 + (i)*dimY + (j)] + U[(dimX*dimY)*k2 + (i)*dimY + (j)];
-                sum *= lambda;
-                sum += mu*A[(dimX*dimY)*k + (i)*dimY + (j)];
-                U[(dimX*dimY)*k + (i)*dimY + (j)] = normConst*sum;
-            }}}
-    return *U;
-}
-
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val22, val3, val33, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-                val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-                val33 = fabs(val3) - denom_lam; if (val33 < 0) val33 = 0;
-                
-                if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
-                if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
-                if (val3 !=0) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0;
-                
-            }}}
-    return 1;
-}
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val3, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-                
-                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-                
-                if (denom != 0.0f) {
-                    Dx[index] = val11*(val1/denom);
-                    Dy[index] = val11*(val2/denom);
-                    Dz[index] = val11*(val3/denom);
-                }
-                else {
-                    Dx[index] = 0;
-                    Dy[index] = 0;
-                    Dz[index] = 0;
-                }               
-            }}}
-    return 1;
-}
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
-{
-    int i,j,k,i1,j1,k1;
-#pragma omp parallel for shared(U) private(i,j,k,i1,j1,k1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                Bx[(dimX*dimY)*k + (i)*dimY + (j)] = Bx[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dx[(dimX*dimY)*k + (i)*dimY + (j)]);
-                By[(dimX*dimY)*k + (i)*dimY + (j)] = By[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dy[(dimX*dimY)*k + (i)*dimY + (j)]);
-                Bz[(dimX*dimY)*k + (i)*dimY + (j)] = Bz[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dz[(dimX*dimY)*k + (i)*dimY + (j)]);
-                
-            }}}
-    return 1;
-}
-/* General Functions */
-/*****************************************************************/
-/* Copy Image */
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
-{
-    int j;
-#pragma omp parallel for shared(A, B) private(j)
-    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
-    return *B;
-}
\ No newline at end of file
diff --git a/main_func/compile_mex.m b/main_func/compile_mex.m
index 4d97bc2..7bfa8eb 100644
--- a/main_func/compile_mex.m
+++ b/main_func/compile_mex.m
@@ -1,4 +1,10 @@
-% compile mex's
+% compile mex's once
+cd regularizers_CPU/
+
 mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+
+cd ../../
+cd demos
diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c
new file mode 100644
index 0000000..1a1fd13
--- /dev/null
+++ b/main_func/regularizers_CPU/FGP_TV.c
@@ -0,0 +1,400 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon: tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ * [2] last function value 
+ *
+ * Example of image denoising:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+ *
+ * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ *
+ * D. Kazantsev, 2016-17
+ *
+ */
+
+float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
+
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
+
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
+    const int  *dim_array;
+    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
+    
+    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 50; /* default iterations number */
+    epsil = 0.001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs == 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    /*output function value (last iteration) */
+    funcval = 0.0f;
+    plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
+    float *funcvalA = (float *) mxGetData(plhs[1]);
+        
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    
+    tk = 1.0f;
+    tkp1=1.0f;
+    count = 1;
+    re_old = 0.0f;
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /* begin iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* computing the gradient of the objective function */
+            Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
+            
+            /* projection step */
+            Proj_func2D(P1, P2, methTV, dimX, dimY);
+            
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
+            
+            /* calculate norm */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(D[j] - D_old[j],2);
+                re1 += pow(D[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 3) {
+                Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
+                funcval = 0.0f; 
+                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+                funcvalA[0] = sqrt(funcval);     
+                break; }
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) {
+                    Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
+                    funcval = 0.0f; 
+                    for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+                    funcvalA[0] = sqrt(funcval);                     
+                    break; }}            
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */
+            
+            /*storing old values*/
+            copyIm(D, D_old, dimX, dimY, dimZ);
+            copyIm(P1, P1_old, dimX, dimY, dimZ);
+            copyIm(P2, P2_old, dimX, dimY, dimZ);
+            tk = tkp1;
+            
+            /* calculating the objective function value */
+            if (ll == (iter-1)) {
+            Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
+            funcval = 0.0f; 
+            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+            funcvalA[0] = sqrt(funcval);            
+            }
+        }
+        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+    }
+    if (number_of_dims == 3) {
+        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /* begin iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* computing the gradient of the objective function */
+            Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
+            
+            /* projection step */
+            Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
+            
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
+            
+            /* calculate norm - stopping rules*/
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(D[j] - D_old[j],2);
+                re1 += pow(D[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* stop if the norm residual is less than the tolerance EPS */
+            if (re < epsil)  count++;
+            if (count > 3) {
+                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
+                funcval = 0.0f; 
+                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+                funcvalA[0] = sqrt(funcval);                  
+                break;}
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) {
+                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
+                funcval = 0.0f; 
+                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+                funcvalA[0] = sqrt(funcval);  
+                break; }}
+            
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */
+            
+            /*storing old values*/
+            copyIm(D, D_old, dimX, dimY, dimZ);
+            copyIm(P1, P1_old, dimX, dimY, dimZ);
+            copyIm(P2, P2_old, dimX, dimY, dimZ);
+            copyIm(P3, P3_old, dimX, dimY, dimZ);
+            tk = tkp1;
+            
+            if (ll == (iter-1)) {
+            Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
+            funcval = 0.0f; 
+            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
+            funcvalA[0] = sqrt(funcval);            
+            }
+            
+        }
+        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+    }
+}
+
+/* 2D-case related Functions */
+/*****************************************************************/
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+{
+    float val1, val2;
+    int i,j;
+#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* boundary conditions  */
+            if (i == 0) {val1 = 0.0f;} else {val1 = R1[(i-1)*dimY + (j)];}
+            if (j == 0) {val2 = 0.0f;} else {val2 = R2[(i)*dimY + (j-1)];}
+            D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
+        }}
+    return *D;
+}
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+{
+    float val1, val2, multip;
+    int i,j;
+    multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* boundary conditions */
+            if (i == dimX-1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i+1)*dimY + (j)];
+            if (j == dimY-1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j+1)];
+            P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
+            P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
+        }}
+    return 1;
+}
+float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
+{
+    float val1, val2, denom;
+    int i,j;
+    if (methTV == 0) {
+        /* isotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,j,denom)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                denom = pow(P1[(i)*dimY + (j)],2) +  pow(P2[(i)*dimY + (j)],2);
+                if (denom > 1) {
+                    P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)]/sqrt(denom);
+                    P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)]/sqrt(denom);
+                }
+            }}
+    }
+    else {
+        /* anisotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                val1 = fabs(P1[(i)*dimY + (j)]);
+                val2 = fabs(P2[(i)*dimY + (j)]);
+                if (val1 < 1.0f) {val1 = 1.0f;}
+                if (val2 < 1.0f) {val2 = 1.0f;}
+                P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)]/val1;
+                P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)]/val2;
+            }}
+    }
+    return 1;
+}
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
+{
+    int i,j;
+    float multip;
+    multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
+            R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
+        }}
+    return 1;
+}
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+    float val1, val2, val3;
+    int i,j,k;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* boundary conditions */
+                if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + (i-1)*dimY + (j)];}
+                if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (i)*dimY + (j-1)];}
+                if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + (i)*dimY + (j)];}
+                D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
+            }}}
+    return *D;
+}
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+    float val1, val2, val3, multip;
+    int i,j,k;
+    multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* boundary conditions */
+                if (i == dimX-1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i+1)*dimY + (j)];
+                if (j == dimY-1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j+1)];
+                if (k == dimZ-1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k+1) + (i)*dimY + (j)];
+                P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
+                P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
+                P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
+            }}}
+    return 1;
+}
+float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
+{
+    float val1, val2, val3;
+    int i,j,k;
+#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
+                val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
+                val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
+                if (val1 < 1.0f) {val1 = 1.0f;}
+                if (val2 < 1.0f) {val2 = 1.0f;}
+                if (val3 < 1.0f) {val3 = 1.0f;}
+                
+                P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)]/val1;
+                P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)]/val2;
+                P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)]/val3;
+            }}}
+    return 1;
+}
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
+{
+    int i,j,k;
+    float multip;
+    multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+                R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+                R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+            }}}
+    return 1;
+}
+
+/* General Functions */
+/*****************************************************************/
+/* Copy Image */
+float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
+{
+    int j;
+#pragma omp parallel for shared(A, B) private(j)
+    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
+    return *B;
+}
diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c
new file mode 100644
index 0000000..0aed31e
--- /dev/null
+++ b/main_func/regularizers_CPU/LLT_model.c
@@ -0,0 +1,431 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+#define EPS 0.01
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+ *
+ * Input Parameters:
+ * 1. U0 - origanal noise image/volume
+ * 2. lambda - regularization parameter
+ * 3. tau - time-step  for explicit scheme 
+ * 4. iter - iterations number
+ * 5. epsil  - tolerance constant (to terminate earlier) 
+ * 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
+ *
+ * Output:
+ * Filtered/regularized image
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .03*randn(size(Im)); % adding noise
+ * [Den] = LLT_model(single(u0), 10, 0.1, 1);
+ *
+ *
+ * to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
+ *
+ * 28.11.16/Harwell
+ */
+/* 2D functions */
+float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
+float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
+
+float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ);
+float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3,  unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau);
+
+float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
+float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
+
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher;
+    const int  *dim_array;
+    float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old;
+    unsigned short *Map=NULL;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    U0  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
+    tau =  (float) mxGetScalar(prhs[2]); /* time-step */
+    iter =  (int) mxGetScalar(prhs[3]); /*iterations number*/
+    epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    switcher =  (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/
+     
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];  dimZ = 1;
+    
+    if (number_of_dims == 2) {
+        /*2D case*/       
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    }
+    else if (number_of_dims == 3) {
+        /*3D case*/
+        dimZ = dim_array[2];
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        if (switcher != 0) {
+        Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL));       
+        }
+    }
+    else {mexErrMsgTxt("The input data should be 2D or 3D");}
+    
+    /*Copy U0 to U*/
+    copyIm(U0, U, dimX, dimY, dimZ);
+    
+    count = 1;
+    re_old = 0.0f; 
+    if (number_of_dims == 2) {
+        for(ll = 0; ll < iter; ll++) {
+            
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*estimate inner derrivatives */
+            der2D(U, D1, D2, dimX, dimY, dimZ);
+            /* calculate div^2 and update */
+            div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);           
+            if (re < epsil)  count++;
+            if (count > 4) break;            
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; 
+            }
+            re_old = re;         
+        
+        } /*end of iterations*/
+        printf("HO iterations stopped at iteration: %i\n", ll);          
+    }
+    /*3D version*/
+    if (number_of_dims == 3) {
+        
+        if (switcher == 1) {
+            /* apply restrictive smoothing */            
+            calcMap(U, Map, dimX, dimY, dimZ);
+            /*clear outliers */
+            cleanMap(Map, dimX, dimY, dimZ);           
+        }
+        for(ll = 0; ll < iter; ll++) {
+            
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*estimate inner derrivatives */
+            der3D(U, D1, D2, D3, dimX, dimY, dimZ);          
+            /* calculate div^2 and update */
+            div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau);                 
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);           
+            if (re < epsil)  count++;
+            if (count > 4) break;            
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; 
+            }
+            re_old = re;     
+            
+        } /*end of iterations*/
+        printf("HO iterations stopped at iteration: %i\n", ll);       
+    }
+}
+
+float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
+{
+    int i, j, i_p, i_m, j_m, j_p;
+    float dxx, dyy, denom_xx, denom_yy;
+#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+            i_m = i - 1; if (i_m < 0) i_m = i + 1;
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            
+            dxx = U[i_p*dimY + j] - 2.0f*U[i*dimY + j] + U[i_m*dimY + j];
+            dyy = U[i*dimY + j_p] - 2.0f*U[i*dimY + j] + U[i*dimY + j_m];
+                        
+            denom_xx = fabs(dxx) + EPS;
+            denom_yy = fabs(dyy) + EPS;
+            
+            D1[i*dimY + j] = dxx/denom_xx;
+            D2[i*dimY + j] = dyy/denom_yy;
+        }}
+    return 1;
+}
+float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    int i, j, i_p, i_m, j_m, j_p;
+    float div, dxx, dyy;
+#pragma omp parallel for shared(U,U0,D1,D2) private(i, j, i_p, i_m, j_m, j_p, div, dxx, dyy)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+            i_m = i - 1; if (i_m < 0) i_m = i + 1;
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            
+            dxx = D1[i_p*dimY + j] - 2.0f*D1[i*dimY + j] + D1[i_m*dimY + j];
+            dyy = D2[i*dimY + j_p] - 2.0f*D2[i*dimY + j] + D2[i*dimY + j_m];
+            
+            div = dxx + dyy;
+            
+            U[i*dimY + j] = U[i*dimY + j] - tau*div - tau*lambda*(U[i*dimY + j] - U0[i*dimY + j]);
+        }}
+    return *U0;
+}
+
+float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
+{
+    int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
+    float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
+#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neuman) */
+        i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+        i_m = i - 1; if (i_m < 0) i_m = i + 1;
+        for(j=0; j<dimY; j++) {
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            for(k=0; k<dimZ; k++) {
+                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+                k_m = k - 1; if (k_m < 0) k_m = k + 1;
+                
+                dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
+                dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
+                dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
+                
+                denom_xx = fabs(dxx) + EPS;
+                denom_yy = fabs(dyy) + EPS;
+                denom_zz = fabs(dzz) + EPS;
+                
+                D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
+                D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
+                D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
+                
+            }}}
+    return 1;
+}
+
+float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3,  unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
+    float div, dxx, dyy, dzz;
+#pragma omp parallel for shared(U,U0,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, div, dxx, dyy, dzz)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neuman) */
+        i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+        i_m = i - 1; if (i_m < 0) i_m = i + 1;
+        for(j=0; j<dimY; j++) {
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            for(k=0; k<dimZ; k++) {
+                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+                k_m = k - 1; if (k_m < 0) k_m = k + 1;
+//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
+//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;   
+                
+                dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j];
+                dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m];               
+                dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
+                
+                if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0;                
+                div = dxx + dyy + dzz;
+                
+//                 if (switcher == 1) {                    
+                    // if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0;
+                    //else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];
+//                     div = dzz + dzz2;
+//                 }
+                  
+//                 dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
+//                 dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];  
+//                 div = dzz + dzz2;
+                                
+                U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]);
+            }}}
+        return *U0;
+ }   
+
+// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ)
+// {
+//     int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1;
+//     float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2;
+// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1)
+//     for(i=0; i<dimX; i++) {
+//         /* symmetric boundary conditions (Neuman) */
+//         i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+//         i_m = i - 1; if (i_m < 0) i_m = i + 1;
+//         for(j=0; j<dimY; j++) {
+//             j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+//             j_m = j - 1; if (j_m < 0) j_m = j + 1;
+//             for(k=0; k<dimZ; k++) {
+//                 k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+//                 k_m = k - 1; if (k_m < 0) k_m = k + 1;
+//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
+//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;                
+//                 
+//                 dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
+//                 dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
+//                 dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
+//                 dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j];                
+//                 
+//                 denom_xx = fabs(dxx) + EPS;
+//                 denom_yy = fabs(dyy) + EPS;
+//                 denom_zz = fabs(dzz) + EPS;
+//                 denom_zz2 = fabs(dzz2) + EPS;
+//                 
+//                 D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
+//                 D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
+//                 D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
+//                 D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2;                               
+//             }}}
+//     return 1;
+// }
+
+float calcMap(float *U, unsigned short *Map,  int dimX, int dimY, int dimZ)
+{  
+    int i,j,k,i1,j1,i2,j2,windowSize;
+    float val1, val2,thresh_val,maxval; 
+    windowSize = 1;
+    thresh_val = 0.0001; /*thresh_val = 0.0035;*/
+    
+    /* normalize volume first */
+    maxval = 0.0f;
+     for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {  
+             for(k=0; k<dimZ; k++) {  
+                if (U[dimX*dimY*k + i*dimY + j] > maxval) maxval = U[dimX*dimY*k + i*dimY + j];
+             }}}
+    
+    if (maxval != 0.0f) {
+     for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {  
+             for(k=0; k<dimZ; k++) {  
+               U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j]/maxval;
+             }}}
+    }
+    else {
+       printf("%s \n", "Maximum value is zero!");       
+       return 0;
+    }
+    
+    #pragma omp parallel for shared(U,Map) private(i, j, k, i1, j1, i2, j2, val1, val2)
+     for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {  
+             for(k=0; k<dimZ; k++) {         
+                
+                Map[dimX*dimY*k + i*dimY + j] = 0; 
+//                 Map2[dimX*dimY*k + i*dimY + j] = 0; 
+                
+                val1 = 0.0f; val2 = 0.0f; 
+                for(i1=-windowSize; i1<=windowSize; i1++) {
+                    for(j1=-windowSize; j1<=windowSize; j1++) {
+                    i2 = i+i1;
+                    j2 = j+j1;
+                    
+                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
+                      if (k == 0) {
+                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);                        
+//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                                                  
+                      }
+                      else if (k == dimZ-1) {
+                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
+                      }
+//                       else if (k == 1) {
+//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);  
+//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                           
+//                       }
+//                       else if (k == dimZ-2) {
+//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);      
+//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
+//                       }                      
+                      else {
+                          val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+                          val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);                           
+//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); 
+//                           val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);  
+                      }
+                    }                    
+                    }}
+                
+                 val1 = 0.111f*val1; val2 = 0.111f*val2;
+//                  val3 = 0.111f*val3; val4 = 0.111f*val4;
+                 if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1;                        
+//                  if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1;                        
+             }}}
+     return 1;
+}
+
+float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
+{  
+    int i, j, k, i1, j1, i2, j2, counter; 
+     #pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter)
+     for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {  
+             for(k=0; k<dimZ; k++) {   
+                    
+                 counter=0;
+                 for(i1=-3; i1<=3; i1++) {
+                    for(j1=-3; j1<=3; j1++) {
+                    i2 = i+i1;
+                    j2 = j+j1;
+                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
+                    if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++;                                       
+                    }
+                    }}
+                 if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1;
+             }}}
+    return *Map;
+}
+
+    /* Copy Image */
+    float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
+    {
+        int j;
+#pragma omp parallel for shared(A, U) private(j)
+        for(j=0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
+        return *U;
+    }
+    /*********************3D *********************/
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c
new file mode 100644
index 0000000..1ed29d4
--- /dev/null
+++ b/main_func/regularizers_CPU/PatchBased_Regul.c
@@ -0,0 +1,295 @@
+#define _USE_MATH_DEFINES
+
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters (mandatory):
+ * 1. Image (2D or 3D)
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+ * 4. h - parameter for the PB penalty function
+ * 5. lambda - regularization parameter 
+
+ * Output:
+ * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+ *
+ * Quick 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); 
+ *
+ * Please see more tests in a file: 
+   TestTemporalSmoothing.m
+ 
+ *
+ * Matlab + C/mex compilers needed
+ * to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+ *
+ * D. Kazantsev *
+ * 02/07/2014
+ * Harwell, UK
+ */
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
+float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);      
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[]) 
+{    
+    int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop;
+    const int  *dims;
+    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda;
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);
+    
+    N = dims[0];
+    M = dims[1];
+    Z = dims[2];
+    
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    
+    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
+    
+    /*Handling inputs*/
+    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
+    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
+
+    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
+    if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
+       
+    SearchW = SearchW_real + 2*SimilW;
+    
+    /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
+
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    newsizeX = N + 2*(padXY); /* the X size of the padded array */
+    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
+    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
+    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
+    
+    /******************************2D case ****************************/
+    if (numdims == 2) {
+        /*Handling output*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+    }
+    else
+    {
+        /******************************3D case ****************************/
+        /*Handling output*/
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        
+        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+    } /*end else ndims*/ 
+}    
+    
+/*2D version*/
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
+{
+    int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1;
+                 
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);   
+    
+     /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2);
+            Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW));
+            count = count + 1;                       
+        }} /*main neighb loop */   
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count,  i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight,  value)
+    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+             if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY))) {
+          
+                /* Massive Search window loop */
+                Weight_norm = 0; value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                       /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 i2 = i1+i_l; j2 = j1+j_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}                      
+      
+                         value += A[i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;           
+                    }}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;
+                B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom;         
+             }
+        }}   /*main loop*/     
+    return (*B);
+    free(Eucl_Vec);    
+}
+ 
+/*3D version*/ 
+ float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda)       
+ {
+    int SimilW_full, count, i, j, k,  i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob;
+        
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);
+    
+    /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            for(k_n=-SimilW; k_n<=SimilW; k_n++) {
+                Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW));
+                count = count + 1;
+            }}} /*main neighb loop */
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count,  i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value)    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+            if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY)) &&  ((k >= padXY) && (k < dimZ-padXY))) {
+            /* take all elements around the pixel of interest */                             
+               /* Massive Search window loop */
+                Weight_norm = 0;  value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                        for(k_m=-SearchW; k_m<=SearchW; k_m++) {
+                         /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m; k1 = k+k_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 for(k_l=-SimilW; k_l<=SimilW; k_l++) {                                 
+                                 i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l; k3 = k+k_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         for(k_p=-SimilW; k_p<=SimilW; k_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}}                                                 
+                         value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;
+             
+                    }}}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;               
+                B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom;      
+            }            
+        }}}   /*main loop*/              
+       free(Eucl_Vec);        
+       return *B;
+}
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
+{
+    /* padding-cropping function */
+    int i,j,k;    
+    if (NewSizeZ > 1) {    
+           for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+              for (k=0; k < NewSizeZ; k++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
+                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
+                }
+            }}}   
+    }
+    else {
+        for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
+                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
+                }
+            }}
+    }
+    return *Ap;
+}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/SplitBregman_TV.c b/main_func/regularizers_CPU/SplitBregman_TV.c
new file mode 100644
index 0000000..f143aa6
--- /dev/null
+++ b/main_func/regularizers_CPU/SplitBregman_TV.c
@@ -0,0 +1,399 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon - tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * Filtered/regularized image
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+ * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
+ *
+ * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References:
+ * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
+ * D. Kazantsev, 2016*
+ */
+
+float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+float gauss_seidel2D(float *U,  float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
+
+float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
+    const int  *dim_array;
+    float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    mu =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 35; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs == 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    lambda = 2.0f*mu;
+    count = 1;
+    re_old = 0.0f;
+    /*Handling Matlab output data*/
+    dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2];
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /*storing old values*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*GS iteration */
+            gauss_seidel2D(U, A, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
+            
+            if (methTV == 1)  updDxDy_shrinkAniso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            else updDxDy_shrinkIso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            
+            updBxBy2D(U, Dx, Dy, Bx, By, dimX, dimY);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 4) break;
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break;
+            }
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */
+            
+            /*copyIm(U_old, U, dimX, dimY, dimZ); */
+        }
+        printf("SB iterations stopped at iteration: %i\n", ll);
+    }
+    if (number_of_dims == 3) {
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /*storing old values*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*GS iteration */
+            gauss_seidel3D(U, A, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
+            
+            if (methTV == 1) updDxDyDz_shrinkAniso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            else updDxDyDz_shrinkIso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            
+            updBxByBz3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U[j] - U_old[j],2);
+                re1 += pow(U[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 4) break;
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; }
+            /*printf("%f %i %i \n", re, ll, count); */
+            re_old = re;
+        }
+        printf("SB iterations stopped at iteration: %i\n", ll);
+    }
+}
+
+/* 2D-case related Functions */
+/*****************************************************************/
+float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
+{
+    float sum, normConst;
+    int i,j,i1,i2,j1,j2;
+    normConst = 1.0f/(mu + 4.0f*lambda);
+    
+#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neuman) */
+        i1 = i+1; if (i1 == dimX) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            
+            sum = Dx[(i2)*dimY + (j)] - Dx[(i)*dimY + (j)] + Dy[(i)*dimY + (j2)] - Dy[(i)*dimY + (j)] - Bx[(i2)*dimY + (j)] + Bx[(i)*dimY + (j)] - By[(i)*dimY + (j2)] + By[(i)*dimY + (j)];
+            sum += (U[(i1)*dimY + (j)] + U[(i2)*dimY + (j)] + U[(i)*dimY + (j1)] + U[(i)*dimY + (j2)]);
+            sum *= lambda;
+            sum += mu*A[(i)*dimY + (j)];
+            U[(i)*dimY + (j)] = normConst*sum;
+        }}
+    return *U;
+}
+
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,val22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
+            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
+            
+            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+            
+            if (val1 !=0) Dx[(i)*dimY + (j)] = (val1/fabs(val1))*val11; else Dx[(i)*dimY + (j)] = 0;
+            if (val2 !=0) Dy[(i)*dimY + (j)] = (val2/fabs(val2))*val22; else Dy[(i)*dimY + (j)] = 0;
+            
+        }}
+    return 1;
+}
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1;
+    float val1, val11, val2, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+    
+#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,denom)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
+            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
+            
+            denom = sqrt(val1*val1 + val2*val2);
+            
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+            
+            if (denom != 0.0f) {
+                Dx[(i)*dimY + (j)] = val11*(val1/denom);
+                Dy[(i)*dimY + (j)] = val11*(val2/denom);
+            }
+            else {
+                Dx[(i)*dimY + (j)] = 0;
+                Dy[(i)*dimY + (j)] = 0;
+            }
+        }}
+    return 1;
+}
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
+{
+    int i,j,i1,j1;
+#pragma omp parallel for shared(U) private(i,j,i1,j1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            Bx[(i)*dimY + (j)] = Bx[(i)*dimY + (j)] + ((U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) - Dx[(i)*dimY + (j)]);
+            By[(i)*dimY + (j)] = By[(i)*dimY + (j)] + ((U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) - Dy[(i)*dimY + (j)]);
+        }}
+    return 1;
+}
+
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
+{
+    float normConst, d_val, b_val, sum;
+    int i,j,i1,i2,j1,j2,k,k1,k2;
+    normConst = 1.0f/(mu + 6.0f*lambda);
+#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                j2 = j-1; if (j2 < 0) j2 = j+1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                k2 = k-1; if (k2 < 0) k2 = k+1;
+                
+                d_val = Dx[(dimX*dimY)*k + (i2)*dimY + (j)] - Dx[(dimX*dimY)*k + (i)*dimY + (j)] + Dy[(dimX*dimY)*k + (i)*dimY + (j2)] - Dy[(dimX*dimY)*k + (i)*dimY + (j)] + Dz[(dimX*dimY)*k2 + (i)*dimY + (j)] - Dz[(dimX*dimY)*k + (i)*dimY + (j)];
+                b_val = -Bx[(dimX*dimY)*k + (i2)*dimY + (j)] + Bx[(dimX*dimY)*k + (i)*dimY + (j)] - By[(dimX*dimY)*k + (i)*dimY + (j2)] + By[(dimX*dimY)*k + (i)*dimY + (j)] - Bz[(dimX*dimY)*k2 + (i)*dimY + (j)] + Bz[(dimX*dimY)*k + (i)*dimY + (j)];
+                sum =  d_val + b_val;
+                sum += U[(dimX*dimY)*k + (i1)*dimY + (j)] + U[(dimX*dimY)*k + (i2)*dimY + (j)] + U[(dimX*dimY)*k + (i)*dimY + (j1)] + U[(dimX*dimY)*k + (i)*dimY + (j2)] + U[(dimX*dimY)*k1 + (i)*dimY + (j)] + U[(dimX*dimY)*k2 + (i)*dimY + (j)];
+                sum *= lambda;
+                sum += mu*A[(dimX*dimY)*k + (i)*dimY + (j)];
+                U[(dimX*dimY)*k + (i)*dimY + (j)] = normConst*sum;
+            }}}
+    return *U;
+}
+
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val22, val3, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + (i)*dimY + (j);
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
+                
+                val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+                val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+                val33 = fabs(val3) - denom_lam; if (val33 < 0) val33 = 0;
+                
+                if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
+                if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
+                if (val3 !=0) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0;
+                
+            }}}
+    return 1;
+}
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val3, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + (i)*dimY + (j);
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
+                
+                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+                
+                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+                
+                if (denom != 0.0f) {
+                    Dx[index] = val11*(val1/denom);
+                    Dy[index] = val11*(val2/denom);
+                    Dz[index] = val11*(val3/denom);
+                }
+                else {
+                    Dx[index] = 0;
+                    Dy[index] = 0;
+                    Dz[index] = 0;
+                }               
+            }}}
+    return 1;
+}
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
+{
+    int i,j,k,i1,j1,k1;
+#pragma omp parallel for shared(U) private(i,j,k,i1,j1,k1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                Bx[(dimX*dimY)*k + (i)*dimY + (j)] = Bx[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dx[(dimX*dimY)*k + (i)*dimY + (j)]);
+                By[(dimX*dimY)*k + (i)*dimY + (j)] = By[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dy[(dimX*dimY)*k + (i)*dimY + (j)]);
+                Bz[(dimX*dimY)*k + (i)*dimY + (j)] = Bz[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dz[(dimX*dimY)*k + (i)*dimY + (j)]);
+                
+            }}}
+    return 1;
+}
+/* General Functions */
+/*****************************************************************/
+/* Copy Image */
+float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
+{
+    int j;
+#pragma omp parallel for shared(A, B) private(j)
+    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
+    return *B;
+}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
new file mode 100644
index 0000000..41f8615
--- /dev/null
+++ b/main_func/regularizers_CPU/TGV_PD.c
@@ -0,0 +1,353 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generilized Variation (TGV)-L2 model (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D)
+ * 2. lambda - regularization parameter
+ * 3. parameter to control first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of CP iterations
+ *
+ * Output:
+ * Filtered/regularized image 
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .03*randn(size(Im)); % adding noise
+ * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+ *
+ * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References:
+ * K. Bredies "Total Generalized Variation"
+ *
+ * 28.11.16/Harwell
+ */
+ 
+/* 2D functions */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma);
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
+/*3D functions*/
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma);
+
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll;
+    const int  *dim_array;
+    float *A, *U, *U_old, *P1, *P2, *P3, *Q1, *Q2, *Q3, *Q4, *Q5, *Q6, *Q7, *Q8, *Q9, *V1, *V1_old, *V2, *V2_old, *V3, *V3_old, lambda, L2, tau, sigma,  alpha1, alpha0;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
+    alpha1 =  (float) mxGetScalar(prhs[2]); /*first-order term*/
+    alpha0 =  (float) mxGetScalar(prhs[3]); /*second-order term*/
+    iter =  (int) mxGetScalar(prhs[4]); /*iterations number*/
+    if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations");
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    
+    if (number_of_dims == 2) {
+        /*2D case*/
+        dimZ = 1;
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /*dual variables*/
+        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   }
+    else if (number_of_dims == 3) {
+        mexErrMsgTxt("The input data should be 2D");
+        /*3D case*/
+//         dimZ = dim_array[2];
+//         U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         Q1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q4 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q5 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q6 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q7 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q8 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q9 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         V1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));	  
+    }
+    else {mexErrMsgTxt("The input data should be 2D");}
+    
+    
+    /*printf("%i \n", i);*/
+    L2 = 12.0; /*Lipshitz constant*/
+    tau = 1.0/pow(L2,0.5);
+    sigma = 1.0/pow(L2,0.5);
+    
+    /*Copy A to U*/
+    copyIm(A, U, dimX, dimY, dimZ);
+    
+    if (number_of_dims == 2) {
+        /* Here primal-dual iterations begin for 2D */
+        for(ll = 0; ll < iter; ll++) {
+            
+            /* Calculate Dual Variable P */
+            DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for P*/
+            ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1);
+            
+            /* Calculate Dual Variable Q */
+            DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for Q*/
+            ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0);
+            
+            /*saving U into U_old*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau);
+            
+            /*get updated solution U*/
+            newU(U, U_old, dimX, dimY, dimZ);
+            
+            /*saving V into V_old*/
+            copyIm(V1, V1_old, dimX, dimY, dimZ);
+            copyIm(V2, V2_old, dimX, dimY, dimZ);
+            
+            /* upd V*/
+            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau);
+            
+            /*get new V*/
+            newU(V1, V1_old, dimX, dimY, dimZ);
+            newU(V2, V2_old, dimX, dimY, dimZ);
+        } /*end of iterations*/
+    }
+    
+//     /*3D version*/
+//     if (number_of_dims == 3) {
+//         /* Here primal-dual iterations begin for 3D */
+//         for(ll = 0; ll < iter; ll++) {
+//             
+//             /* Calculate Dual Variable P */
+//             DualP_3D(U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+//             
+//             /*Projection onto convex set for P*/
+//             ProjP_3D(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+//             
+//             /* Calculate Dual Variable Q */
+//             DualQ_3D(V1, V2, V2, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, dimX, dimY, dimZ, sigma);
+//             
+//         } /*end of iterations*/
+//     }
+}
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1) P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i-1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            else P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i + 1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            if (j == dimY-1) P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j-1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+            else  P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j+1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+        }}
+    return 1;
+}
+/*Projection onto convex set for P*/
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(P1,P2) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(P1[i*dimY + (j)],2) + pow(P2[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha1;
+            if (grad_magn > 1.0) {
+                P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn;
+                P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/*Calculating dual variable Q (using forward differences)*/
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+    float q1, q2, q11, q22;
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1)
+            { q1 = (V1[(i-1)*dimY + (j)] - V1[i*dimY + (j)]);
+              q11 = (V2[(i-1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            else {
+                q1 = (V1[(i+1)*dimY + (j)] - V1[i*dimY + (j)]);
+                q11 = (V2[(i+1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            if (j == dimY-1) {
+                q2 = (V2[(i)*dimY + (j-1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j-1)] - V1[i*dimY + (j)]);
+            }
+            else {
+                q2 = (V2[(i)*dimY + (j+1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j+1)] - V1[i*dimY + (j)]);
+            }
+            Q1[i*dimY + (j)] = Q1[i*dimY + (j)] + sigma*(q1);
+            Q2[i*dimY + (j)] = Q2[i*dimY + (j)] + sigma*(q2);
+            Q3[i*dimY + (j)] = Q3[i*dimY + (j)]  + sigma*(0.5f*(q11 + q22));
+        }}
+    return 1;
+}
+
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(Q1[i*dimY + (j)],2) + pow(Q2[i*dimY + (j)],2) + 2*pow(Q3[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0) {
+                Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn;
+                Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn;
+                Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Divergence and projection for P*/
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    int i,j;
+    float P_v1, P_v2, div;
+#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            if (i == 0) P_v1 = (P1[i*dimY + (j)]);
+            else P_v1 = (P1[i*dimY + (j)] - P1[(i-1)*dimY + (j)]);
+            if (j == 0) P_v2 = (P2[i*dimY + (j)]);
+            else  P_v2 = (P2[i*dimY + (j)] - P2[(i)*dimY + (j-1)]);
+            div = P_v1 + P_v2;
+            U[i*dimY + (j)] = (lambda*(U[i*dimY + (j)] + tau*div) + tau*A[i*dimY + (j)])/(lambda + tau);
+        }}
+    return *U;
+}
+/*get updated solution U*/
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ)
+{
+    int i;
+#pragma omp parallel for shared(U,U_old) private(i)
+    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2*U[i] - U_old[i];
+    return *U;
+}
+
+/*get update for V*/
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau)
+{
+    int i,j;
+    float q1, q11, q2, q22, div1, div2;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i,j, q1, q11, q2, q22, div1, div2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == 0) {
+                q1 = (Q1[i*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)]);
+            }
+            else {
+                q1 = (Q1[i*dimY + (j)] - Q1[(i-1)*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)] - Q3[(i-1)*dimY + (j)]);
+            }
+            if (j == 0) {
+                q2 = (Q2[i*dimY + (j)]);
+                q22 = (Q3[i*dimY + (j)]);
+            }
+            else  {
+                q2 = (Q2[i*dimY + (j)] - Q2[(i)*dimY + (j-1)]);
+                q22 = (Q3[i*dimY + (j)] - Q3[(i)*dimY + (j-1)]);
+            }
+            div1 = q1 + q22;
+            div2 = q2 + q11;
+            V1[i*dimY + (j)] = V1[i*dimY + (j)] + tau*(P1[i*dimY + (j)] + div1);
+            V2[i*dimY + (j)] = V2[i*dimY + (j)] + tau*(P2[i*dimY + (j)] + div2);
+        }}
+    return 1;
+}
+/* Copy Image */
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
+{
+    int j;
+#pragma omp parallel for shared(A, U) private(j)
+    for(j=0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
+    return *U;
+}
+/*********************3D *********************/
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j,k;
+#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                if (i == dimX-1) P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i-1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                else P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i + 1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                if (j == dimY-1) P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j-1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                else  P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j+1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                if (k == dimZ-1) P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k-1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+                else  P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k+1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+            }}}
+    return 1;
+}
\ No newline at end of file
-- 
cgit v1.2.3