Using memcpy and openmp

Thiago Franco de Moraes
1 parent ae0a6c50
Showing 1 changed file with 83 additions and 32 deletions Show diff stats
invesalius/data/smooth_cy.pyx
@@ -3,6 +3,7 @@ cimport numpy as np
 cimport cython
 from libc.math cimport floor, ceil, sqrt, fabs, round
+from libc.string cimport memcpy
 from cython.parallel import prange
 DTYPE8 = np.uint8
@@ -15,6 +16,7 @@ ctypedef np.float64_t DTYPEF64_t
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
     cdef int dz = I.shape[0]
     cdef int dy = I.shape[1]
@@ -31,6 +33,7 @@ cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef void perim(DTYPE8_t[:, :, :] image,
                 DTYPE8_t[:, :, :] out) nogil:
@@ -58,6 +61,7 @@ cdef void perim(DTYPE8_t[:, :, :] image,
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
     # double fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
     cdef DTYPEF64_t fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
@@ -101,6 +105,7 @@ cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
     cdef int dz = source.shape[0]
     cdef int dy = source.shape[1]
@@ -111,28 +116,71 @@ cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
             for x in xrange(dx):
                 dest[z, y, x] = source[z, y, x]
+@cython.boundscheck(False) # turn of bounds-checking for entire function
+@cython.cdivision(True)
+@cython.wraparound(False)
+cdef void replicate8(DTYPE8_t[:, :, :] source, DTYPE8_t[:, :, :] dest) nogil:
+    cdef int dz = source.shape[0]
+    cdef int dy = source.shape[1]
+    cdef int dx = source.shape[2]
+    cdef int x, y, z
+    for z in prange(dz, nogil=True):
+        for y in xrange(dy):
+            for x in xrange(dx):
+                dest[z, y, x] = source[z, y, x]
+
+
+@cython.boundscheck(False) # turn of bounds-checking for entire function
+@cython.cdivision(True)
+@cython.wraparound(False)
+cdef void _smooth(DTYPE8_t[:, :, :] image, DTYPEF64_t[:, :, :] aux, DTYPE8_t[:, :, :] mask, int x, int y, int z, DTYPEF64_t[:, :, :] out) nogil:
+
+    cdef DTYPEF64_t H, v, cn
+    cdef DTYPEF64_t dt=1/6.0
+    H = calculate_H(aux, z, y, x)
+    v = aux[z, y, x] + dt*H
+
+    if image[z, y, x]:
+        if v < 0:
+            out[z, y, x] = 0.00001
+        else:
+            out[z, y, x] = v
+    else:
+        if v > 0:
+            out[z, y, x] = -0.00001
+        else:
+            out[z, y, x] = v
+
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
-def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
+@cython.wraparound(False)
+def smooth(DTYPE8_t[:, :, :] image,
            int n, int bsize,
-           np.ndarray[DTYPEF64_t,  ndim=3] out):
+           DTYPEF64_t[:, :, :] out):
-    cdef np.ndarray[DTYPE8_t, ndim=3] mask = np.zeros_like(image)
-    cdef np.ndarray[DTYPE8_t, ndim=3] _mask = np.zeros_like(image)
-    cdef np.ndarray[DTYPEF64_t, ndim=3] aux = np.zeros_like(out)
+    cdef DTYPE8_t[:, :, :] mask = np.zeros_like(image)
+    cdef DTYPE8_t[:, :, :] _mask = np.zeros_like(image)
+    cdef DTYPEF64_t[:, :, :] aux = np.zeros_like(out)
     cdef int i, x, y, z, S
     cdef DTYPEF64_t H, v, cn
     cdef DTYPEF64_t diff=0.0
     cdef DTYPEF64_t dt=1/6.0
+
     cdef DTYPEF64_t E = 0.001
-    _mask[:] = image
+    print ">>>>>>>>>", image.size
+
+    # _mask[:] = image
+    # replicate8(image, _mask)
+    memcpy(&_mask[0, 0, 0], &image[0, 0, 0], image.nbytes)
     for i in xrange(bsize):
         perim(_mask, mask)
-        _mask[:] = mask
+        # _mask[:] = mask
+        # replicate8(mask, _mask)
+        memcpy(&_mask[0, 0, 0], &mask[0, 0, 0], mask.nbytes)
         print i
     # out[:] = mask
@@ -158,33 +206,36 @@ def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
                     S += 1
     for i in xrange(n):
-        replicate(out, aux)
+        # replicate(out, aux)
+        memcpy(&aux[0, 0, 0], &out[0, 0, 0], out.nbytes)
         diff = 0.0
-        for z in xrange(dz):
+        for z in prange(dz, nogil=True):
             for y in xrange(dy):
                 for x in xrange(dx):
                     if mask[z, y, x]:
-                        H = calculate_H(aux, z, y, x)
-                        v = aux[z, y, x] + dt*H
-
-                        if image[z, y, x]:
-                            if v < 0:
-                                out[z, y, x] = 0.00001
-                            else:
-                                out[z, y, x] = v
-                        else:
-                            if v > 0:
-                                out[z, y, x] = -0.00001
-                            else:
-                                out[z, y, x] = v
-
-                        diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
-
-        cn = sqrt((1.0/S) * diff)
-        print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
-
-        if cn <= E:
-            break
-
-    return mask
+                        _smooth(image, aux, mask, x, y, z, out)
+                        # H = calculate_H(aux, z, y, x)
+                        # v = aux[z, y, x] + dt*H
+
+                        # if image[z, y, x]:
+                            # # if v < 0:
+                                # # out[z, y, x] = 0.00001
+                            # # else:
+                            # out[z, y, x] = v
+                        # else:
+                            # # if v > 0:
+                                # # out[z, y, x] = -0.00001
+                            # # else:
+                            # out[z, y, x] = v
+
+                    # diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
+
+        # cn = sqrt((1.0/S) * diff)
+        # print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
+        print "Step %d" % i
+
+        # if cn <= E:
+            # break
+
+    return np.asarray(mask)