Using memcpy and openmp

Thiago Franco de Moraes
1 parent ae0a6c50
Showing 1 changed file with 83 additions and 32 deletions Show diff stats
invesalius/data/smooth_cy.pyx
@@ -3,6 +3,7 @@ cimport numpy as np
 cimport cython
  
 from libc.math cimport floor, ceil, sqrt, fabs, round
+from libc.string cimport memcpy
 from cython.parallel import prange
  
 DTYPE8 = np.uint8
@@ -15,6 +16,7 @@ ctypedef np.float64_t DTYPEF64_t
  
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
     cdef int dz = I.shape[0]
     cdef int dy = I.shape[1]
@@ -31,6 +33,7 @@ cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
  
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef void perim(DTYPE8_t[:, :, :] image,
                 DTYPE8_t[:, :, :] out) nogil:
  
@@ -58,6 +61,7 @@ cdef void perim(DTYPE8_t[:, :, :] image,
  
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
     # double fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
     cdef DTYPEF64_t fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
@@ -101,6 +105,7 @@ cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
  
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
+@cython.wraparound(False)
 cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
     cdef int dz = source.shape[0]
     cdef int dy = source.shape[1]
@@ -111,28 +116,71 @@ cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
             for x in xrange(dx):
                 dest[z, y, x] = source[z, y, x]
  
+@cython.boundscheck(False) # turn off bounds-checking for entire function
+@cython.cdivision(True) # use C division semantics (no Python zero-division check)
+@cython.wraparound(False) # negative indices are not wrapped around
+cdef void replicate8(DTYPE8_t[:, :, :] source, DTYPE8_t[:, :, :] dest) nogil: # element-wise copy of source into dest; same copy as replicate, for DTYPE8_t arrays
+    cdef int dz = source.shape[0] # extent along axis 0
+    cdef int dy = source.shape[1] # extent along axis 1
+    cdef int dx = source.shape[2] # extent along axis 2
+    cdef int x, y, z
+    for z in prange(dz, nogil=True): # outer axis is distributed by prange; NOTE(review): the function is already declared nogil, so nogil=True looks redundant - confirm
+        for y in xrange(dy):
+            for x in xrange(dx):
+                dest[z, y, x] = source[z, y, x] # assumes dest is at least as large as source (bounds checks are off) - TODO confirm at call sites
+
+
+@cython.boundscheck(False) # turn off bounds-checking for entire function
+@cython.cdivision(True) # use C division semantics (no Python zero-division check)
+@cython.wraparound(False) # negative indices are not wrapped around
+cdef void _smooth(DTYPE8_t[:, :, :] image, DTYPEF64_t[:, :, :] aux, DTYPE8_t[:, :, :] mask, int x, int y, int z, DTYPEF64_t[:, :, :] out) nogil: # writes out[z, y, x] for one element, reading aux and image; arguments come in (x, y, z) order but indexing is [z, y, x]; NOTE(review): mask is not read in this body
+
+    cdef DTYPEF64_t H, v, cn # NOTE(review): cn is declared but never used in this function
+    cdef DTYPEF64_t dt=1/6.0 # factor that scales H in the update below
+    H = calculate_H(aux, z, y, x) # value computed by calculate_H from aux at this index
+    v = aux[z, y, x] + dt*H # candidate new value for this element
+
+    if image[z, y, x]: # nonzero in image: the stored value is never negative
+        if v < 0:
+            out[z, y, x] = 0.00001 # replace a negative candidate with a small positive constant
+        else:
+            out[z, y, x] = v
+    else: # zero in image: the stored value is never positive
+        if v > 0:
+            out[z, y, x] = -0.00001 # replace a positive candidate with a small negative constant
+        else:
+            out[z, y, x] = v
+
  
 @cython.boundscheck(False) # turn of bounds-checking for entire function
 @cython.cdivision(True)
-def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
+@cython.wraparound(False)
+def smooth(DTYPE8_t[:, :, :] image,
            int n, int bsize,
-           np.ndarray[DTYPEF64_t,  ndim=3] out):
+           DTYPEF64_t[:, :, :] out):
  
-    cdef np.ndarray[DTYPE8_t, ndim=3] mask = np.zeros_like(image)
-    cdef np.ndarray[DTYPE8_t, ndim=3] _mask = np.zeros_like(image)
-    cdef np.ndarray[DTYPEF64_t, ndim=3] aux = np.zeros_like(out)
+    cdef DTYPE8_t[:, :, :] mask = np.zeros_like(image)
+    cdef DTYPE8_t[:, :, :] _mask = np.zeros_like(image)
+    cdef DTYPEF64_t[:, :, :] aux = np.zeros_like(out)
  
     cdef int i, x, y, z, S
     cdef DTYPEF64_t H, v, cn
     cdef DTYPEF64_t diff=0.0
     cdef DTYPEF64_t dt=1/6.0
  
+
     cdef DTYPEF64_t E = 0.001
  
-    _mask[:] = image
+    print ">>>>>>>>>", image.size
+
+    # _mask[:] = image
+    # replicate8(image, _mask)
+    memcpy(&_mask[0, 0, 0], &image[0, 0, 0], image.nbytes)
     for i in xrange(bsize):
         perim(_mask, mask)
-        _mask[:] = mask
+        # _mask[:] = mask
+        # replicate8(mask, _mask)
+        memcpy(&_mask[0, 0, 0], &mask[0, 0, 0], mask.nbytes)
         print i
  
     # out[:] = mask
@@ -158,33 +206,36 @@ def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
                     S += 1
  
     for i in xrange(n):
-        replicate(out, aux)
+        # replicate(out, aux)
+        memcpy(&aux[0, 0, 0], &out[0, 0, 0], out.nbytes)
         diff = 0.0
  
-        for z in xrange(dz):
+        for z in prange(dz, nogil=True):
             for y in xrange(dy):
                 for x in xrange(dx):
                     if mask[z, y, x]:
-                        H = calculate_H(aux, z, y, x)
-                        v = aux[z, y, x] + dt*H
-
-                        if image[z, y, x]:
-                            if v < 0:
-                                out[z, y, x] = 0.00001
-                            else:
-                                out[z, y, x] = v
-                        else:
-                            if v > 0:
-                                out[z, y, x] = -0.00001
-                            else:
-                                out[z, y, x] = v
-
-                        diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
-
-        cn = sqrt((1.0/S) * diff)
-        print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
-
-        if cn <= E:
-            break
-
-    return mask
+                        _smooth(image, aux, mask, x, y, z, out)
+                        # H = calculate_H(aux, z, y, x)
+                        # v = aux[z, y, x] + dt*H
+
+                        # if image[z, y, x]:
+                            # # if v < 0:
+                                # # out[z, y, x] = 0.00001
+                            # # else:
+                            # out[z, y, x] = v
+                        # else:
+                            # # if v > 0:
+                                # # out[z, y, x] = -0.00001
+                            # # else:
+                            # out[z, y, x] = v
+
+                    # diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
+
+        # cn = sqrt((1.0/S) * diff)
+        # print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
+        print "Step %d" % i
+
+        # if cn <= E:
+            # break
+
+    return np.asarray(mask)
...	...	@@ -3,6 +3,7 @@ cimport numpy as np
3	3	cimport cython
4	4
5	5	from libc.math cimport floor, ceil, sqrt, fabs, round
	6	+from libc.string cimport memcpy
6	7	from cython.parallel import prange
7	8
8	9	DTYPE8 = np.uint8
...	...	@@ -15,6 +16,7 @@ ctypedef np.float64_t DTYPEF64_t
15	16
16	17	@cython.boundscheck(False) # turn of bounds-checking for entire function
17	18	@cython.cdivision(True)
	19	+@cython.wraparound(False)
18	20	cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
19	21	cdef int dz = I.shape[0]
20	22	cdef int dy = I.shape[1]
...	...	@@ -31,6 +33,7 @@ cdef inline DTYPEF64_t GS(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
31	33
32	34	@cython.boundscheck(False) # turn of bounds-checking for entire function
33	35	@cython.cdivision(True)
	36	+@cython.wraparound(False)
34	37	cdef void perim(DTYPE8_t[:, :, :] image,
35	38	DTYPE8_t[:, :, :] out) nogil:
36	39
...	...	@@ -58,6 +61,7 @@ cdef void perim(DTYPE8_t[:, :, :] image,
58	61
59	62	@cython.boundscheck(False) # turn of bounds-checking for entire function
60	63	@cython.cdivision(True)
	64	+@cython.wraparound(False)
61	65	cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
62	66	# double fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
63	67	cdef DTYPEF64_t fx, fy, fz, fxx, fyy, fzz, fxy, fxz, fyz, H
...	...	@@ -101,6 +105,7 @@ cdef DTYPEF64_t calculate_H(DTYPEF64_t[:, :, :] I, int z, int y, int x) nogil:
101	105
102	106	@cython.boundscheck(False) # turn of bounds-checking for entire function
103	107	@cython.cdivision(True)
	108	+@cython.wraparound(False)
104	109	cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
105	110	cdef int dz = source.shape[0]
106	111	cdef int dy = source.shape[1]
...	...	@@ -111,28 +116,71 @@ cdef void replicate(DTYPEF64_t[:, :, :] source, DTYPEF64_t[:, :, :] dest) nogil:
111	116	for x in xrange(dx):
112	117	dest[z, y, x] = source[z, y, x]
113	118
	119	+@cython.boundscheck(False) # turn of bounds-checking for entire function
	120	+@cython.cdivision(True)
	121	+@cython.wraparound(False)
	122	+cdef void replicate8(DTYPE8_t[:, :, :] source, DTYPE8_t[:, :, :] dest) nogil:
	123	+ cdef int dz = source.shape[0]
	124	+ cdef int dy = source.shape[1]
	125	+ cdef int dx = source.shape[2]
	126	+ cdef int x, y, z
	127	+ for z in prange(dz, nogil=True):
	128	+ for y in xrange(dy):
	129	+ for x in xrange(dx):
	130	+ dest[z, y, x] = source[z, y, x]
	131	+
	132	+
	133	+@cython.boundscheck(False) # turn of bounds-checking for entire function
	134	+@cython.cdivision(True)
	135	+@cython.wraparound(False)
	136	+cdef void _smooth(DTYPE8_t[:, :, :] image, DTYPEF64_t[:, :, :] aux, DTYPE8_t[:, :, :] mask, int x, int y, int z, DTYPEF64_t[:, :, :] out) nogil:
	137	+
	138	+ cdef DTYPEF64_t H, v, cn
	139	+ cdef DTYPEF64_t dt=1/6.0
	140	+ H = calculate_H(aux, z, y, x)
	141	+ v = aux[z, y, x] + dt*H
	142	+
	143	+ if image[z, y, x]:
	144	+ if v < 0:
	145	+ out[z, y, x] = 0.00001
	146	+ else:
	147	+ out[z, y, x] = v
	148	+ else:
	149	+ if v > 0:
	150	+ out[z, y, x] = -0.00001
	151	+ else:
	152	+ out[z, y, x] = v
	153	+
114	154
115	155	@cython.boundscheck(False) # turn of bounds-checking for entire function
116	156	@cython.cdivision(True)
117		-def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
	157	+@cython.wraparound(False)
	158	+def smooth(DTYPE8_t[:, :, :] image,
118	159	int n, int bsize,
119		- np.ndarray[DTYPEF64_t, ndim=3] out):
	160	+ DTYPEF64_t[:, :, :] out):
120	161
121		- cdef np.ndarray[DTYPE8_t, ndim=3] mask = np.zeros_like(image)
122		- cdef np.ndarray[DTYPE8_t, ndim=3] _mask = np.zeros_like(image)
123		- cdef np.ndarray[DTYPEF64_t, ndim=3] aux = np.zeros_like(out)
	162	+ cdef DTYPE8_t[:, :, :] mask = np.zeros_like(image)
	163	+ cdef DTYPE8_t[:, :, :] _mask = np.zeros_like(image)
	164	+ cdef DTYPEF64_t[:, :, :] aux = np.zeros_like(out)
124	165
125	166	cdef int i, x, y, z, S
126	167	cdef DTYPEF64_t H, v, cn
127	168	cdef DTYPEF64_t diff=0.0
128	169	cdef DTYPEF64_t dt=1/6.0
129	170
	171	+
130	172	cdef DTYPEF64_t E = 0.001
131	173
132		- _mask[:] = image
	174	+ print ">>>>>>>>>", image.size
	175	+
	176	+ # _mask[:] = image
	177	+ # replicate8(image, _mask)
	178	+ memcpy(&_mask[0, 0, 0], &image[0, 0, 0], image.nbytes)
133	179	for i in xrange(bsize):
134	180	perim(_mask, mask)
135		- _mask[:] = mask
	181	+ # _mask[:] = mask
	182	+ # replicate8(mask, _mask)
	183	+ memcpy(&_mask[0, 0, 0], &mask[0, 0, 0], mask.nbytes)
136	184	print i
137	185
138	186	# out[:] = mask
...	...	@@ -158,33 +206,36 @@ def smooth(np.ndarray[DTYPE8_t, ndim=3] image,
158	206	S += 1
159	207
160	208	for i in xrange(n):
161		- replicate(out, aux)
	209	+ # replicate(out, aux)
	210	+ memcpy(&aux[0, 0, 0], &out[0, 0, 0], out.nbytes)
162	211	diff = 0.0
163	212
164		- for z in xrange(dz):
	213	+ for z in prange(dz, nogil=True):
165	214	for y in xrange(dy):
166	215	for x in xrange(dx):
167	216	if mask[z, y, x]:
168		- H = calculate_H(aux, z, y, x)
169		- v = aux[z, y, x] + dt*H
170		-
171		- if image[z, y, x]:
172		- if v < 0:
173		- out[z, y, x] = 0.00001
174		- else:
175		- out[z, y, x] = v
176		- else:
177		- if v > 0:
178		- out[z, y, x] = -0.00001
179		- else:
180		- out[z, y, x] = v
181		-
182		- diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
183		-
184		- cn = sqrt((1.0/S) * diff)
185		- print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
186		-
187		- if cn <= E:
188		- break
189		-
190		- return mask
	217	+ _smooth(image, aux, mask, x, y, z, out)
	218	+ # H = calculate_H(aux, z, y, x)
	219	+ # v = aux[z, y, x] + dt*H
	220	+
	221	+ # if image[z, y, x]:
	222	+ # # if v < 0:
	223	+ # # out[z, y, x] = 0.00001
	224	+ # # else:
	225	+ # out[z, y, x] = v
	226	+ # else:
	227	+ # # if v > 0:
	228	+ # # out[z, y, x] = -0.00001
	229	+ # # else:
	230	+ # out[z, y, x] = v
	231	+
	232	+ # diff += (out[z, y, x] - aux[z, y, x])*(out[z, y, x] - aux[z, y, x])
	233	+
	234	+ # cn = sqrt((1.0/S) * diff)
	235	+ # print "%d - CN: %.28f - diff: %.28f\n" % (i, cn, diff)
	236	+ print "Step %d" % i
	237	+
	238	+ # if cn <= E:
	239	+ # break
	240	+
	241	+ return np.asarray(mask)
...	...