Cythonic Laplacian solving on a 3D regular grid
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
1
down vote
favorite
I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.
Right now I'm happy with the results it returns, but I'm wondering if there was a more efficient way to code this, especially since the tutorial I have followed is about 10 years old. I have very limited knowledge of C and I'm not sure if the data types (ctypedef
) are properly chosen here and/or would have a significant impact on performance. I have tried types my indexes variables as unsigned int
in different ways but I ended up giving up as I'm not really sure if it is relevant.
Also, I'm happy with any general coding style and good practices advice.
The Python code that calls my Cython function is inside a class and looks like this:
def _solve_laplacian(self):
init = np.zeros_like(self.domain, float)
init[np.logical_not(self.border)] = 1
log.info("Solving Laplacian...")
laplace_grid, iterations, max_error = laplace_solver(
self.domain_idx_i,
self.domain_idx_j,
self.domain_idx_k,
init,
self.laplace_tolerance,
self.laplace_max_iter,
self.spacing)
log.debug(
f"Laplacian: iterations iterations, max_error = max_error")
self.cropped_laplace_grid = laplace_grid
self.laplacian_iterations = iterations
self.laplacian_max_error = max_error
and here is the actual cython function:
import numpy as np
cimport numpy as np
cimport cython
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
laplace_grid)
cdef DTYPE_INT iteration = 0
cdef DTYPE_FLOAT max_error = 1.0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT error
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while max_error > tolerance and iteration < max_iterations:
iteration += 1
prev_laplace_grid[:] = laplace_grid
for n in range(n_points):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value =
(
(laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2
) * factor
laplace_grid[i, j, k] = value
max_error = np.nanmax(
np.abs(
(prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
return laplace_grid, iteration, max_error
If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.
performance numpy cython
add a comment |Â
up vote
1
down vote
favorite
I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.
Right now I'm happy with the results it returns, but I'm wondering if there was a more efficient way to code this, especially since the tutorial I have followed is about 10 years old. I have very limited knowledge of C and I'm not sure if the data types (ctypedef
) are properly chosen here and/or would have a significant impact on performance. I have tried types my indexes variables as unsigned int
in different ways but I ended up giving up as I'm not really sure if it is relevant.
Also, I'm happy with any general coding style and good practices advice.
The Python code that calls my Cython function is inside a class and looks like this:
def _solve_laplacian(self):
init = np.zeros_like(self.domain, float)
init[np.logical_not(self.border)] = 1
log.info("Solving Laplacian...")
laplace_grid, iterations, max_error = laplace_solver(
self.domain_idx_i,
self.domain_idx_j,
self.domain_idx_k,
init,
self.laplace_tolerance,
self.laplace_max_iter,
self.spacing)
log.debug(
f"Laplacian: iterations iterations, max_error = max_error")
self.cropped_laplace_grid = laplace_grid
self.laplacian_iterations = iterations
self.laplacian_max_error = max_error
and here is the actual cython function:
import numpy as np
cimport numpy as np
cimport cython
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
laplace_grid)
cdef DTYPE_INT iteration = 0
cdef DTYPE_FLOAT max_error = 1.0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT error
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while max_error > tolerance and iteration < max_iterations:
iteration += 1
prev_laplace_grid[:] = laplace_grid
for n in range(n_points):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value =
(
(laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2
) * factor
laplace_grid[i, j, k] = value
max_error = np.nanmax(
np.abs(
(prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
return laplace_grid, iteration, max_error
If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.
performance numpy cython
add a comment |Â
up vote
1
down vote
favorite
up vote
1
down vote
favorite
I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.
Right now I'm happy with the results it returns, but I'm wondering if there was a more efficient way to code this, especially since the tutorial I have followed is about 10 years old. I have very limited knowledge of C and I'm not sure if the data types (ctypedef
) are properly chosen here and/or would have a significant impact on performance. I have tried types my indexes variables as unsigned int
in different ways but I ended up giving up as I'm not really sure if it is relevant.
Also, I'm happy with any general coding style and good practices advice.
The Python code that calls my Cython function is inside a class and looks like this:
def _solve_laplacian(self):
init = np.zeros_like(self.domain, float)
init[np.logical_not(self.border)] = 1
log.info("Solving Laplacian...")
laplace_grid, iterations, max_error = laplace_solver(
self.domain_idx_i,
self.domain_idx_j,
self.domain_idx_k,
init,
self.laplace_tolerance,
self.laplace_max_iter,
self.spacing)
log.debug(
f"Laplacian: iterations iterations, max_error = max_error")
self.cropped_laplace_grid = laplace_grid
self.laplacian_iterations = iterations
self.laplacian_max_error = max_error
and here is the actual cython function:
import numpy as np
cimport numpy as np
cimport cython
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
laplace_grid)
cdef DTYPE_INT iteration = 0
cdef DTYPE_FLOAT max_error = 1.0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT error
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while max_error > tolerance and iteration < max_iterations:
iteration += 1
prev_laplace_grid[:] = laplace_grid
for n in range(n_points):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value =
(
(laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2
) * factor
laplace_grid[i, j, k] = value
max_error = np.nanmax(
np.abs(
(prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
return laplace_grid, iteration, max_error
If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.
performance numpy cython
I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.
Right now I'm happy with the results it returns, but I'm wondering if there was a more efficient way to code this, especially since the tutorial I have followed is about 10 years old. I have very limited knowledge of C and I'm not sure if the data types (ctypedef
) are properly chosen here and/or would have a significant impact on performance. I have tried types my indexes variables as unsigned int
in different ways but I ended up giving up as I'm not really sure if it is relevant.
Also, I'm happy with any general coding style and good practices advice.
The Python code that calls my Cython function is inside a class and looks like this:
def _solve_laplacian(self):
init = np.zeros_like(self.domain, float)
init[np.logical_not(self.border)] = 1
log.info("Solving Laplacian...")
laplace_grid, iterations, max_error = laplace_solver(
self.domain_idx_i,
self.domain_idx_j,
self.domain_idx_k,
init,
self.laplace_tolerance,
self.laplace_max_iter,
self.spacing)
log.debug(
f"Laplacian: iterations iterations, max_error = max_error")
self.cropped_laplace_grid = laplace_grid
self.laplacian_iterations = iterations
self.laplacian_max_error = max_error
and here is the actual cython function:
import numpy as np
cimport numpy as np
cimport cython
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
laplace_grid)
cdef DTYPE_INT iteration = 0
cdef DTYPE_FLOAT max_error = 1.0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT error
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while max_error > tolerance and iteration < max_iterations:
iteration += 1
prev_laplace_grid[:] = laplace_grid
for n in range(n_points):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value =
(
(laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2
) * factor
laplace_grid[i, j, k] = value
max_error = np.nanmax(
np.abs(
(prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
return laplace_grid, iteration, max_error
If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.
performance numpy cython
edited Feb 9 at 13:16
Jamalâ¦
30.1k11114225
30.1k11114225
asked Feb 9 at 12:23
nicoco
24918
24918
add a comment |Â
add a comment |Â
1 Answer
1
active
oldest
votes
up vote
1
down vote
accepted
I was helped on the #scipy
IRC channel by user @ngoldbaum and could substantially speed up my code by:
- Avoiding to use any
numpy
methods in the main loop - Using OpenMP parallelization
- Using true C division as there is no risk of division by zero
- Avoiding to store the whole previous iteration values
â¦and some other minor stuff.
Here's what my solver looks like now:
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
DTYPE_INT n,
DTYPE_FLOAT tolerance):
cdef bint res = True
cdef DTYPE_INT i
for i in prange(n, nogil=True): # is prange useful here ?
if errors[i] > tolerance:
res = False
break
return res
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef DTYPE_INT iteration = 0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
np.float64)
cdef bint convergence = False
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while not convergence and iteration < max_iterations:
iteration += 1
for n in prange(n_points, nogil=True):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value = ((laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2) * factor
prev_value = laplace_grid[i, j, k]
laplace_grid[i, j, k] = value
errors[n] = fabs((prev_value - value) / prev_value)
if iteration == 1:
convergence = False
else:
convergence = has_converged(errors, n, tolerance)
return laplace_grid, iteration, np.nanmax(errors)
I think it could be a bit better if errors
were checked "on the fly", removing the second for
loop every while
iteration. However, I did not find out to do that efficiently in a multithread prange
⦠maybe it's not worth it!
add a comment |Â
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
1
down vote
accepted
I was helped on the #scipy
IRC channel by user @ngoldbaum and could substantially speed up my code by:
- Avoiding to use any
numpy
methods in the main loop - Using OpenMP parallelization
- Using true C division as there is no risk of division by zero
- Avoiding to store the whole previous iteration values
â¦and some other minor stuff.
Here's what my solver looks like now:
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
DTYPE_INT n,
DTYPE_FLOAT tolerance):
cdef bint res = True
cdef DTYPE_INT i
for i in prange(n, nogil=True): # is prange useful here ?
if errors[i] > tolerance:
res = False
break
return res
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef DTYPE_INT iteration = 0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
np.float64)
cdef bint convergence = False
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while not convergence and iteration < max_iterations:
iteration += 1
for n in prange(n_points, nogil=True):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value = ((laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2) * factor
prev_value = laplace_grid[i, j, k]
laplace_grid[i, j, k] = value
errors[n] = fabs((prev_value - value) / prev_value)
if iteration == 1:
convergence = False
else:
convergence = has_converged(errors, n, tolerance)
return laplace_grid, iteration, np.nanmax(errors)
I think it could be a bit better if errors
were checked "on the fly", removing the second for
loop every while
iteration. However, I did not find out to do that efficiently in a multithread prange
⦠maybe it's not worth it!
add a comment |Â
up vote
1
down vote
accepted
I was helped on the #scipy
IRC channel by user @ngoldbaum and could substantially speed up my code by:
- Avoiding to use any
numpy
methods in the main loop - Using OpenMP parallelization
- Using true C division as there is no risk of division by zero
- Avoiding to store the whole previous iteration values
â¦and some other minor stuff.
Here's what my solver looks like now:
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
DTYPE_INT n,
DTYPE_FLOAT tolerance):
cdef bint res = True
cdef DTYPE_INT i
for i in prange(n, nogil=True): # is prange useful here ?
if errors[i] > tolerance:
res = False
break
return res
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef DTYPE_INT iteration = 0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
np.float64)
cdef bint convergence = False
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while not convergence and iteration < max_iterations:
iteration += 1
for n in prange(n_points, nogil=True):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value = ((laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2) * factor
prev_value = laplace_grid[i, j, k]
laplace_grid[i, j, k] = value
errors[n] = fabs((prev_value - value) / prev_value)
if iteration == 1:
convergence = False
else:
convergence = has_converged(errors, n, tolerance)
return laplace_grid, iteration, np.nanmax(errors)
I think it could be a bit better if errors
were checked "on the fly", removing the second for
loop every while
iteration. However, I did not find out to do that efficiently in a multithread prange
⦠maybe it's not worth it!
add a comment |Â
up vote
1
down vote
accepted
up vote
1
down vote
accepted
I was helped on the #scipy
IRC channel by user @ngoldbaum and could substantially speed up my code by:
- Avoiding to use any
numpy
methods in the main loop - Using OpenMP parallelization
- Using true C division as there is no risk of division by zero
- Avoiding to store the whole previous iteration values
â¦and some other minor stuff.
Here's what my solver looks like now:
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
DTYPE_INT n,
DTYPE_FLOAT tolerance):
cdef bint res = True
cdef DTYPE_INT i
for i in prange(n, nogil=True): # is prange useful here ?
if errors[i] > tolerance:
res = False
break
return res
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef DTYPE_INT iteration = 0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
np.float64)
cdef bint convergence = False
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while not convergence and iteration < max_iterations:
iteration += 1
for n in prange(n_points, nogil=True):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value = ((laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2) * factor
prev_value = laplace_grid[i, j, k]
laplace_grid[i, j, k] = value
errors[n] = fabs((prev_value - value) / prev_value)
if iteration == 1:
convergence = False
else:
convergence = has_converged(errors, n, tolerance)
return laplace_grid, iteration, np.nanmax(errors)
I think it could be a bit better if errors
were checked "on the fly", removing the second for
loop every while
iteration. However, I did not find out to do that efficiently in a multithread prange
⦠maybe it's not worth it!
I was helped on the #scipy
IRC channel by user @ngoldbaum and could substantially speed up my code by:
- Avoiding to use any
numpy
methods in the main loop - Using OpenMP parallelization
- Using true C division as there is no risk of division by zero
- Avoiding to store the whole previous iteration values
â¦and some other minor stuff.
Here's what my solver looks like now:
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT
# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
DTYPE_INT n,
DTYPE_FLOAT tolerance):
cdef bint res = True
cdef DTYPE_INT i
for i in prange(n, nogil=True): # is prange useful here ?
if errors[i] > tolerance:
res = False
break
return res
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
np.ndarray[DTYPE_FLOAT, ndim=3] init,
DTYPE_FLOAT tolerance,
DTYPE_INT max_iterations,
np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
cdef DTYPE_INT iteration = 0
cdef DTYPE_INT n_points = len(domain_idx_i)
cdef DTYPE_INT i, j, k, n
cdef DTYPE_FLOAT value
cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
np.float64)
cdef bint convergence = False
hi, hj, hk = spacing
hi2, hj2, hk2 = spacing ** 2
factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))
while not convergence and iteration < max_iterations:
iteration += 1
for n in prange(n_points, nogil=True):
i = domain_idx_i[n]
j = domain_idx_j[n]
k = domain_idx_k[n]
value = ((laplace_grid[i + 1, j, k] +
laplace_grid[i - 1, j, k]) / hi2 +
(laplace_grid[i, j + 1, k] +
laplace_grid[i, j - 1, k]) / hj2 +
(laplace_grid[i, j, k - 1] +
laplace_grid[i, j, k + 1]) / hk2) * factor
prev_value = laplace_grid[i, j, k]
laplace_grid[i, j, k] = value
errors[n] = fabs((prev_value - value) / prev_value)
if iteration == 1:
convergence = False
else:
convergence = has_converged(errors, n, tolerance)
return laplace_grid, iteration, np.nanmax(errors)
I think it could be a bit better if errors
were checked "on the fly", removing the second for
loop every while
iteration. However, I did not find out to do that efficiently in a multithread prange
⦠maybe it's not worth it!
answered Feb 15 at 12:08
nicoco
24918
24918
add a comment |Â
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f187180%2fcythonic-laplacian-solving-on-a-3d-regular-grid%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password