Cythonic Laplacian solving on a 3D regular grid

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
1
down vote

favorite

I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.

Right now I'm happy with the results it returns, but I'm wondering if there was a more efficient way to code this, especially since the tutorial I have followed is about 10 years old. I have very limited knowledge of C and I'm not sure if the data types (ctypedef) are properly chosen here and/or would have a significant impact on performance. I have tried types my indexes variables as unsigned int in different ways but I ended up giving up as I'm not really sure if it is relevant.

Also, I'm happy with any general coding style and good practices advice.

The Python code that calls my Cython function is inside a class and looks like this:

def _solve_laplacian(self):
 init = np.zeros_like(self.domain, float)
 init[np.logical_not(self.border)] = 1
 log.info("Solving Laplacian...")
 laplace_grid, iterations, max_error = laplace_solver(
 self.domain_idx_i,
 self.domain_idx_j,
 self.domain_idx_k,
 init,
 self.laplace_tolerance,
 self.laplace_max_iter,
 self.spacing)
 log.debug(
 f"Laplacian: iterations iterations, max_error = max_error")
 self.cropped_laplace_grid = laplace_grid
 self.laplacian_iterations = iterations
 self.laplacian_max_error = max_error

and here is the actual cython function:

import numpy as np
cimport numpy as np
cimport cython

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT


# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
 laplace_grid)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_FLOAT max_error = 1.0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT error
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while max_error > tolerance and iteration < max_iterations:
 iteration += 1
 prev_laplace_grid[:] = laplace_grid
 for n in range(n_points):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = 
 (
 (laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2
 ) * factor
 laplace_grid[i, j, k] = value
 max_error = np.nanmax(
 np.abs(
 (prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
 laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
 prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
 return laplace_grid, iteration, max_error

If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

asked Feb 9 at 12:23

nicoco

24918

add a commentÂ |Â

up vote
1
down vote

favorite

I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.

Also, I'm happy with any general coding style and good practices advice.

The Python code that calls my Cython function is inside a class and looks like this:

def _solve_laplacian(self):
 init = np.zeros_like(self.domain, float)
 init[np.logical_not(self.border)] = 1
 log.info("Solving Laplacian...")
 laplace_grid, iterations, max_error = laplace_solver(
 self.domain_idx_i,
 self.domain_idx_j,
 self.domain_idx_k,
 init,
 self.laplace_tolerance,
 self.laplace_max_iter,
 self.spacing)
 log.debug(
 f"Laplacian: iterations iterations, max_error = max_error")
 self.cropped_laplace_grid = laplace_grid
 self.laplacian_iterations = iterations
 self.laplacian_max_error = max_error

and here is the actual cython function:

import numpy as np
cimport numpy as np
cimport cython

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT


# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
 laplace_grid)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_FLOAT max_error = 1.0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT error
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while max_error > tolerance and iteration < max_iterations:
 iteration += 1
 prev_laplace_grid[:] = laplace_grid
 for n in range(n_points):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = 
 (
 (laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2
 ) * factor
 laplace_grid[i, j, k] = value
 max_error = np.nanmax(
 np.abs(
 (prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
 laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
 prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
 return laplace_grid, iteration, max_error

If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

asked Feb 9 at 12:23

nicoco

24918

add a commentÂ |Â

up vote
1
down vote

favorite

I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.

Also, I'm happy with any general coding style and good practices advice.

The Python code that calls my Cython function is inside a class and looks like this:

def _solve_laplacian(self):
 init = np.zeros_like(self.domain, float)
 init[np.logical_not(self.border)] = 1
 log.info("Solving Laplacian...")
 laplace_grid, iterations, max_error = laplace_solver(
 self.domain_idx_i,
 self.domain_idx_j,
 self.domain_idx_k,
 init,
 self.laplace_tolerance,
 self.laplace_max_iter,
 self.spacing)
 log.debug(
 f"Laplacian: iterations iterations, max_error = max_error")
 self.cropped_laplace_grid = laplace_grid
 self.laplacian_iterations = iterations
 self.laplacian_max_error = max_error

and here is the actual cython function:

import numpy as np
cimport numpy as np
cimport cython

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT


# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
 laplace_grid)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_FLOAT max_error = 1.0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT error
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while max_error > tolerance and iteration < max_iterations:
 iteration += 1
 prev_laplace_grid[:] = laplace_grid
 for n in range(n_points):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = 
 (
 (laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2
 ) * factor
 laplace_grid[i, j, k] = value
 max_error = np.nanmax(
 np.abs(
 (prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
 laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
 prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
 return laplace_grid, iteration, max_error

If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

asked Feb 9 at 12:23

nicoco

24918

I have followed to official Cython for NumPy users to make a Laplacian solver on a 3D regular grid with Dirichlet conditions.

Also, I'm happy with any general coding style and good practices advice.

The Python code that calls my Cython function is inside a class and looks like this:

def _solve_laplacian(self):
 init = np.zeros_like(self.domain, float)
 init[np.logical_not(self.border)] = 1
 log.info("Solving Laplacian...")
 laplace_grid, iterations, max_error = laplace_solver(
 self.domain_idx_i,
 self.domain_idx_j,
 self.domain_idx_k,
 init,
 self.laplace_tolerance,
 self.laplace_max_iter,
 self.spacing)
 log.debug(
 f"Laplacian: iterations iterations, max_error = max_error")
 self.cropped_laplace_grid = laplace_grid
 self.laplacian_iterations = iterations
 self.laplacian_max_error = max_error

and here is the actual cython function:

import numpy as np
cimport numpy as np
cimport cython

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT


# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] prev_laplace_grid = np.copy(
 laplace_grid)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_FLOAT max_error = 1.0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT error
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while max_error > tolerance and iteration < max_iterations:
 iteration += 1
 prev_laplace_grid[:] = laplace_grid
 for n in range(n_points):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = 
 (
 (laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2
 ) * factor
 laplace_grid[i, j, k] = value
 max_error = np.nanmax(
 np.abs(
 (prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k] -
 laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]) /
 prev_laplace_grid[domain_idx_i, domain_idx_j, domain_idx_k]))
 return laplace_grid, iteration, max_error

If you're interested in the complete Python package this is part of, it is available here and is quite ugly right now. I'm trying to improve it so I'm not ashamed of it anymore.

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

asked Feb 9 at 12:23

nicoco

24918

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

edited Feb 9 at 13:16

Jamalâ™¦

30.1k11114225

asked Feb 9 at 12:23

nicoco

24918

asked Feb 9 at 12:23

nicoco

24918

asked Feb 9 at 12:23

nicoco

24918

add a commentÂ |Â

1 Answer
1

active

oldest

votes

up vote
1
down vote

accepted

I was helped on the #scipy IRC channel by user @ngoldbaum and could substantially speed up my code by:

Avoiding to use any numpy methods in the main loop

Using OpenMP parallelization

Using true C division as there is no risk of division by zero

Avoiding to store the whole previous iteration values

Ã¢Â€Â¦and some other minor stuff.

Here's what my solver looks like now:

import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT

# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
 DTYPE_INT n,
 DTYPE_FLOAT tolerance):
 cdef bint res = True
 cdef DTYPE_INT i
 for i in prange(n, nogil=True): # is prange useful here ?
 if errors[i] > tolerance:
 res = False
 break
 return res

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
 cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
 np.float64)
 cdef bint convergence = False

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while not convergence and iteration < max_iterations:
 iteration += 1
 for n in prange(n_points, nogil=True):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = ((laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2) * factor
 prev_value = laplace_grid[i, j, k]
 laplace_grid[i, j, k] = value
 errors[n] = fabs((prev_value - value) / prev_value)

 if iteration == 1:
 convergence = False
 else:
 convergence = has_converged(errors, n, tolerance)

 return laplace_grid, iteration, np.nanmax(errors)

I think it could be a bit better if errors were checked "on the fly", removing the second for loop every while iteration. However, I did not find out to do that efficiently in a multithread prangeÃ¢Â€Â¦ maybe it's not worth it!

answered Feb 15 at 12:08

nicoco

24918

add a commentÂ |Â

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f187180%2fcythonic-laplacian-solving-on-a-3d-regular-grid%23new-answer', 'question_page');

);

Post as a guest

Name

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

up vote
1
down vote

accepted

I was helped on the #scipy IRC channel by user @ngoldbaum and could substantially speed up my code by:

Avoiding to use any numpy methods in the main loop

Using OpenMP parallelization

Using true C division as there is no risk of division by zero

Avoiding to store the whole previous iteration values

Ã¢Â€Â¦and some other minor stuff.

Here's what my solver looks like now:

import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT

# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
 DTYPE_INT n,
 DTYPE_FLOAT tolerance):
 cdef bint res = True
 cdef DTYPE_INT i
 for i in prange(n, nogil=True): # is prange useful here ?
 if errors[i] > tolerance:
 res = False
 break
 return res

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
 cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
 np.float64)
 cdef bint convergence = False

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while not convergence and iteration < max_iterations:
 iteration += 1
 for n in prange(n_points, nogil=True):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = ((laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2) * factor
 prev_value = laplace_grid[i, j, k]
 laplace_grid[i, j, k] = value
 errors[n] = fabs((prev_value - value) / prev_value)

 if iteration == 1:
 convergence = False
 else:
 convergence = has_converged(errors, n, tolerance)

 return laplace_grid, iteration, np.nanmax(errors)

answered Feb 15 at 12:08

nicoco

24918

add a commentÂ |Â

up vote
1
down vote

accepted

I was helped on the #scipy IRC channel by user @ngoldbaum and could substantially speed up my code by:

Avoiding to use any numpy methods in the main loop

Using OpenMP parallelization

Using true C division as there is no risk of division by zero

Avoiding to store the whole previous iteration values

Ã¢Â€Â¦and some other minor stuff.

Here's what my solver looks like now:

import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT

# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
 DTYPE_INT n,
 DTYPE_FLOAT tolerance):
 cdef bint res = True
 cdef DTYPE_INT i
 for i in prange(n, nogil=True): # is prange useful here ?
 if errors[i] > tolerance:
 res = False
 break
 return res

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
 cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
 np.float64)
 cdef bint convergence = False

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while not convergence and iteration < max_iterations:
 iteration += 1
 for n in prange(n_points, nogil=True):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = ((laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2) * factor
 prev_value = laplace_grid[i, j, k]
 laplace_grid[i, j, k] = value
 errors[n] = fabs((prev_value - value) / prev_value)

 if iteration == 1:
 convergence = False
 else:
 convergence = has_converged(errors, n, tolerance)

 return laplace_grid, iteration, np.nanmax(errors)

answered Feb 15 at 12:08

nicoco

24918

add a commentÂ |Â

up vote
1
down vote

accepted

I was helped on the #scipy IRC channel by user @ngoldbaum and could substantially speed up my code by:

Avoiding to use any numpy methods in the main loop

Using OpenMP parallelization

Using true C division as there is no risk of division by zero

Avoiding to store the whole previous iteration values

Ã¢Â€Â¦and some other minor stuff.

Here's what my solver looks like now:

import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT

# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
 DTYPE_INT n,
 DTYPE_FLOAT tolerance):
 cdef bint res = True
 cdef DTYPE_INT i
 for i in prange(n, nogil=True): # is prange useful here ?
 if errors[i] > tolerance:
 res = False
 break
 return res

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
 cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
 np.float64)
 cdef bint convergence = False

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while not convergence and iteration < max_iterations:
 iteration += 1
 for n in prange(n_points, nogil=True):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = ((laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2) * factor
 prev_value = laplace_grid[i, j, k]
 laplace_grid[i, j, k] = value
 errors[n] = fabs((prev_value - value) / prev_value)

 if iteration == 1:
 convergence = False
 else:
 convergence = has_converged(errors, n, tolerance)

 return laplace_grid, iteration, np.nanmax(errors)

answered Feb 15 at 12:08

nicoco

24918

I was helped on the #scipy IRC channel by user @ngoldbaum and could substantially speed up my code by:

Avoiding to use any numpy methods in the main loop

Using OpenMP parallelization

Using true C division as there is no risk of division by zero

Avoiding to store the whole previous iteration values

Ã¢Â€Â¦and some other minor stuff.

Here's what my solver looks like now:

import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs

ctypedef np.float64_t DTYPE_FLOAT
ctypedef np.int_t DTYPE_INT

# dirty fix to get rid of annoying warnings
np.seterr(divide='ignore', invalid='ignore')

@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint has_converged(np.ndarray[DTYPE_FLOAT, ndim=1] errors,
 DTYPE_INT n,
 DTYPE_FLOAT tolerance):
 cdef bint res = True
 cdef DTYPE_INT i
 for i in prange(n, nogil=True): # is prange useful here ?
 if errors[i] > tolerance:
 res = False
 break
 return res

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def solve_3D(np.ndarray[DTYPE_INT, ndim=1] domain_idx_i,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_j,
 np.ndarray[DTYPE_INT, ndim=1] domain_idx_k,
 np.ndarray[DTYPE_FLOAT, ndim=3] init,
 DTYPE_FLOAT tolerance,
 DTYPE_INT max_iterations,
 np.ndarray[DTYPE_FLOAT, ndim=1] spacing):
 cdef np.ndarray[DTYPE_FLOAT, ndim=3] laplace_grid = np.copy(init)
 cdef DTYPE_INT iteration = 0
 cdef DTYPE_INT n_points = len(domain_idx_i)
 cdef DTYPE_INT i, j, k, n
 cdef DTYPE_FLOAT value
 cdef DTYPE_FLOAT hi, hj, hk, hi2, hj2, hk2, factor, prev_value
 cdef np.ndarray[DTYPE_FLOAT, ndim=1] errors = np.zeros(n_points,
 np.float64)
 cdef bint convergence = False

 hi, hj, hk = spacing
 hi2, hj2, hk2 = spacing ** 2

 factor = (hi2 * hj2 * hk2) / (2 * (hi2 * hj2 + hi2 * hk2 + hj2 * hk2))

 while not convergence and iteration < max_iterations:
 iteration += 1
 for n in prange(n_points, nogil=True):
 i = domain_idx_i[n]
 j = domain_idx_j[n]
 k = domain_idx_k[n]
 value = ((laplace_grid[i + 1, j, k] +
 laplace_grid[i - 1, j, k]) / hi2 +
 (laplace_grid[i, j + 1, k] +
 laplace_grid[i, j - 1, k]) / hj2 +
 (laplace_grid[i, j, k - 1] +
 laplace_grid[i, j, k + 1]) / hk2) * factor
 prev_value = laplace_grid[i, j, k]
 laplace_grid[i, j, k] = value
 errors[n] = fabs((prev_value - value) / prev_value)

 if iteration == 1:
 convergence = False
 else:
 convergence = has_converged(errors, n, tolerance)

 return laplace_grid, iteration, np.nanmax(errors)

answered Feb 15 at 12:08

nicoco

24918

answered Feb 15 at 12:08

nicoco

24918

answered Feb 15 at 12:08

nicoco

24918

answered Feb 15 at 12:08

nicoco

24918

add a commentÂ |Â

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr