How to speed up the matrix inversion using the LU decomposition

Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
0
down vote
favorite
I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.
matrix.pyx
cdef class Matrix:
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)
if Identity:
self._IdentityMatrix()
if ones:
self._fillWithOnes()
def __dealloc__(self):
del self.matrix
property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y
cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]
cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)
cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):
cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse
cdef class MatrixList:
def __cinit__(self):
self.inner =
cdef void append(self, Matrix a):
self.inner.append(a)
cdef Matrix get(self, int i):
return <Matrix> self.inner[i]
def __len__(self):
return len(self.inner)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b
@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)
for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)
big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i
if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU
matrix.pxd
from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)
cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones
cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)
I would like to know how is it possible to parallelize this code or improve its speed?
python performance algorithm matrix cython
add a comment |Â
up vote
0
down vote
favorite
I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.
matrix.pyx
cdef class Matrix:
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)
if Identity:
self._IdentityMatrix()
if ones:
self._fillWithOnes()
def __dealloc__(self):
del self.matrix
property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y
cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]
cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)
cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):
cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse
cdef class MatrixList:
def __cinit__(self):
self.inner =
cdef void append(self, Matrix a):
self.inner.append(a)
cdef Matrix get(self, int i):
return <Matrix> self.inner[i]
def __len__(self):
return len(self.inner)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b
@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)
for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)
big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i
if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU
matrix.pxd
from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)
cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones
cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)
I would like to know how is it possible to parallelize this code or improve its speed?
python performance algorithm matrix cython
1
Did you consider usingscipy.linalg?
â Gareth Rees
Jan 29 at 13:31
@GarethRees Before I installopensslandmkl-Intel, my code was a bit faster thannumpybut now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â Dalek
Jan 29 at 13:42
2
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46
add a comment |Â
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.
matrix.pyx
cdef class Matrix:
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)
if Identity:
self._IdentityMatrix()
if ones:
self._fillWithOnes()
def __dealloc__(self):
del self.matrix
property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y
cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]
cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)
cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):
cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse
cdef class MatrixList:
def __cinit__(self):
self.inner =
cdef void append(self, Matrix a):
self.inner.append(a)
cdef Matrix get(self, int i):
return <Matrix> self.inner[i]
def __len__(self):
return len(self.inner)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b
@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)
for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)
big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i
if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU
matrix.pxd
from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)
cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones
cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)
I would like to know how is it possible to parallelize this code or improve its speed?
python performance algorithm matrix cython
I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.
matrix.pyx
cdef class Matrix:
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)
if Identity:
self._IdentityMatrix()
if ones:
self._fillWithOnes()
def __dealloc__(self):
del self.matrix
property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y
cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]
cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)
cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):
cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse
cdef class MatrixList:
def __cinit__(self):
self.inner =
cdef void append(self, Matrix a):
self.inner.append(a)
cdef Matrix get(self, int i):
return <Matrix> self.inner[i]
def __len__(self):
return len(self.inner)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b
@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)
for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)
big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i
if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU
matrix.pxd
from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)
cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones
cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)
I would like to know how is it possible to parallelize this code or improve its speed?
python performance algorithm matrix cython
edited Jan 29 at 13:11
asked Jan 28 at 22:37
Dalek
747
747
1
Did you consider usingscipy.linalg?
â Gareth Rees
Jan 29 at 13:31
@GarethRees Before I installopensslandmkl-Intel, my code was a bit faster thannumpybut now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â Dalek
Jan 29 at 13:42
2
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46
add a comment |Â
1
Did you consider usingscipy.linalg?
â Gareth Rees
Jan 29 at 13:31
@GarethRees Before I installopensslandmkl-Intel, my code was a bit faster thannumpybut now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â Dalek
Jan 29 at 13:42
2
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46
1
1
Did you consider using
scipy.linalg?â Gareth Rees
Jan 29 at 13:31
Did you consider using
scipy.linalg?â Gareth Rees
Jan 29 at 13:31
@GarethRees Before I install
openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.â Dalek
Jan 29 at 13:42
@GarethRees Before I install
openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.â Dalek
Jan 29 at 13:42
2
2
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46
add a comment |Â
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f186219%2fhow-to-speed-up-the-matrix-inversion-using-the-lu-decomposition%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
1
Did you consider using
scipy.linalg?â Gareth Rees
Jan 29 at 13:31
@GarethRees Before I install
opensslandmkl-Intel, my code was a bit faster thannumpybut now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.â Dalek
Jan 29 at 13:42
2
@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â Jorge Bellón
Jan 30 at 9:46