How to speed up the matrix inversion using the LU decomposition

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
0
down vote

favorite












I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.



matrix.pyx



cdef class Matrix: 
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)

if Identity:
self._IdentityMatrix()

if ones:
self._fillWithOnes()

def __dealloc__(self):
del self.matrix

property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y

cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]

cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)

cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):

cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse



cdef class MatrixList:
def __cinit__(self):
self.inner =

cdef void append(self, Matrix a):
self.inner.append(a)

cdef Matrix get(self, int i):
return <Matrix> self.inner[i]

def __len__(self):
return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b

@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)

for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)

big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i

if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU


matrix.pxd



from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)

cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones

cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)


I would like to know how is it possible to parallelize this code or improve its speed?







share|improve this question

















  • 1




    Did you consider using scipy.linalg?
    – Gareth Rees
    Jan 29 at 13:31










  • @GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
    – Dalek
    Jan 29 at 13:42






  • 2




    @Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
    – Jorge Bellón
    Jan 30 at 9:46
















up vote
0
down vote

favorite












I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.



matrix.pyx



cdef class Matrix: 
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)

if Identity:
self._IdentityMatrix()

if ones:
self._fillWithOnes()

def __dealloc__(self):
del self.matrix

property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y

cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]

cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)

cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):

cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse



cdef class MatrixList:
def __cinit__(self):
self.inner =

cdef void append(self, Matrix a):
self.inner.append(a)

cdef Matrix get(self, int i):
return <Matrix> self.inner[i]

def __len__(self):
return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b

@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)

for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)

big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i

if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU


matrix.pxd



from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)

cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones

cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)


I would like to know how is it possible to parallelize this code or improve its speed?







share|improve this question

















  • 1




    Did you consider using scipy.linalg?
    – Gareth Rees
    Jan 29 at 13:31










  • @GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
    – Dalek
    Jan 29 at 13:42






  • 2




    @Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
    – Jorge Bellón
    Jan 30 at 9:46












up vote
0
down vote

favorite









up vote
0
down vote

favorite











I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.



matrix.pyx



cdef class Matrix: 
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)

if Identity:
self._IdentityMatrix()

if ones:
self._fillWithOnes()

def __dealloc__(self):
del self.matrix

property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y

cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]

cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)

cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):

cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse



cdef class MatrixList:
def __cinit__(self):
self.inner =

cdef void append(self, Matrix a):
self.inner.append(a)

cdef Matrix get(self, int i):
return <Matrix> self.inner[i]

def __len__(self):
return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b

@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)

for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)

big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i

if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU


matrix.pxd



from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)

cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones

cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)


I would like to know how is it possible to parallelize this code or improve its speed?







share|improve this question













I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.



matrix.pyx



cdef class Matrix: 
def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
self._rows=rows
self._columns=columns
self.matrix=new vector[double]()
self.matrix.resize(rows*columns)

if Identity:
self._IdentityMatrix()

if ones:
self._fillWithOnes()

def __dealloc__(self):
del self.matrix

property rows:
def __get__(self):
return self._rows
def __set__(self, size_t x):
self._rows = x
property columns:
def __get__(self):
return self._columns
def __set__(self, size_t y):
self._columns = y

cpdef double getVal(self, size_t r, size_t c):
return self.matrix[0][r*self._columns+c]

cpdef void setVal(self, size_t r, size_t c, double v):
self.matrix[0][r*self._columns+c] = v

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _fillWithOnes(self):
fill(self.matrix.begin(),self.matrix.end(),1.)

cdef void _IdentityMatrix(self):
cdef size_t i
if (self._rows!=self._columns):
raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
else:
for i from 0 <= i <self._columns:
self.setVal(i,i,1.0)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Matrix Inv(self):

cdef Matrix A_inverse = Matrix(self._rows,self._columns)
cdef MatrixList LU = ludcmp(self)
cdef Matrix A = LU.get(0)
cdef Matrix indx = LU.get(1)
cdef Matrix d = LU.get(2)
cdef double det = d.getVal(0,0)
cdef int i, j
cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
cdef Matrix col = Matrix(self._rows,1)
for i from 0 <= i < self._rows:
for j from 0 <= j < self._columns:
if (j>i):
U[i,j]=A.getVal(i,j)
L[i,j]=0
elif (j<i):
U[i,j]=0
L[i,j]=A.getVal(i,j)
else:
U[i,j]=A.getVal(i,j)
L[i,j]=1
print np.dot(L, U)
for i from 0 <= i < self._rows:
det*= A.getVal(i,i)
for j from 0 <= j < self._columns:
if (i==j):
col.setVal(j,0,1)
else:
col.setVal(j,0,0)
col=lubksb(A, indx, col)
for j from 0 <= j < self._columns:
A_inverse.setVal(j,i,col.getVal(j,0))
print "determinant of matrix %.4f"%(det)
return A_inverse



cdef class MatrixList:
def __cinit__(self):
self.inner =

cdef void append(self, Matrix a):
self.inner.append(a)

cdef Matrix get(self, int i):
return <Matrix> self.inner[i]

def __len__(self):
return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
cdef int n = a.rows
cdef int i, ip, j
cdef int ii = 0
cdef double su
for i from 0 <= i < n:
ip = <int>indx.getVal(i,0)
su = b.getVal(ip,0)
b.setVal(ip,0, b.getVal(i,0))
if (ii>=0):
for j from ii <= j <= (i-1):
su -= a.getVal(i,j) * b.getVal(j,0)
elif (su):
ii = i
b.setVal(i, 0, su)
for i from n >= i >= 0:
su = b.getVal(i,0)
for j from (i+1) <= j < n:
su -= a.getVal(i,j) * b.getVal(j,0)
if (a.getVal(i,i)==0.0):
a.setVal(i,i, 1.1e-16)
b.setVal(i, 0, su/a.getVal(i,i))
return b

@cython.boundscheck(False)
@cython.wraparound(False)
cdef MatrixList ludcmp(Matrix a):
#Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
cdef MatrixList LU = MatrixList()
cdef int n = a.rows
cdef int i, j, k, imax
cdef double big, dum, su, temp
cdef Matrix vv = Matrix(n,1)
cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
cdef double TINY = 1.1e-16
for i from 0 <= i < n:
big = 0.0
for j from 0 <= j < n:
temp=fabs(a.getVal(i,j))
if (temp > big):
big=temp
if (big ==0.0):
raise Exception("ERROR! ludcmp: Singular matrixn")
vv.setVal(i,0,1.0/big)

for j from 0 <= j < n:
for i from 0 <= i < j:
su = a.getVal(i,j)
for k from 0 <= k < i:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i,j,su)

big=0.0
for i from j<= i< n:
su = a.getVal(i,j)
for k from 0 <= k < j:
su -= a.getVal(i,k)*a.getVal(k,j)
a.setVal(i, j, su)
dum=vv.getVal(i,0)*fabs(su )
if (dum >= big):
big = dum
imax = i

if (j != imax):
for k from 0 <= k < n:
dum = a.getVal(imax,k)
a.setVal(imax, k, a.getVal(j,k))
a.setVal(j,k, dum)
d.setVal(0, 0, -d.getVal(0,0))
vv.setVal(imax, 0, vv.getVal(j, 0))
indx.setVal(j, 0, imax)
if (a.getVal(j,j) == 0.0):
a.setVal(j,j, TINY)
if (j != (n-1)):
dum=1.0/a.getVal(j,j)
for i from (j+1) <= i < n:
a.setVal(i,j, a.getVal(i,j)*dum)
LU.append(<Matrix>a)
LU.append(<Matrix>indx)
LU.append(<Matrix>d)
return LU


matrix.pxd



from libcpp.vector cimport vector
cdef class MatrixList:
cdef list inner
cdef void append(self, Matrix a)
cdef Matrix get(self, int i)

cdef class Matrix:
cdef vector[double] *matrix
cdef size_t _rows
cdef size_t _columns
cdef bint Identity
cdef bint ones

cpdef double getVal(self, size_t r, size_t c)
cpdef void setVal(self, size_t r, size_t c, double v)
cpdef Matrix transpose(self)
cdef void _IdentityMatrix(self)
cdef void _fillWithOnes(self)
cpdef Matrix Inv(self)
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b)
cdef MatrixList ludcmp(Matrix a)


I would like to know how is it possible to parallelize this code or improve its speed?









share|improve this question












share|improve this question




share|improve this question








edited Jan 29 at 13:11
























asked Jan 28 at 22:37









Dalek

747




747







  • 1




    Did you consider using scipy.linalg?
    – Gareth Rees
    Jan 29 at 13:31










  • @GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
    – Dalek
    Jan 29 at 13:42






  • 2




    @Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
    – Jorge Bellón
    Jan 30 at 9:46












  • 1




    Did you consider using scipy.linalg?
    – Gareth Rees
    Jan 29 at 13:31










  • @GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
    – Dalek
    Jan 29 at 13:42






  • 2




    @Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
    – Jorge Bellón
    Jan 30 at 9:46







1




1




Did you consider using scipy.linalg?
– Gareth Rees
Jan 29 at 13:31




Did you consider using scipy.linalg?
– Gareth Rees
Jan 29 at 13:31












@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
– Dalek
Jan 29 at 13:42




@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
– Dalek
Jan 29 at 13:42




2




2




@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
– Jorge Bellón
Jan 30 at 9:46




@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
– Jorge Bellón
Jan 30 at 9:46















active

oldest

votes











Your Answer




StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f186219%2fhow-to-speed-up-the-matrix-inversion-using-the-lu-decomposition%23new-answer', 'question_page');

);

Post as a guest



































active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes










 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f186219%2fhow-to-speed-up-the-matrix-inversion-using-the-lu-decomposition%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Python Lists

Aion

JavaScript Array Iteration Methods