How to speed up the matrix inversion using the LU decomposition

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
0
down vote

favorite

I have written the following Matrix class in cython for the matrix inversion and some other linear algebra operations. I tried to use the LU decomposition, in order to compute the inverse of a matrix. The speed of code is good. I tried to implement this code in cython. The code is working.

matrix.pyx

cdef class Matrix: 
 def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
 self._rows=rows
 self._columns=columns
 self.matrix=new vector[double]()
 self.matrix.resize(rows*columns)

 if Identity:
 self._IdentityMatrix()

 if ones:
 self._fillWithOnes()

 def __dealloc__(self):
 del self.matrix

 property rows:
 def __get__(self):
 return self._rows
 def __set__(self, size_t x):
 self._rows = x 
 property columns:
 def __get__(self):
 return self._columns
 def __set__(self, size_t y):
 self._columns = y 

 cpdef double getVal(self, size_t r, size_t c):
 return self.matrix[0][r*self._columns+c]

 cpdef void setVal(self, size_t r, size_t c, double v): 
 self.matrix[0][r*self._columns+c] = v

 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef void _fillWithOnes(self):
 fill(self.matrix.begin(),self.matrix.end(),1.)

 cdef void _IdentityMatrix(self):
 cdef size_t i 
 if (self._rows!=self._columns):
 raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
 else:
 for i from 0 <= i <self._columns:
 self.setVal(i,i,1.0)
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef Matrix Inv(self):

 cdef Matrix A_inverse = Matrix(self._rows,self._columns)
 cdef MatrixList LU = ludcmp(self)
 cdef Matrix A = LU.get(0)
 cdef Matrix indx = LU.get(1)
 cdef Matrix d = LU.get(2)
 cdef double det = d.getVal(0,0)
 cdef int i, j
 cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef Matrix col = Matrix(self._rows,1)
 for i from 0 <= i < self._rows: 
 for j from 0 <= j < self._columns: 
 if (j>i):
 U[i,j]=A.getVal(i,j)
 L[i,j]=0
 elif (j<i):
 U[i,j]=0
 L[i,j]=A.getVal(i,j)
 else:
 U[i,j]=A.getVal(i,j)
 L[i,j]=1
 print np.dot(L, U)
 for i from 0 <= i < self._rows: 
 det*= A.getVal(i,i)
 for j from 0 <= j < self._columns:
 if (i==j):
 col.setVal(j,0,1)
 else:
 col.setVal(j,0,0)
 col=lubksb(A, indx, col) 
 for j from 0 <= j < self._columns:
 A_inverse.setVal(j,i,col.getVal(j,0))
 print "determinant of matrix %.4f"%(det)
 return A_inverse



cdef class MatrixList:
 def __cinit__(self):
 self.inner = 

 cdef void append(self, Matrix a):
 self.inner.append(a)

 cdef Matrix get(self, int i):
 return <Matrix> self.inner[i]

 def __len__(self):
 return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
 cdef int n = a.rows
 cdef int i, ip, j
 cdef int ii = 0
 cdef double su
 for i from 0 <= i < n: 
 ip = <int>indx.getVal(i,0)
 su = b.getVal(ip,0)
 b.setVal(ip,0, b.getVal(i,0))
 if (ii>=0):
 for j from ii <= j <= (i-1): 
 su -= a.getVal(i,j) * b.getVal(j,0)
 elif (su):
 ii = i 
 b.setVal(i, 0, su)
 for i from n >= i >= 0: 
 su = b.getVal(i,0)
 for j from (i+1) <= j < n:
 su -= a.getVal(i,j) * b.getVal(j,0)
 if (a.getVal(i,i)==0.0):
 a.setVal(i,i, 1.1e-16)
 b.setVal(i, 0, su/a.getVal(i,i))
 return b

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef MatrixList ludcmp(Matrix a):
 #Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
 cdef MatrixList LU = MatrixList()
 cdef int n = a.rows
 cdef int i, j, k, imax
 cdef double big, dum, su, temp
 cdef Matrix vv = Matrix(n,1)
 cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
 cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
 cdef double TINY = 1.1e-16
 for i from 0 <= i < n: 
 big = 0.0
 for j from 0 <= j < n:
 temp=fabs(a.getVal(i,j))
 if (temp > big):
 big=temp
 if (big ==0.0):
 raise Exception("ERROR! ludcmp: Singular matrixn")
 vv.setVal(i,0,1.0/big)

 for j from 0 <= j < n:
 for i from 0 <= i < j: 
 su = a.getVal(i,j)
 for k from 0 <= k < i:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i,j,su)

 big=0.0
 for i from j<= i< n:
 su = a.getVal(i,j)
 for k from 0 <= k < j:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i, j, su)
 dum=vv.getVal(i,0)*fabs(su )
 if (dum >= big):
 big = dum
 imax = i

 if (j != imax):
 for k from 0 <= k < n:
 dum = a.getVal(imax,k)
 a.setVal(imax, k, a.getVal(j,k))
 a.setVal(j,k, dum)
 d.setVal(0, 0, -d.getVal(0,0))
 vv.setVal(imax, 0, vv.getVal(j, 0))
 indx.setVal(j, 0, imax)
 if (a.getVal(j,j) == 0.0):
 a.setVal(j,j, TINY)
 if (j != (n-1)):
 dum=1.0/a.getVal(j,j)
 for i from (j+1) <= i < n:
 a.setVal(i,j, a.getVal(i,j)*dum)
 LU.append(<Matrix>a)
 LU.append(<Matrix>indx)
 LU.append(<Matrix>d)
 return LU

matrix.pxd

from libcpp.vector cimport vector
cdef class MatrixList:
 cdef list inner
 cdef void append(self, Matrix a)
 cdef Matrix get(self, int i)

cdef class Matrix:
 cdef vector[double] *matrix 
 cdef size_t _rows
 cdef size_t _columns
 cdef bint Identity
 cdef bint ones

 cpdef double getVal(self, size_t r, size_t c)
 cpdef void setVal(self, size_t r, size_t c, double v)
 cpdef Matrix transpose(self)
 cdef void _IdentityMatrix(self)
 cdef void _fillWithOnes(self)
 cpdef Matrix Inv(self) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b) 
cdef MatrixList ludcmp(Matrix a)

I would like to know how is it possible to parallelize this code or improve its speed?

edited Jan 29 at 13:11

asked Jan 28 at 22:37

Dalek

747

1

Did you consider using scipy.linalg?
â€“Â Gareth Rees
Jan 29 at 13:31

@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â€“Â Dalek
Jan 29 at 13:42

2

@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â€“Â Jorge BellÃ³n
Jan 30 at 9:46

add a commentÂ |Â

up vote
0
down vote

favorite

matrix.pyx

cdef class Matrix: 
 def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
 self._rows=rows
 self._columns=columns
 self.matrix=new vector[double]()
 self.matrix.resize(rows*columns)

 if Identity:
 self._IdentityMatrix()

 if ones:
 self._fillWithOnes()

 def __dealloc__(self):
 del self.matrix

 property rows:
 def __get__(self):
 return self._rows
 def __set__(self, size_t x):
 self._rows = x 
 property columns:
 def __get__(self):
 return self._columns
 def __set__(self, size_t y):
 self._columns = y 

 cpdef double getVal(self, size_t r, size_t c):
 return self.matrix[0][r*self._columns+c]

 cpdef void setVal(self, size_t r, size_t c, double v): 
 self.matrix[0][r*self._columns+c] = v

 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef void _fillWithOnes(self):
 fill(self.matrix.begin(),self.matrix.end(),1.)

 cdef void _IdentityMatrix(self):
 cdef size_t i 
 if (self._rows!=self._columns):
 raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
 else:
 for i from 0 <= i <self._columns:
 self.setVal(i,i,1.0)
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef Matrix Inv(self):

 cdef Matrix A_inverse = Matrix(self._rows,self._columns)
 cdef MatrixList LU = ludcmp(self)
 cdef Matrix A = LU.get(0)
 cdef Matrix indx = LU.get(1)
 cdef Matrix d = LU.get(2)
 cdef double det = d.getVal(0,0)
 cdef int i, j
 cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef Matrix col = Matrix(self._rows,1)
 for i from 0 <= i < self._rows: 
 for j from 0 <= j < self._columns: 
 if (j>i):
 U[i,j]=A.getVal(i,j)
 L[i,j]=0
 elif (j<i):
 U[i,j]=0
 L[i,j]=A.getVal(i,j)
 else:
 U[i,j]=A.getVal(i,j)
 L[i,j]=1
 print np.dot(L, U)
 for i from 0 <= i < self._rows: 
 det*= A.getVal(i,i)
 for j from 0 <= j < self._columns:
 if (i==j):
 col.setVal(j,0,1)
 else:
 col.setVal(j,0,0)
 col=lubksb(A, indx, col) 
 for j from 0 <= j < self._columns:
 A_inverse.setVal(j,i,col.getVal(j,0))
 print "determinant of matrix %.4f"%(det)
 return A_inverse



cdef class MatrixList:
 def __cinit__(self):
 self.inner = 

 cdef void append(self, Matrix a):
 self.inner.append(a)

 cdef Matrix get(self, int i):
 return <Matrix> self.inner[i]

 def __len__(self):
 return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
 cdef int n = a.rows
 cdef int i, ip, j
 cdef int ii = 0
 cdef double su
 for i from 0 <= i < n: 
 ip = <int>indx.getVal(i,0)
 su = b.getVal(ip,0)
 b.setVal(ip,0, b.getVal(i,0))
 if (ii>=0):
 for j from ii <= j <= (i-1): 
 su -= a.getVal(i,j) * b.getVal(j,0)
 elif (su):
 ii = i 
 b.setVal(i, 0, su)
 for i from n >= i >= 0: 
 su = b.getVal(i,0)
 for j from (i+1) <= j < n:
 su -= a.getVal(i,j) * b.getVal(j,0)
 if (a.getVal(i,i)==0.0):
 a.setVal(i,i, 1.1e-16)
 b.setVal(i, 0, su/a.getVal(i,i))
 return b

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef MatrixList ludcmp(Matrix a):
 #Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
 cdef MatrixList LU = MatrixList()
 cdef int n = a.rows
 cdef int i, j, k, imax
 cdef double big, dum, su, temp
 cdef Matrix vv = Matrix(n,1)
 cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
 cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
 cdef double TINY = 1.1e-16
 for i from 0 <= i < n: 
 big = 0.0
 for j from 0 <= j < n:
 temp=fabs(a.getVal(i,j))
 if (temp > big):
 big=temp
 if (big ==0.0):
 raise Exception("ERROR! ludcmp: Singular matrixn")
 vv.setVal(i,0,1.0/big)

 for j from 0 <= j < n:
 for i from 0 <= i < j: 
 su = a.getVal(i,j)
 for k from 0 <= k < i:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i,j,su)

 big=0.0
 for i from j<= i< n:
 su = a.getVal(i,j)
 for k from 0 <= k < j:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i, j, su)
 dum=vv.getVal(i,0)*fabs(su )
 if (dum >= big):
 big = dum
 imax = i

 if (j != imax):
 for k from 0 <= k < n:
 dum = a.getVal(imax,k)
 a.setVal(imax, k, a.getVal(j,k))
 a.setVal(j,k, dum)
 d.setVal(0, 0, -d.getVal(0,0))
 vv.setVal(imax, 0, vv.getVal(j, 0))
 indx.setVal(j, 0, imax)
 if (a.getVal(j,j) == 0.0):
 a.setVal(j,j, TINY)
 if (j != (n-1)):
 dum=1.0/a.getVal(j,j)
 for i from (j+1) <= i < n:
 a.setVal(i,j, a.getVal(i,j)*dum)
 LU.append(<Matrix>a)
 LU.append(<Matrix>indx)
 LU.append(<Matrix>d)
 return LU

matrix.pxd

from libcpp.vector cimport vector
cdef class MatrixList:
 cdef list inner
 cdef void append(self, Matrix a)
 cdef Matrix get(self, int i)

cdef class Matrix:
 cdef vector[double] *matrix 
 cdef size_t _rows
 cdef size_t _columns
 cdef bint Identity
 cdef bint ones

 cpdef double getVal(self, size_t r, size_t c)
 cpdef void setVal(self, size_t r, size_t c, double v)
 cpdef Matrix transpose(self)
 cdef void _IdentityMatrix(self)
 cdef void _fillWithOnes(self)
 cpdef Matrix Inv(self) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b) 
cdef MatrixList ludcmp(Matrix a)

I would like to know how is it possible to parallelize this code or improve its speed?

edited Jan 29 at 13:11

asked Jan 28 at 22:37

Dalek

747

1

Did you consider using scipy.linalg?
â€“Â Gareth Rees
Jan 29 at 13:31

@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â€“Â Dalek
Jan 29 at 13:42

2

@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â€“Â Jorge BellÃ³n
Jan 30 at 9:46

add a commentÂ |Â

up vote
0
down vote

favorite

matrix.pyx

cdef class Matrix: 
 def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
 self._rows=rows
 self._columns=columns
 self.matrix=new vector[double]()
 self.matrix.resize(rows*columns)

 if Identity:
 self._IdentityMatrix()

 if ones:
 self._fillWithOnes()

 def __dealloc__(self):
 del self.matrix

 property rows:
 def __get__(self):
 return self._rows
 def __set__(self, size_t x):
 self._rows = x 
 property columns:
 def __get__(self):
 return self._columns
 def __set__(self, size_t y):
 self._columns = y 

 cpdef double getVal(self, size_t r, size_t c):
 return self.matrix[0][r*self._columns+c]

 cpdef void setVal(self, size_t r, size_t c, double v): 
 self.matrix[0][r*self._columns+c] = v

 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef void _fillWithOnes(self):
 fill(self.matrix.begin(),self.matrix.end(),1.)

 cdef void _IdentityMatrix(self):
 cdef size_t i 
 if (self._rows!=self._columns):
 raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
 else:
 for i from 0 <= i <self._columns:
 self.setVal(i,i,1.0)
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef Matrix Inv(self):

 cdef Matrix A_inverse = Matrix(self._rows,self._columns)
 cdef MatrixList LU = ludcmp(self)
 cdef Matrix A = LU.get(0)
 cdef Matrix indx = LU.get(1)
 cdef Matrix d = LU.get(2)
 cdef double det = d.getVal(0,0)
 cdef int i, j
 cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef Matrix col = Matrix(self._rows,1)
 for i from 0 <= i < self._rows: 
 for j from 0 <= j < self._columns: 
 if (j>i):
 U[i,j]=A.getVal(i,j)
 L[i,j]=0
 elif (j<i):
 U[i,j]=0
 L[i,j]=A.getVal(i,j)
 else:
 U[i,j]=A.getVal(i,j)
 L[i,j]=1
 print np.dot(L, U)
 for i from 0 <= i < self._rows: 
 det*= A.getVal(i,i)
 for j from 0 <= j < self._columns:
 if (i==j):
 col.setVal(j,0,1)
 else:
 col.setVal(j,0,0)
 col=lubksb(A, indx, col) 
 for j from 0 <= j < self._columns:
 A_inverse.setVal(j,i,col.getVal(j,0))
 print "determinant of matrix %.4f"%(det)
 return A_inverse



cdef class MatrixList:
 def __cinit__(self):
 self.inner = 

 cdef void append(self, Matrix a):
 self.inner.append(a)

 cdef Matrix get(self, int i):
 return <Matrix> self.inner[i]

 def __len__(self):
 return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
 cdef int n = a.rows
 cdef int i, ip, j
 cdef int ii = 0
 cdef double su
 for i from 0 <= i < n: 
 ip = <int>indx.getVal(i,0)
 su = b.getVal(ip,0)
 b.setVal(ip,0, b.getVal(i,0))
 if (ii>=0):
 for j from ii <= j <= (i-1): 
 su -= a.getVal(i,j) * b.getVal(j,0)
 elif (su):
 ii = i 
 b.setVal(i, 0, su)
 for i from n >= i >= 0: 
 su = b.getVal(i,0)
 for j from (i+1) <= j < n:
 su -= a.getVal(i,j) * b.getVal(j,0)
 if (a.getVal(i,i)==0.0):
 a.setVal(i,i, 1.1e-16)
 b.setVal(i, 0, su/a.getVal(i,i))
 return b

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef MatrixList ludcmp(Matrix a):
 #Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
 cdef MatrixList LU = MatrixList()
 cdef int n = a.rows
 cdef int i, j, k, imax
 cdef double big, dum, su, temp
 cdef Matrix vv = Matrix(n,1)
 cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
 cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
 cdef double TINY = 1.1e-16
 for i from 0 <= i < n: 
 big = 0.0
 for j from 0 <= j < n:
 temp=fabs(a.getVal(i,j))
 if (temp > big):
 big=temp
 if (big ==0.0):
 raise Exception("ERROR! ludcmp: Singular matrixn")
 vv.setVal(i,0,1.0/big)

 for j from 0 <= j < n:
 for i from 0 <= i < j: 
 su = a.getVal(i,j)
 for k from 0 <= k < i:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i,j,su)

 big=0.0
 for i from j<= i< n:
 su = a.getVal(i,j)
 for k from 0 <= k < j:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i, j, su)
 dum=vv.getVal(i,0)*fabs(su )
 if (dum >= big):
 big = dum
 imax = i

 if (j != imax):
 for k from 0 <= k < n:
 dum = a.getVal(imax,k)
 a.setVal(imax, k, a.getVal(j,k))
 a.setVal(j,k, dum)
 d.setVal(0, 0, -d.getVal(0,0))
 vv.setVal(imax, 0, vv.getVal(j, 0))
 indx.setVal(j, 0, imax)
 if (a.getVal(j,j) == 0.0):
 a.setVal(j,j, TINY)
 if (j != (n-1)):
 dum=1.0/a.getVal(j,j)
 for i from (j+1) <= i < n:
 a.setVal(i,j, a.getVal(i,j)*dum)
 LU.append(<Matrix>a)
 LU.append(<Matrix>indx)
 LU.append(<Matrix>d)
 return LU

matrix.pxd

from libcpp.vector cimport vector
cdef class MatrixList:
 cdef list inner
 cdef void append(self, Matrix a)
 cdef Matrix get(self, int i)

cdef class Matrix:
 cdef vector[double] *matrix 
 cdef size_t _rows
 cdef size_t _columns
 cdef bint Identity
 cdef bint ones

 cpdef double getVal(self, size_t r, size_t c)
 cpdef void setVal(self, size_t r, size_t c, double v)
 cpdef Matrix transpose(self)
 cdef void _IdentityMatrix(self)
 cdef void _fillWithOnes(self)
 cpdef Matrix Inv(self) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b) 
cdef MatrixList ludcmp(Matrix a)

I would like to know how is it possible to parallelize this code or improve its speed?

edited Jan 29 at 13:11

asked Jan 28 at 22:37

Dalek

747

matrix.pyx

cdef class Matrix: 
 def __cinit__(self, size_t rows=0, size_t columns=0, bint Identity=False, bint ones=False):
 self._rows=rows
 self._columns=columns
 self.matrix=new vector[double]()
 self.matrix.resize(rows*columns)

 if Identity:
 self._IdentityMatrix()

 if ones:
 self._fillWithOnes()

 def __dealloc__(self):
 del self.matrix

 property rows:
 def __get__(self):
 return self._rows
 def __set__(self, size_t x):
 self._rows = x 
 property columns:
 def __get__(self):
 return self._columns
 def __set__(self, size_t y):
 self._columns = y 

 cpdef double getVal(self, size_t r, size_t c):
 return self.matrix[0][r*self._columns+c]

 cpdef void setVal(self, size_t r, size_t c, double v): 
 self.matrix[0][r*self._columns+c] = v

 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef void _fillWithOnes(self):
 fill(self.matrix.begin(),self.matrix.end(),1.)

 cdef void _IdentityMatrix(self):
 cdef size_t i 
 if (self._rows!=self._columns):
 raise Exception('In order to generate identity matrix, the number of rows and columns must be equal')
 else:
 for i from 0 <= i <self._columns:
 self.setVal(i,i,1.0)
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef Matrix Inv(self):

 cdef Matrix A_inverse = Matrix(self._rows,self._columns)
 cdef MatrixList LU = ludcmp(self)
 cdef Matrix A = LU.get(0)
 cdef Matrix indx = LU.get(1)
 cdef Matrix d = LU.get(2)
 cdef double det = d.getVal(0,0)
 cdef int i, j
 cdef np.ndarray[np.float64_t, ndim=2] L = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef np.ndarray[np.float64_t, ndim=2] U = np.zeros((self._rows,self._columns),dtype=np.float64)
 cdef Matrix col = Matrix(self._rows,1)
 for i from 0 <= i < self._rows: 
 for j from 0 <= j < self._columns: 
 if (j>i):
 U[i,j]=A.getVal(i,j)
 L[i,j]=0
 elif (j<i):
 U[i,j]=0
 L[i,j]=A.getVal(i,j)
 else:
 U[i,j]=A.getVal(i,j)
 L[i,j]=1
 print np.dot(L, U)
 for i from 0 <= i < self._rows: 
 det*= A.getVal(i,i)
 for j from 0 <= j < self._columns:
 if (i==j):
 col.setVal(j,0,1)
 else:
 col.setVal(j,0,0)
 col=lubksb(A, indx, col) 
 for j from 0 <= j < self._columns:
 A_inverse.setVal(j,i,col.getVal(j,0))
 print "determinant of matrix %.4f"%(det)
 return A_inverse



cdef class MatrixList:
 def __cinit__(self):
 self.inner = 

 cdef void append(self, Matrix a):
 self.inner.append(a)

 cdef Matrix get(self, int i):
 return <Matrix> self.inner[i]

 def __len__(self):
 return len(self.inner)

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b):
 cdef int n = a.rows
 cdef int i, ip, j
 cdef int ii = 0
 cdef double su
 for i from 0 <= i < n: 
 ip = <int>indx.getVal(i,0)
 su = b.getVal(ip,0)
 b.setVal(ip,0, b.getVal(i,0))
 if (ii>=0):
 for j from ii <= j <= (i-1): 
 su -= a.getVal(i,j) * b.getVal(j,0)
 elif (su):
 ii = i 
 b.setVal(i, 0, su)
 for i from n >= i >= 0: 
 su = b.getVal(i,0)
 for j from (i+1) <= j < n:
 su -= a.getVal(i,j) * b.getVal(j,0)
 if (a.getVal(i,i)==0.0):
 a.setVal(i,i, 1.1e-16)
 b.setVal(i, 0, su/a.getVal(i,i))
 return b

@cython.boundscheck(False)
@cython.wraparound(False) 
cdef MatrixList ludcmp(Matrix a):
 #Given a matrix a_nxn, this routine replaces it by the LU decomposition of a row-wise permutation of itself.
 cdef MatrixList LU = MatrixList()
 cdef int n = a.rows
 cdef int i, j, k, imax
 cdef double big, dum, su, temp
 cdef Matrix vv = Matrix(n,1)
 cdef Matrix indx = Matrix(n,1) #an output vector that records the row permutation effected by the partial pivoting
 cdef Matrix d = Matrix(1,1, ones= True) #an output as +1 or -1 depending on whether the number of row interchanges was even or odd, respectively
 cdef double TINY = 1.1e-16
 for i from 0 <= i < n: 
 big = 0.0
 for j from 0 <= j < n:
 temp=fabs(a.getVal(i,j))
 if (temp > big):
 big=temp
 if (big ==0.0):
 raise Exception("ERROR! ludcmp: Singular matrixn")
 vv.setVal(i,0,1.0/big)

 for j from 0 <= j < n:
 for i from 0 <= i < j: 
 su = a.getVal(i,j)
 for k from 0 <= k < i:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i,j,su)

 big=0.0
 for i from j<= i< n:
 su = a.getVal(i,j)
 for k from 0 <= k < j:
 su -= a.getVal(i,k)*a.getVal(k,j)
 a.setVal(i, j, su)
 dum=vv.getVal(i,0)*fabs(su )
 if (dum >= big):
 big = dum
 imax = i

 if (j != imax):
 for k from 0 <= k < n:
 dum = a.getVal(imax,k)
 a.setVal(imax, k, a.getVal(j,k))
 a.setVal(j,k, dum)
 d.setVal(0, 0, -d.getVal(0,0))
 vv.setVal(imax, 0, vv.getVal(j, 0))
 indx.setVal(j, 0, imax)
 if (a.getVal(j,j) == 0.0):
 a.setVal(j,j, TINY)
 if (j != (n-1)):
 dum=1.0/a.getVal(j,j)
 for i from (j+1) <= i < n:
 a.setVal(i,j, a.getVal(i,j)*dum)
 LU.append(<Matrix>a)
 LU.append(<Matrix>indx)
 LU.append(<Matrix>d)
 return LU

matrix.pxd

from libcpp.vector cimport vector
cdef class MatrixList:
 cdef list inner
 cdef void append(self, Matrix a)
 cdef Matrix get(self, int i)

cdef class Matrix:
 cdef vector[double] *matrix 
 cdef size_t _rows
 cdef size_t _columns
 cdef bint Identity
 cdef bint ones

 cpdef double getVal(self, size_t r, size_t c)
 cpdef void setVal(self, size_t r, size_t c, double v)
 cpdef Matrix transpose(self)
 cdef void _IdentityMatrix(self)
 cdef void _fillWithOnes(self)
 cpdef Matrix Inv(self) 
cdef Matrix lubksb(Matrix a, Matrix indx, Matrix b) 
cdef MatrixList ludcmp(Matrix a)

I would like to know how is it possible to parallelize this code or improve its speed?

edited Jan 29 at 13:11

asked Jan 28 at 22:37

Dalek

747

edited Jan 29 at 13:11

asked Jan 28 at 22:37

Dalek

747

asked Jan 28 at 22:37

Dalek

747

asked Jan 28 at 22:37

Dalek

747

1

Did you consider using scipy.linalg?
â€“Â Gareth Rees
Jan 29 at 13:31

@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â€“Â Dalek
Jan 29 at 13:42

2

@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â€“Â Jorge BellÃ³n
Jan 30 at 9:46

add a commentÂ |Â

1

Did you consider using scipy.linalg?
â€“Â Gareth Rees
Jan 29 at 13:31

@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â€“Â Dalek
Jan 29 at 13:42

2

@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â€“Â Jorge BellÃ³n
Jan 30 at 9:46

Did you consider using scipy.linalg?
â€“Â Gareth Rees
Jan 29 at 13:31

@GarethRees Before I install openssl and mkl-Intel, my code was a bit faster than numpy but now it is not. I was wondering whether I can reach to better speed by parallelizing my code or doing some tricks to improve its speed.
â€“Â Dalek
Jan 29 at 13:42

@Dalek Intel supports MKL through numpy. They make many clever tricks to speed up their routines. If you already have a MKL license, I would try to build numpy following these instructions. Otherwise, most of the open source linear algebra libraries I've tried have a reasonable multithreaded implementation (you usually need to compile them for this), and all of them use SIMD as efficiently as possible. I would try using numpy with a BLAS library: beating people that have been working on this for decades is not trivial.
â€“Â Jorge BellÃ³n
Jan 30 at 9:46

add a commentÂ |Â

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f186219%2fhow-to-speed-up-the-matrix-inversion-using-the-lu-decomposition%23new-answer', 'question_page');

);

Post as a guest

Name

active

oldest

votes

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr