Peak finding algorithm

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
6
down vote

favorite












This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.



How can I improve my coding to make it more efficient or concise?



import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])


def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)

loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))

bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))

if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])



def findmax(loc,matrix):
(bestloc,bestval)=(None,0)

for i in loc:

if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))

bestloc=i
bestval= get_value(i,matrix)


return (bestloc,bestval)

def get_value(i,matrix):

y=i[1]
x=i[0]

return matrix[x][y]

def bestneighbour(i,Nrow,Ncol,matrix):

y=i[0][1]
x=i[0][0]
bestval=i[1]

inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]

if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]

if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]

return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1






algo(matrix)








share|improve this question





















  • Is your data sorted?
    – Peilonrayz
    Apr 27 at 11:14










  • @Peilonrayz it can be either way,sorted or not sorted
    – sek fook tan
    Apr 27 at 12:10










  • @Georgy Sry,what I meant is 2d
    – sek fook tan
    May 6 at 10:15
















up vote
6
down vote

favorite












This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.



How can I improve my coding to make it more efficient or concise?



import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])


def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)

loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))

bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))

if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])



def findmax(loc,matrix):
(bestloc,bestval)=(None,0)

for i in loc:

if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))

bestloc=i
bestval= get_value(i,matrix)


return (bestloc,bestval)

def get_value(i,matrix):

y=i[1]
x=i[0]

return matrix[x][y]

def bestneighbour(i,Nrow,Ncol,matrix):

y=i[0][1]
x=i[0][0]
bestval=i[1]

inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]

if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]

if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]

return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1






algo(matrix)








share|improve this question





















  • Is your data sorted?
    – Peilonrayz
    Apr 27 at 11:14










  • @Peilonrayz it can be either way,sorted or not sorted
    – sek fook tan
    Apr 27 at 12:10










  • @Georgy Sry,what I meant is 2d
    – sek fook tan
    May 6 at 10:15












up vote
6
down vote

favorite









up vote
6
down vote

favorite











This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.



How can I improve my coding to make it more efficient or concise?



import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])


def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)

loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))

bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))

if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])



def findmax(loc,matrix):
(bestloc,bestval)=(None,0)

for i in loc:

if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))

bestloc=i
bestval= get_value(i,matrix)


return (bestloc,bestval)

def get_value(i,matrix):

y=i[1]
x=i[0]

return matrix[x][y]

def bestneighbour(i,Nrow,Ncol,matrix):

y=i[0][1]
x=i[0][0]
bestval=i[1]

inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]

if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]

if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]

return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1






algo(matrix)








share|improve this question













This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.



How can I improve my coding to make it more efficient or concise?



import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])


def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)

loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))

bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))

if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])



def findmax(loc,matrix):
(bestloc,bestval)=(None,0)

for i in loc:

if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))

bestloc=i
bestval= get_value(i,matrix)


return (bestloc,bestval)

def get_value(i,matrix):

y=i[1]
x=i[0]

return matrix[x][y]

def bestneighbour(i,Nrow,Ncol,matrix):

y=i[0][1]
x=i[0][0]
bestval=i[1]

inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]

if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]

if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]

return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1






algo(matrix)










share|improve this question












share|improve this question




share|improve this question








edited May 6 at 12:53









Georgy

5521415




5521415









asked Apr 27 at 10:58









sek fook tan

313




313











  • Is your data sorted?
    – Peilonrayz
    Apr 27 at 11:14










  • @Peilonrayz it can be either way,sorted or not sorted
    – sek fook tan
    Apr 27 at 12:10










  • @Georgy Sry,what I meant is 2d
    – sek fook tan
    May 6 at 10:15
















  • Is your data sorted?
    – Peilonrayz
    Apr 27 at 11:14










  • @Peilonrayz it can be either way,sorted or not sorted
    – sek fook tan
    Apr 27 at 12:10










  • @Georgy Sry,what I meant is 2d
    – sek fook tan
    May 6 at 10:15















Is your data sorted?
– Peilonrayz
Apr 27 at 11:14




Is your data sorted?
– Peilonrayz
Apr 27 at 11:14












@Peilonrayz it can be either way,sorted or not sorted
– sek fook tan
Apr 27 at 12:10




@Peilonrayz it can be either way,sorted or not sorted
– sek fook tan
Apr 27 at 12:10












@Georgy Sry,what I meant is 2d
– sek fook tan
May 6 at 10:15




@Georgy Sry,what I meant is 2d
– sek fook tan
May 6 at 10:15










2 Answers
2






active

oldest

votes

















up vote
6
down vote














1. algo



I can see 3 areas of local improvements here:



for i in problem:
Ncol=len(i)
break


doing a loop to just set Ncol for the first element is really ugly. Since your list isn't empty just do:



Ncol = len(problem[0])


Now why 3 lines for this:



subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))


when you can create your list directly:



subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]


and last thing, learn about list comprehensions for things like this:



loc=
for i in range(Nrow):
loc.append((i,mid))


that can be rewritten as



loc = [(i,mid) for i in range(Nrow)]



2. findmax / getvalue



As mentionned in comments, the indentation of return statement is off, but since the code works, I'm assuming that it's just a typo. Here:



for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)

return (bestloc,bestval)



  • get_value is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value

  • set bestloc to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test for None in the whole loop. For this we have to handle the "loc is empty" cornercase at start.

So rewrite function like:



def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]

for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]

if bestval < mvalue:
bestloc = i
bestval = mvalue

return (bestloc,bestval)


only one access to the matrix, no unnecessary test for None, no function calls, faster index access.




get_subproblem



  • Use chaining comparisons since row and col are tested twice (now you can group both conditions)

  • don't slice then return, return the slice

like this:



for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]


if you don't care about the print you could unpack directly in loop:



for start_R,start_C,num_of_row,num_of_col in sub:





share|improve this answer






























    up vote
    3
    down vote













    When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.



    Now let's discuss the style first.



    Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:



    1. Functions should be surrounded by two blank lines.

    2. Operators should be surrounded with whitespaces. Add whitespaces after commas as well.

    3. Both variable and function names should be lowercase, with words separated by underscores.


    4. Comparison with None should be always done by is or is not, and never by equality operators.


    5. Continuation lines should be aligned vertically. This goes to your matrix declaration. It can be, for example, like this:



      matrix = np.array([[3, 2, 3], 
      [100,55,7],
      [344,9,37]])


    Apart from that I would also recommend you:



    1. Remove all commented code.

    2. Be consistent with blank lines. In algo function your code is pretty dense and not very readable, while in findmax there are just too many of them. Don't use 2 blank lines inside functions.


    3. Use chained comparison. For example, your



      if start_R<=row and row<start_R+num_of_row:


      should become



      if start_r <= row < start_r + num_of_row:



    4. Use self-descriptive names for functions and variables. Some suggestions:



      algo -> peak
      problem -> matrix
      nrow -> rows_count
      ncol -> columns_count
      mid -> middle_column_index
      loc -> location
      val -> value
      i -> index
      ...



    5. Remove redundant parentheses around tuples. For example:



      (x, y) = bestloc1 -> x, y = bestloc1
      (bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
      return (bestloc, bestval) -> return bestloc, bestval
      ...



    6. Use one-line assignments with a tuple. Instead of



      y = i[0][1]
      x = i[0][0]


      you should write



      x, y = i[0]



    7. Instead of indexing numpy arrays like this:



      matrix[x][y]


      use the following:



      matrix[x, y]


      They both produce the same result but the first one is more inefficient.




    8. Wrap the call of the main function along with matrix declaration in the following construct:



      if __name__ == '__main__':
      matrix = np.array([[3, 2, 3],
      [100, 55, 7],
      [344, 9, 37]])
      peak(matrix)


      For explanations see here.




    9. Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".

      As an example, this is how signature of your get_value will look like:



      def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:


      But it won't look good for some other functions in your case, like for bestneighbor. This is not fault of type hints but of the way how you defined i in your code before:



      def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
      nrow: int,
      ncol: int,
      matrix: np.ndarray) -> Tuple[int, int]:


      After some refactoring it can be significantly simplified.
      Don't forget that List, Tuple and Optional should be imported from typing module.




    Now, let's get to more interesting stuff.




    1. First thing that you are doing is checking if there are no elements in the matrix




      Nrow=len(problem)
      for i in problem:
      Ncol=len(i)
      break
      if Nrow <=0 or Ncol<=0:
      return None



      This can be done much easier:



      if matrix.size == 0:
      return None



    2. Getting number of rows and columns can be done by accessing shape attribute of numpy array:



      rows_count, columns_count = matrix.shape



    3. Here goes crazy staff:




      (substartR,subnrow)=(0,Nrow)
      (substartc1,subncol1)=(0,mid)
      (substartc2,subncol2)=(mid,Ncol-mid+1)
      subproblem=
      subproblem.append((substartR,substartc1,subnrow,subncol1))
      subproblem.append((substartR,substartc2,subnrow,subncol2))



      No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.

      Left part:



      matrix[:, :middle_column_index]


      Right part:



      matrix[:, middle_column_index + 1:]


      I suggest you to read carefully about indexing in numpy.




    4. Here you are calculating list of indices of a middle column:




      loc=
      for i in range(Nrow):
      loc.append((i,mid))



      But then later in findmax you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?

      For example, let's get that middle column first:



      middle_column_index = matrix.shape[1] // 2
      middle_column = matrix[:, middle_column_index]


      And then get index of a maximum value by np.argmax and its value:



      max_index = np.argmax(middle_column)
      max_value = middle_column[max_index]



    5. Next you are taking that maximum and compare it against its neighbors in bestneighbour. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:



      row = matrix[max_index]
      neighbours = row[max(0, middle_column_index - 1)
      : min(len(row), middle_column_index + 1) + 1]


      This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".




    6. Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by get_subproblem, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:



      if max_value == np.amax(neighbours):
      return max_value

      if neighbours[0] > neighbours[-1]:
      return peak(matrix[:, :middle_column_index])

      return peak(matrix[:, middle_column_index + 1:])


      where np.amax returns a maximum of array.




    In the end after all refactoring this is what I've got:



    from typing import Optional

    import numpy as np


    def peak(matrix: np.ndarray) -> Optional[int]:
    """
    Finds the peak element in the matrix recursively
    starting from the middle column.
    :param matrix: input array
    :return: peak value
    """
    if matrix.size == 0:
    return None

    middle_column_index = matrix.shape[1] // 2

    middle_column = matrix[:, middle_column_index]
    max_index = np.argmax(middle_column)
    max_value = middle_column[max_index]

    row = matrix[max_index]
    neighbours = row[max(0, middle_column_index - 1)
    : min(len(row), middle_column_index + 1) + 1]

    if max_value == np.amax(neighbours):
    return max_value

    if neighbours[0] > neighbours[-1]:
    return peak(matrix[:, :middle_column_index])

    return peak(matrix[:, middle_column_index + 1:])


    if __name__ == '__main__':
    matrix_example = np.array([[3, 2, 3],
    [100, 55, 7],
    [344, 9, 37]])
    print(peak(matrix_example))



    There is still some work left for you to do here:



    1. Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum, np.argmax should return all the indices. Will it break the program?

    2. Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".

    3. Change the docstring. I was lazy to come up with something good there :)





    share|improve this answer

















    • 1




      Thx mate,it really helps me a lot
      – sek fook tan
      May 6 at 10:15










    Your Answer




    StackExchange.ifUsing("editor", function ()
    return StackExchange.using("mathjaxEditing", function ()
    StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
    StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
    );
    );
    , "mathjax-editing");

    StackExchange.ifUsing("editor", function ()
    StackExchange.using("externalEditor", function ()
    StackExchange.using("snippets", function ()
    StackExchange.snippets.init();
    );
    );
    , "code-snippets");

    StackExchange.ready(function()
    var channelOptions =
    tags: "".split(" "),
    id: "196"
    ;
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function()
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled)
    StackExchange.using("snippets", function()
    createEditor();
    );

    else
    createEditor();

    );

    function createEditor()
    StackExchange.prepareEditor(
    heartbeatType: 'answer',
    convertImagesToLinks: false,
    noModals: false,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: null,
    bindNavPrevention: true,
    postfix: "",
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    );



    );








     

    draft saved


    draft discarded


















    StackExchange.ready(
    function ()
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f193067%2fpeak-finding-algorithm%23new-answer', 'question_page');

    );

    Post as a guest






























    2 Answers
    2






    active

    oldest

    votes








    2 Answers
    2






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes








    up vote
    6
    down vote














    1. algo



    I can see 3 areas of local improvements here:



    for i in problem:
    Ncol=len(i)
    break


    doing a loop to just set Ncol for the first element is really ugly. Since your list isn't empty just do:



    Ncol = len(problem[0])


    Now why 3 lines for this:



    subproblem=
    subproblem.append((substartR,substartc1,subnrow,subncol1))
    subproblem.append((substartR,substartc2,subnrow,subncol2))


    when you can create your list directly:



    subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]


    and last thing, learn about list comprehensions for things like this:



    loc=
    for i in range(Nrow):
    loc.append((i,mid))


    that can be rewritten as



    loc = [(i,mid) for i in range(Nrow)]



    2. findmax / getvalue



    As mentionned in comments, the indentation of return statement is off, but since the code works, I'm assuming that it's just a typo. Here:



    for i in loc:
    if bestloc == None or bestval < get_value(i,matrix) :
    bestloc=i
    bestval= get_value(i,matrix)

    return (bestloc,bestval)



    • get_value is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value

    • set bestloc to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test for None in the whole loop. For this we have to handle the "loc is empty" cornercase at start.

    So rewrite function like:



    def findmax(loc,matrix):
    if not loc:
    # get rid of the "loc is empty" case
    return (None,0)
    bestloc,bestval = loc[0]

    for i in loc:
    x,y = i # unpack from i, no indice access
    mvalue = matrix[x][y]

    if bestval < mvalue:
    bestloc = i
    bestval = mvalue

    return (bestloc,bestval)


    only one access to the matrix, no unnecessary test for None, no function calls, faster index access.




    get_subproblem



    • Use chaining comparisons since row and col are tested twice (now you can group both conditions)

    • don't slice then return, return the slice

    like this:



    for i in sub:
    print(i)
    (start_R,start_C,num_of_row,num_of_col)=i
    if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
    return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]


    if you don't care about the print you could unpack directly in loop:



    for start_R,start_C,num_of_row,num_of_col in sub:





    share|improve this answer



























      up vote
      6
      down vote














      1. algo



      I can see 3 areas of local improvements here:



      for i in problem:
      Ncol=len(i)
      break


      doing a loop to just set Ncol for the first element is really ugly. Since your list isn't empty just do:



      Ncol = len(problem[0])


      Now why 3 lines for this:



      subproblem=
      subproblem.append((substartR,substartc1,subnrow,subncol1))
      subproblem.append((substartR,substartc2,subnrow,subncol2))


      when you can create your list directly:



      subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]


      and last thing, learn about list comprehensions for things like this:



      loc=
      for i in range(Nrow):
      loc.append((i,mid))


      that can be rewritten as



      loc = [(i,mid) for i in range(Nrow)]



      2. findmax / getvalue



      As mentionned in comments, the indentation of return statement is off, but since the code works, I'm assuming that it's just a typo. Here:



      for i in loc:
      if bestloc == None or bestval < get_value(i,matrix) :
      bestloc=i
      bestval= get_value(i,matrix)

      return (bestloc,bestval)



      • get_value is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value

      • set bestloc to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test for None in the whole loop. For this we have to handle the "loc is empty" cornercase at start.

      So rewrite function like:



      def findmax(loc,matrix):
      if not loc:
      # get rid of the "loc is empty" case
      return (None,0)
      bestloc,bestval = loc[0]

      for i in loc:
      x,y = i # unpack from i, no indice access
      mvalue = matrix[x][y]

      if bestval < mvalue:
      bestloc = i
      bestval = mvalue

      return (bestloc,bestval)


      only one access to the matrix, no unnecessary test for None, no function calls, faster index access.




      get_subproblem



      • Use chaining comparisons since row and col are tested twice (now you can group both conditions)

      • don't slice then return, return the slice

      like this:



      for i in sub:
      print(i)
      (start_R,start_C,num_of_row,num_of_col)=i
      if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
      return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]


      if you don't care about the print you could unpack directly in loop:



      for start_R,start_C,num_of_row,num_of_col in sub:





      share|improve this answer

























        up vote
        6
        down vote










        up vote
        6
        down vote










        1. algo



        I can see 3 areas of local improvements here:



        for i in problem:
        Ncol=len(i)
        break


        doing a loop to just set Ncol for the first element is really ugly. Since your list isn't empty just do:



        Ncol = len(problem[0])


        Now why 3 lines for this:



        subproblem=
        subproblem.append((substartR,substartc1,subnrow,subncol1))
        subproblem.append((substartR,substartc2,subnrow,subncol2))


        when you can create your list directly:



        subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]


        and last thing, learn about list comprehensions for things like this:



        loc=
        for i in range(Nrow):
        loc.append((i,mid))


        that can be rewritten as



        loc = [(i,mid) for i in range(Nrow)]



        2. findmax / getvalue



        As mentionned in comments, the indentation of return statement is off, but since the code works, I'm assuming that it's just a typo. Here:



        for i in loc:
        if bestloc == None or bestval < get_value(i,matrix) :
        bestloc=i
        bestval= get_value(i,matrix)

        return (bestloc,bestval)



        • get_value is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value

        • set bestloc to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test for None in the whole loop. For this we have to handle the "loc is empty" cornercase at start.

        So rewrite function like:



        def findmax(loc,matrix):
        if not loc:
        # get rid of the "loc is empty" case
        return (None,0)
        bestloc,bestval = loc[0]

        for i in loc:
        x,y = i # unpack from i, no indice access
        mvalue = matrix[x][y]

        if bestval < mvalue:
        bestloc = i
        bestval = mvalue

        return (bestloc,bestval)


        only one access to the matrix, no unnecessary test for None, no function calls, faster index access.




        get_subproblem



        • Use chaining comparisons since row and col are tested twice (now you can group both conditions)

        • don't slice then return, return the slice

        like this:



        for i in sub:
        print(i)
        (start_R,start_C,num_of_row,num_of_col)=i
        if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
        return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]


        if you don't care about the print you could unpack directly in loop:



        for start_R,start_C,num_of_row,num_of_col in sub:





        share|improve this answer
















        1. algo



        I can see 3 areas of local improvements here:



        for i in problem:
        Ncol=len(i)
        break


        doing a loop to just set Ncol for the first element is really ugly. Since your list isn't empty just do:



        Ncol = len(problem[0])


        Now why 3 lines for this:



        subproblem=
        subproblem.append((substartR,substartc1,subnrow,subncol1))
        subproblem.append((substartR,substartc2,subnrow,subncol2))


        when you can create your list directly:



        subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]


        and last thing, learn about list comprehensions for things like this:



        loc=
        for i in range(Nrow):
        loc.append((i,mid))


        that can be rewritten as



        loc = [(i,mid) for i in range(Nrow)]



        2. findmax / getvalue



        As mentionned in comments, the indentation of return statement is off, but since the code works, I'm assuming that it's just a typo. Here:



        for i in loc:
        if bestloc == None or bestval < get_value(i,matrix) :
        bestloc=i
        bestval= get_value(i,matrix)

        return (bestloc,bestval)



        • get_value is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value

        • set bestloc to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test for None in the whole loop. For this we have to handle the "loc is empty" cornercase at start.

        So rewrite function like:



        def findmax(loc,matrix):
        if not loc:
        # get rid of the "loc is empty" case
        return (None,0)
        bestloc,bestval = loc[0]

        for i in loc:
        x,y = i # unpack from i, no indice access
        mvalue = matrix[x][y]

        if bestval < mvalue:
        bestloc = i
        bestval = mvalue

        return (bestloc,bestval)


        only one access to the matrix, no unnecessary test for None, no function calls, faster index access.




        get_subproblem



        • Use chaining comparisons since row and col are tested twice (now you can group both conditions)

        • don't slice then return, return the slice

        like this:



        for i in sub:
        print(i)
        (start_R,start_C,num_of_row,num_of_col)=i
        if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
        return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]


        if you don't care about the print you could unpack directly in loop:



        for start_R,start_C,num_of_row,num_of_col in sub:






        share|improve this answer















        share|improve this answer



        share|improve this answer








        edited Apr 28 at 10:45


























        answered Apr 27 at 20:21









        Jean-François Fabre

        783210




        783210






















            up vote
            3
            down vote













            When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.



            Now let's discuss the style first.



            Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:



            1. Functions should be surrounded by two blank lines.

            2. Operators should be surrounded with whitespaces. Add whitespaces after commas as well.

            3. Both variable and function names should be lowercase, with words separated by underscores.


            4. Comparison with None should be always done by is or is not, and never by equality operators.


            5. Continuation lines should be aligned vertically. This goes to your matrix declaration. It can be, for example, like this:



              matrix = np.array([[3, 2, 3], 
              [100,55,7],
              [344,9,37]])


            Apart from that I would also recommend you:



            1. Remove all commented code.

            2. Be consistent with blank lines. In algo function your code is pretty dense and not very readable, while in findmax there are just too many of them. Don't use 2 blank lines inside functions.


            3. Use chained comparison. For example, your



              if start_R<=row and row<start_R+num_of_row:


              should become



              if start_r <= row < start_r + num_of_row:



            4. Use self-descriptive names for functions and variables. Some suggestions:



              algo -> peak
              problem -> matrix
              nrow -> rows_count
              ncol -> columns_count
              mid -> middle_column_index
              loc -> location
              val -> value
              i -> index
              ...



            5. Remove redundant parentheses around tuples. For example:



              (x, y) = bestloc1 -> x, y = bestloc1
              (bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
              return (bestloc, bestval) -> return bestloc, bestval
              ...



            6. Use one-line assignments with a tuple. Instead of



              y = i[0][1]
              x = i[0][0]


              you should write



              x, y = i[0]



            7. Instead of indexing numpy arrays like this:



              matrix[x][y]


              use the following:



              matrix[x, y]


              They both produce the same result but the first one is more inefficient.




            8. Wrap the call of the main function along with matrix declaration in the following construct:



              if __name__ == '__main__':
              matrix = np.array([[3, 2, 3],
              [100, 55, 7],
              [344, 9, 37]])
              peak(matrix)


              For explanations see here.




            9. Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".

              As an example, this is how signature of your get_value will look like:



              def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:


              But it won't look good for some other functions in your case, like for bestneighbor. This is not fault of type hints but of the way how you defined i in your code before:



              def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
              nrow: int,
              ncol: int,
              matrix: np.ndarray) -> Tuple[int, int]:


              After some refactoring it can be significantly simplified.
              Don't forget that List, Tuple and Optional should be imported from typing module.




            Now, let's get to more interesting stuff.




            1. First thing that you are doing is checking if there are no elements in the matrix




              Nrow=len(problem)
              for i in problem:
              Ncol=len(i)
              break
              if Nrow <=0 or Ncol<=0:
              return None



              This can be done much easier:



              if matrix.size == 0:
              return None



            2. Getting number of rows and columns can be done by accessing shape attribute of numpy array:



              rows_count, columns_count = matrix.shape



            3. Here goes crazy staff:




              (substartR,subnrow)=(0,Nrow)
              (substartc1,subncol1)=(0,mid)
              (substartc2,subncol2)=(mid,Ncol-mid+1)
              subproblem=
              subproblem.append((substartR,substartc1,subnrow,subncol1))
              subproblem.append((substartR,substartc2,subnrow,subncol2))



              No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.

              Left part:



              matrix[:, :middle_column_index]


              Right part:



              matrix[:, middle_column_index + 1:]


              I suggest you to read carefully about indexing in numpy.




            4. Here you are calculating list of indices of a middle column:




              loc=
              for i in range(Nrow):
              loc.append((i,mid))



              But then later in findmax you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?

              For example, let's get that middle column first:



              middle_column_index = matrix.shape[1] // 2
              middle_column = matrix[:, middle_column_index]


              And then get index of a maximum value by np.argmax and its value:



              max_index = np.argmax(middle_column)
              max_value = middle_column[max_index]



            5. Next you are taking that maximum and compare it against its neighbors in bestneighbour. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:



              row = matrix[max_index]
              neighbours = row[max(0, middle_column_index - 1)
              : min(len(row), middle_column_index + 1) + 1]


              This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".




            6. Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by get_subproblem, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:



              if max_value == np.amax(neighbours):
              return max_value

              if neighbours[0] > neighbours[-1]:
              return peak(matrix[:, :middle_column_index])

              return peak(matrix[:, middle_column_index + 1:])


              where np.amax returns a maximum of array.




            In the end after all refactoring this is what I've got:



            from typing import Optional

            import numpy as np


            def peak(matrix: np.ndarray) -> Optional[int]:
            """
            Finds the peak element in the matrix recursively
            starting from the middle column.
            :param matrix: input array
            :return: peak value
            """
            if matrix.size == 0:
            return None

            middle_column_index = matrix.shape[1] // 2

            middle_column = matrix[:, middle_column_index]
            max_index = np.argmax(middle_column)
            max_value = middle_column[max_index]

            row = matrix[max_index]
            neighbours = row[max(0, middle_column_index - 1)
            : min(len(row), middle_column_index + 1) + 1]

            if max_value == np.amax(neighbours):
            return max_value

            if neighbours[0] > neighbours[-1]:
            return peak(matrix[:, :middle_column_index])

            return peak(matrix[:, middle_column_index + 1:])


            if __name__ == '__main__':
            matrix_example = np.array([[3, 2, 3],
            [100, 55, 7],
            [344, 9, 37]])
            print(peak(matrix_example))



            There is still some work left for you to do here:



            1. Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum, np.argmax should return all the indices. Will it break the program?

            2. Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".

            3. Change the docstring. I was lazy to come up with something good there :)





            share|improve this answer

















            • 1




              Thx mate,it really helps me a lot
              – sek fook tan
              May 6 at 10:15














            up vote
            3
            down vote













            When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.



            Now let's discuss the style first.



            Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:



            1. Functions should be surrounded by two blank lines.

            2. Operators should be surrounded with whitespaces. Add whitespaces after commas as well.

            3. Both variable and function names should be lowercase, with words separated by underscores.


            4. Comparison with None should be always done by is or is not, and never by equality operators.


            5. Continuation lines should be aligned vertically. This goes to your matrix declaration. It can be, for example, like this:



              matrix = np.array([[3, 2, 3], 
              [100,55,7],
              [344,9,37]])


            Apart from that I would also recommend you:



            1. Remove all commented code.

            2. Be consistent with blank lines. In algo function your code is pretty dense and not very readable, while in findmax there are just too many of them. Don't use 2 blank lines inside functions.


            3. Use chained comparison. For example, your



              if start_R<=row and row<start_R+num_of_row:


              should become



              if start_r <= row < start_r + num_of_row:



            4. Use self-descriptive names for functions and variables. Some suggestions:



              algo -> peak
              problem -> matrix
              nrow -> rows_count
              ncol -> columns_count
              mid -> middle_column_index
              loc -> location
              val -> value
              i -> index
              ...



            5. Remove redundant parentheses around tuples. For example:



              (x, y) = bestloc1 -> x, y = bestloc1
              (bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
              return (bestloc, bestval) -> return bestloc, bestval
              ...



            6. Use one-line assignments with a tuple. Instead of



              y = i[0][1]
              x = i[0][0]


              you should write



              x, y = i[0]



            7. Instead of indexing numpy arrays like this:



              matrix[x][y]


              use the following:



              matrix[x, y]


              They both produce the same result but the first one is more inefficient.




            8. Wrap the call of the main function along with matrix declaration in the following construct:



              if __name__ == '__main__':
              matrix = np.array([[3, 2, 3],
              [100, 55, 7],
              [344, 9, 37]])
              peak(matrix)


              For explanations see here.




            9. Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".

              As an example, this is how signature of your get_value will look like:



              def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:


              But it won't look good for some other functions in your case, like for bestneighbor. This is not fault of type hints but of the way how you defined i in your code before:



              def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
              nrow: int,
              ncol: int,
              matrix: np.ndarray) -> Tuple[int, int]:


              After some refactoring it can be significantly simplified.
              Don't forget that List, Tuple and Optional should be imported from typing module.




            Now, let's get to more interesting stuff.




            1. First thing that you are doing is checking if there are no elements in the matrix




              Nrow=len(problem)
              for i in problem:
              Ncol=len(i)
              break
              if Nrow <=0 or Ncol<=0:
              return None



              This can be done much easier:



              if matrix.size == 0:
              return None



            2. Getting number of rows and columns can be done by accessing shape attribute of numpy array:



              rows_count, columns_count = matrix.shape



            3. Here goes crazy staff:




              (substartR,subnrow)=(0,Nrow)
              (substartc1,subncol1)=(0,mid)
              (substartc2,subncol2)=(mid,Ncol-mid+1)
              subproblem=
              subproblem.append((substartR,substartc1,subnrow,subncol1))
              subproblem.append((substartR,substartc2,subnrow,subncol2))



              No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.

              Left part:



              matrix[:, :middle_column_index]


              Right part:



              matrix[:, middle_column_index + 1:]


              I suggest you to read carefully about indexing in numpy.




            4. Here you are calculating list of indices of a middle column:




              loc=
              for i in range(Nrow):
              loc.append((i,mid))



              But then later in findmax you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?

              For example, let's get that middle column first:



              middle_column_index = matrix.shape[1] // 2
              middle_column = matrix[:, middle_column_index]


              And then get index of a maximum value by np.argmax and its value:



              max_index = np.argmax(middle_column)
              max_value = middle_column[max_index]



            5. Next you are taking that maximum and compare it against its neighbors in bestneighbour. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:



              row = matrix[max_index]
              neighbours = row[max(0, middle_column_index - 1)
              : min(len(row), middle_column_index + 1) + 1]


              This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".




            6. Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by get_subproblem, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:



              if max_value == np.amax(neighbours):
              return max_value

              if neighbours[0] > neighbours[-1]:
              return peak(matrix[:, :middle_column_index])

              return peak(matrix[:, middle_column_index + 1:])


              where np.amax returns a maximum of array.




            In the end after all refactoring this is what I've got:



            from typing import Optional

            import numpy as np


            def peak(matrix: np.ndarray) -> Optional[int]:
            """
            Finds the peak element in the matrix recursively
            starting from the middle column.
            :param matrix: input array
            :return: peak value
            """
            if matrix.size == 0:
            return None

            middle_column_index = matrix.shape[1] // 2

            middle_column = matrix[:, middle_column_index]
            max_index = np.argmax(middle_column)
            max_value = middle_column[max_index]

            row = matrix[max_index]
            neighbours = row[max(0, middle_column_index - 1)
            : min(len(row), middle_column_index + 1) + 1]

            if max_value == np.amax(neighbours):
            return max_value

            if neighbours[0] > neighbours[-1]:
            return peak(matrix[:, :middle_column_index])

            return peak(matrix[:, middle_column_index + 1:])


            if __name__ == '__main__':
            matrix_example = np.array([[3, 2, 3],
            [100, 55, 7],
            [344, 9, 37]])
            print(peak(matrix_example))



            There is still some work left for you to do here:



            1. Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum, np.argmax should return all the indices. Will it break the program?

            2. Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".

            3. Change the docstring. I was lazy to come up with something good there :)





            share|improve this answer

















            • 1




              Thx mate,it really helps me a lot
              – sek fook tan
              May 6 at 10:15












            up vote
            3
            down vote










            up vote
            3
            down vote









            When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.



            Now let's discuss the style first.



            Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:



            1. Functions should be surrounded by two blank lines.

            2. Operators should be surrounded with whitespaces. Add whitespaces after commas as well.

            3. Both variable and function names should be lowercase, with words separated by underscores.


            4. Comparison with None should be always done by is or is not, and never by equality operators.


            5. Continuation lines should be aligned vertically. This goes to your matrix declaration. It can be, for example, like this:



              matrix = np.array([[3, 2, 3], 
              [100,55,7],
              [344,9,37]])


            Apart from that I would also recommend you:



            1. Remove all commented code.

            2. Be consistent with blank lines. In algo function your code is pretty dense and not very readable, while in findmax there are just too many of them. Don't use 2 blank lines inside functions.


            3. Use chained comparison. For example, your



              if start_R<=row and row<start_R+num_of_row:


              should become



              if start_r <= row < start_r + num_of_row:



            4. Use self-descriptive names for functions and variables. Some suggestions:



              algo -> peak
              problem -> matrix
              nrow -> rows_count
              ncol -> columns_count
              mid -> middle_column_index
              loc -> location
              val -> value
              i -> index
              ...



            5. Remove redundant parentheses around tuples. For example:



              (x, y) = bestloc1 -> x, y = bestloc1
              (bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
              return (bestloc, bestval) -> return bestloc, bestval
              ...



            6. Use one-line assignments with a tuple. Instead of



              y = i[0][1]
              x = i[0][0]


              you should write



              x, y = i[0]



            7. Instead of indexing numpy arrays like this:



              matrix[x][y]


              use the following:



              matrix[x, y]


              They both produce the same result but the first one is more inefficient.




            8. Wrap the call of the main function along with matrix declaration in the following construct:



              if __name__ == '__main__':
              matrix = np.array([[3, 2, 3],
              [100, 55, 7],
              [344, 9, 37]])
              peak(matrix)


              For explanations see here.




            9. Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".

              As an example, this is how signature of your get_value will look like:



              def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:


              But it won't look good for some other functions in your case, like for bestneighbor. This is not fault of type hints but of the way how you defined i in your code before:



              def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
              nrow: int,
              ncol: int,
              matrix: np.ndarray) -> Tuple[int, int]:


              After some refactoring it can be significantly simplified.
              Don't forget that List, Tuple and Optional should be imported from typing module.




            Now, let's get to more interesting stuff.




            1. First thing that you are doing is checking if there are no elements in the matrix




              Nrow=len(problem)
              for i in problem:
              Ncol=len(i)
              break
              if Nrow <=0 or Ncol<=0:
              return None



              This can be done much easier:



              if matrix.size == 0:
              return None



            2. Getting number of rows and columns can be done by accessing shape attribute of numpy array:



              rows_count, columns_count = matrix.shape



            3. Here goes crazy staff:




              (substartR,subnrow)=(0,Nrow)
              (substartc1,subncol1)=(0,mid)
              (substartc2,subncol2)=(mid,Ncol-mid+1)
              subproblem=
              subproblem.append((substartR,substartc1,subnrow,subncol1))
              subproblem.append((substartR,substartc2,subnrow,subncol2))



              No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.

              Left part:



              matrix[:, :middle_column_index]


              Right part:



              matrix[:, middle_column_index + 1:]


              I suggest you to read carefully about indexing in numpy.




            4. Here you are calculating list of indices of a middle column:




              loc=
              for i in range(Nrow):
              loc.append((i,mid))



              But then later in findmax you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?

              For example, let's get that middle column first:



              middle_column_index = matrix.shape[1] // 2
              middle_column = matrix[:, middle_column_index]


              And then get index of a maximum value by np.argmax and its value:



              max_index = np.argmax(middle_column)
              max_value = middle_column[max_index]



            5. Next you are taking that maximum and compare it against its neighbors in bestneighbour. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:



              row = matrix[max_index]
              neighbours = row[max(0, middle_column_index - 1)
              : min(len(row), middle_column_index + 1) + 1]


              This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".




            6. Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by get_subproblem, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:



              if max_value == np.amax(neighbours):
              return max_value

              if neighbours[0] > neighbours[-1]:
              return peak(matrix[:, :middle_column_index])

              return peak(matrix[:, middle_column_index + 1:])


              where np.amax returns a maximum of array.




            In the end after all refactoring this is what I've got:



            from typing import Optional

            import numpy as np


            def peak(matrix: np.ndarray) -> Optional[int]:
            """
            Finds the peak element in the matrix recursively
            starting from the middle column.
            :param matrix: input array
            :return: peak value
            """
            if matrix.size == 0:
            return None

            middle_column_index = matrix.shape[1] // 2

            middle_column = matrix[:, middle_column_index]
            max_index = np.argmax(middle_column)
            max_value = middle_column[max_index]

            row = matrix[max_index]
            neighbours = row[max(0, middle_column_index - 1)
            : min(len(row), middle_column_index + 1) + 1]

            if max_value == np.amax(neighbours):
            return max_value

            if neighbours[0] > neighbours[-1]:
            return peak(matrix[:, :middle_column_index])

            return peak(matrix[:, middle_column_index + 1:])


            if __name__ == '__main__':
            matrix_example = np.array([[3, 2, 3],
            [100, 55, 7],
            [344, 9, 37]])
            print(peak(matrix_example))



            There is still some work left for you to do here:



            1. Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum, np.argmax should return all the indices. Will it break the program?

            2. Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".

            3. Change the docstring. I was lazy to come up with something good there :)





            share|improve this answer













            When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.



            Now let's discuss the style first.



            Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:



            1. Functions should be surrounded by two blank lines.

            2. Operators should be surrounded with whitespaces. Add whitespaces after commas as well.

            3. Both variable and function names should be lowercase, with words separated by underscores.


            4. Comparison with None should be always done by is or is not, and never by equality operators.


            5. Continuation lines should be aligned vertically. This goes to your matrix declaration. It can be, for example, like this:



              matrix = np.array([[3, 2, 3], 
              [100,55,7],
              [344,9,37]])


            Apart from that I would also recommend you:



            1. Remove all commented code.

            2. Be consistent with blank lines. In algo function your code is pretty dense and not very readable, while in findmax there are just too many of them. Don't use 2 blank lines inside functions.


            3. Use chained comparison. For example, your



              if start_R<=row and row<start_R+num_of_row:


              should become



              if start_r <= row < start_r + num_of_row:



            4. Use self-descriptive names for functions and variables. Some suggestions:



              algo -> peak
              problem -> matrix
              nrow -> rows_count
              ncol -> columns_count
              mid -> middle_column_index
              loc -> location
              val -> value
              i -> index
              ...



            5. Remove redundant parentheses around tuples. For example:



              (x, y) = bestloc1 -> x, y = bestloc1
              (bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
              return (bestloc, bestval) -> return bestloc, bestval
              ...



            6. Use one-line assignments with a tuple. Instead of



              y = i[0][1]
              x = i[0][0]


              you should write



              x, y = i[0]



            7. Instead of indexing numpy arrays like this:



              matrix[x][y]


              use the following:



              matrix[x, y]


              They both produce the same result but the first one is more inefficient.




            8. Wrap the call of the main function along with matrix declaration in the following construct:



              if __name__ == '__main__':
              matrix = np.array([[3, 2, 3],
              [100, 55, 7],
              [344, 9, 37]])
              peak(matrix)


              For explanations see here.




            9. Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".

              As an example, this is how signature of your get_value will look like:



              def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:


              But it won't look good for some other functions in your case, like for bestneighbor. This is not fault of type hints but of the way how you defined i in your code before:



              def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
              nrow: int,
              ncol: int,
              matrix: np.ndarray) -> Tuple[int, int]:


              After some refactoring it can be significantly simplified.
              Don't forget that List, Tuple and Optional should be imported from typing module.




            Now, let's get to more interesting stuff.




            1. First thing that you are doing is checking if there are no elements in the matrix




              Nrow=len(problem)
              for i in problem:
              Ncol=len(i)
              break
              if Nrow <=0 or Ncol<=0:
              return None



              This can be done much easier:



              if matrix.size == 0:
              return None



            2. Getting number of rows and columns can be done by accessing shape attribute of numpy array:



              rows_count, columns_count = matrix.shape



            3. Here goes crazy staff:




              (substartR,subnrow)=(0,Nrow)
              (substartc1,subncol1)=(0,mid)
              (substartc2,subncol2)=(mid,Ncol-mid+1)
              subproblem=
              subproblem.append((substartR,substartc1,subnrow,subncol1))
              subproblem.append((substartR,substartc2,subnrow,subncol2))



              No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.

              Left part:



              matrix[:, :middle_column_index]


              Right part:



              matrix[:, middle_column_index + 1:]


              I suggest you to read carefully about indexing in numpy.




            4. Here you are calculating list of indices of a middle column:




              loc=
              for i in range(Nrow):
              loc.append((i,mid))



              But then later in findmax you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?

              For example, let's get that middle column first:



              middle_column_index = matrix.shape[1] // 2
              middle_column = matrix[:, middle_column_index]


              And then get index of a maximum value by np.argmax and its value:



              max_index = np.argmax(middle_column)
              max_value = middle_column[max_index]



            5. Next you are taking that maximum and compare it against its neighbors in bestneighbour. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:



              row = matrix[max_index]
              neighbours = row[max(0, middle_column_index - 1)
              : min(len(row), middle_column_index + 1) + 1]


              This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".




            6. Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by get_subproblem, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:



              if max_value == np.amax(neighbours):
              return max_value

              if neighbours[0] > neighbours[-1]:
              return peak(matrix[:, :middle_column_index])

              return peak(matrix[:, middle_column_index + 1:])


              where np.amax returns a maximum of array.




            In the end after all refactoring this is what I've got:



            from typing import Optional

            import numpy as np


            def peak(matrix: np.ndarray) -> Optional[int]:
            """
            Finds the peak element in the matrix recursively
            starting from the middle column.
            :param matrix: input array
            :return: peak value
            """
            if matrix.size == 0:
            return None

            middle_column_index = matrix.shape[1] // 2

            middle_column = matrix[:, middle_column_index]
            max_index = np.argmax(middle_column)
            max_value = middle_column[max_index]

            row = matrix[max_index]
            neighbours = row[max(0, middle_column_index - 1)
            : min(len(row), middle_column_index + 1) + 1]

            if max_value == np.amax(neighbours):
            return max_value

            if neighbours[0] > neighbours[-1]:
            return peak(matrix[:, :middle_column_index])

            return peak(matrix[:, middle_column_index + 1:])


            if __name__ == '__main__':
            matrix_example = np.array([[3, 2, 3],
            [100, 55, 7],
            [344, 9, 37]])
            print(peak(matrix_example))



            There is still some work left for you to do here:



            1. Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum, np.argmax should return all the indices. Will it break the program?

            2. Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".

            3. Change the docstring. I was lazy to come up with something good there :)






            share|improve this answer













            share|improve this answer



            share|improve this answer











            answered May 1 at 19:49









            Georgy

            5521415




            5521415







            • 1




              Thx mate,it really helps me a lot
              – sek fook tan
              May 6 at 10:15












            • 1




              Thx mate,it really helps me a lot
              – sek fook tan
              May 6 at 10:15







            1




            1




            Thx mate,it really helps me a lot
            – sek fook tan
            May 6 at 10:15




            Thx mate,it really helps me a lot
            – sek fook tan
            May 6 at 10:15












             

            draft saved


            draft discarded


























             


            draft saved


            draft discarded














            StackExchange.ready(
            function ()
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f193067%2fpeak-finding-algorithm%23new-answer', 'question_page');

            );

            Post as a guest













































































            Popular posts from this blog

            Chat program with C++ and SFML

            Function to Return a JSON Like Objects Using VBA Collections and Arrays

            Will my employers contract hold up in court?