Peak finding algorithm
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
6
down vote
favorite
This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.
How can I improve my coding to make it more efficient or concise?
import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])
def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)
loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))
bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))
if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])
def findmax(loc,matrix):
(bestloc,bestval)=(None,0)
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
def get_value(i,matrix):
y=i[1]
x=i[0]
return matrix[x][y]
def bestneighbour(i,Nrow,Ncol,matrix):
y=i[0][1]
x=i[0][0]
bestval=i[1]
inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]
if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]
if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]
return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1
algo(matrix)
python array numpy
add a comment |Â
up vote
6
down vote
favorite
This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.
How can I improve my coding to make it more efficient or concise?
import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])
def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)
loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))
bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))
if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])
def findmax(loc,matrix):
(bestloc,bestval)=(None,0)
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
def get_value(i,matrix):
y=i[1]
x=i[0]
return matrix[x][y]
def bestneighbour(i,Nrow,Ncol,matrix):
y=i[0][1]
x=i[0][0]
bestval=i[1]
inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]
if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]
if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]
return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1
algo(matrix)
python array numpy
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15
add a comment |Â
up vote
6
down vote
favorite
up vote
6
down vote
favorite
This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.
How can I improve my coding to make it more efficient or concise?
import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])
def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)
loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))
bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))
if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])
def findmax(loc,matrix):
(bestloc,bestval)=(None,0)
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
def get_value(i,matrix):
y=i[1]
x=i[0]
return matrix[x][y]
def bestneighbour(i,Nrow,Ncol,matrix):
y=i[0][1]
x=i[0][0]
bestval=i[1]
inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]
if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]
if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]
return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1
algo(matrix)
python array numpy
This is my peak finding algorithm. I try to split the 2D array in half and find the maximum element in the middle column. After that, the algorithm will check whether there are any other element bigger than it on the left or the right side. If there is any, the matrix will slice in half depending on the position of the element and go into recursion. You can simply change the size or the elements of the matrix.
How can I improve my coding to make it more efficient or concise?
import numpy as np
matrix=np.array([[3,2,3],
[100,55,7],
[344,9,37]
])
def algo(problem):
print('matrix =' + str(problem))
Nrow=len(problem)
print("row"+str(Nrow))
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return None
print("col"+str(Ncol))
mid=Ncol//2
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
print(subproblem)
loc=
for i in range(Nrow):
loc.append((i,mid))
# bestloc is a tuple of location and biggest val
bestloc=findmax(loc,problem)
print("bestloc" + str(bestloc[0]))
bestloc1=bestneighbour(bestloc,Nrow,Ncol,problem)
print("bestloc1 =" + str(bestloc1))
if bestloc[0] != bestloc1:
print('sub problem')
#print(problem)
new_matrix=get_subproblem(subproblem,bestloc1,problem)
return algo(new_matrix)
else:
(x,y)=bestloc1
#print("the peak " +str( problem[x][y]))
print( problem[x][y])
def findmax(loc,matrix):
(bestloc,bestval)=(None,0)
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
#print(get_value(i))
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
def get_value(i,matrix):
y=i[1]
x=i[0]
return matrix[x][y]
def bestneighbour(i,Nrow,Ncol,matrix):
y=i[0][1]
x=i[0][0]
bestval=i[1]
inew=(x,y)
if x-1>=0 and matrix[x-1][y]>bestval:
inew=(x-1,y)
bestval=matrix[x-1][y]
if x+1<Nrow and matrix[x+1][y]>bestval:
inew=(x+1,y)
bestval=matrix[x+1][y]
if y-1>=0 and matrix[x][y-1]>bestval:
inew=(x,y-1)
bestval=matrix[x][y-1]
if y+1<Ncol and matrix[x][y+1]>bestval:
inew=(x,y+1)
bestval=matrix[x][y+1]
return inew
def get_subproblem(sub,bestloc,matrix1):
#global matrix
(row,col)=bestloc
#print(row)
#print(col)
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R<=row and row<start_R+num_of_row:
if start_C<=col and col<start_C+num_of_col:
matrix1=matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
return matrix1
algo(matrix)
python array numpy
edited May 6 at 12:53
Georgy
5521415
5521415
asked Apr 27 at 10:58
sek fook tan
313
313
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15
add a comment |Â
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15
add a comment |Â
2 Answers
2
active
oldest
votes
up vote
6
down vote
1. algo
I can see 3 areas of local improvements here:
for i in problem:
Ncol=len(i)
break
doing a loop to just set Ncol
for the first element is really ugly. Since your list isn't empty just do:
Ncol = len(problem[0])
Now why 3 lines for this:
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
when you can create your list directly:
subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]
and last thing, learn about list comprehensions for things like this:
loc=
for i in range(Nrow):
loc.append((i,mid))
that can be rewritten as
loc = [(i,mid) for i in range(Nrow)]
2. findmax
/ getvalue
As mentionned in comments, the indentation of return
statement is off, but since the code works, I'm assuming that it's just a typo. Here:
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
get_value
is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value- set
bestloc
to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test forNone
in the whole loop. For this we have to handle the "loc
is empty" cornercase at start.
So rewrite function like:
def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]
for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]
if bestval < mvalue:
bestloc = i
bestval = mvalue
return (bestloc,bestval)
only one access to the matrix, no unnecessary test for None
, no function calls, faster index access.
get_subproblem
- Use chaining comparisons since
row
andcol
are tested twice (now you can group both conditions) - don't slice then return, return the slice
like this:
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
if you don't care about the print you could
unpack directly in loop:
for start_R,start_C,num_of_row,num_of_col in sub:
add a comment |Â
up vote
3
down vote
When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.
Now let's discuss the style first.
Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:
- Functions should be surrounded by two blank lines.
- Operators should be surrounded with whitespaces. Add whitespaces after commas as well.
- Both variable and function names should be lowercase, with words separated by underscores.
Comparison withNone
should be always done byis
oris not
, and never by equality operators.Continuation lines should be aligned vertically. This goes to your
matrix
declaration. It can be, for example, like this:matrix = np.array([[3, 2, 3],
[100,55,7],
[344,9,37]])
Apart from that I would also recommend you:
- Remove all commented code.
- Be consistent with blank lines. In
algo
function your code is pretty dense and not very readable, while infindmax
there are just too many of them. Don't use 2 blank lines inside functions. Use chained comparison. For example, your
if start_R<=row and row<start_R+num_of_row:
should become
if start_r <= row < start_r + num_of_row:
Use self-descriptive names for functions and variables. Some suggestions:
algo -> peak
problem -> matrix
nrow -> rows_count
ncol -> columns_count
mid -> middle_column_index
loc -> location
val -> value
i -> index
...Remove redundant parentheses around tuples. For example:
(x, y) = bestloc1 -> x, y = bestloc1
(bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
return (bestloc, bestval) -> return bestloc, bestval
...Use one-line assignments with a tuple. Instead of
y = i[0][1]
x = i[0][0]you should write
x, y = i[0]
Instead of indexing numpy arrays like this:
matrix[x][y]
use the following:
matrix[x, y]
They both produce the same result but the first one is more inefficient.
Wrap the call of the main function along with
matrix
declaration in the following construct:if __name__ == '__main__':
matrix = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
peak(matrix)For explanations see here.
Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".
As an example, this is how signature of yourget_value
will look like:def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:
But it won't look good for some other functions in your case, like for
bestneighbor
. This is not fault of type hints but of the way how you definedi
in your code before:def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
nrow: int,
ncol: int,
matrix: np.ndarray) -> Tuple[int, int]:After some refactoring it can be significantly simplified.
Don't forget thatList
,Tuple
andOptional
should be imported fromtyping
module.
Now, let's get to more interesting stuff.
First thing that you are doing is checking if there are no elements in the matrix
Nrow=len(problem)
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return NoneThis can be done much easier:
if matrix.size == 0:
return NoneGetting number of rows and columns can be done by accessing
shape
attribute of numpy array:rows_count, columns_count = matrix.shape
Here goes crazy staff:
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.
Left part:matrix[:, :middle_column_index]
Right part:
matrix[:, middle_column_index + 1:]
I suggest you to read carefully about indexing in numpy.
Here you are calculating list of indices of a middle column:
loc=
for i in range(Nrow):
loc.append((i,mid))But then later in
findmax
you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?
For example, let's get that middle column first:middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]And then get index of a maximum value by
np.argmax
and its value:max_index = np.argmax(middle_column)
max_value = middle_column[max_index]Next you are taking that maximum and compare it against its neighbors in
bestneighbour
. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".
Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by
get_subproblem
, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])where
np.amax
returns a maximum of array.
In the end after all refactoring this is what I've got:
from typing import Optional
import numpy as np
def peak(matrix: np.ndarray) -> Optional[int]:
"""
Finds the peak element in the matrix recursively
starting from the middle column.
:param matrix: input array
:return: peak value
"""
if matrix.size == 0:
return None
middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]
max_index = np.argmax(middle_column)
max_value = middle_column[max_index]
row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]
if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])
if __name__ == '__main__':
matrix_example = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
print(peak(matrix_example))
There is still some work left for you to do here:
- Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum,
np.argmax
should return all the indices. Will it break the program? - Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".
- Change the docstring. I was lazy to come up with something good there :)
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
add a comment |Â
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
6
down vote
1. algo
I can see 3 areas of local improvements here:
for i in problem:
Ncol=len(i)
break
doing a loop to just set Ncol
for the first element is really ugly. Since your list isn't empty just do:
Ncol = len(problem[0])
Now why 3 lines for this:
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
when you can create your list directly:
subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]
and last thing, learn about list comprehensions for things like this:
loc=
for i in range(Nrow):
loc.append((i,mid))
that can be rewritten as
loc = [(i,mid) for i in range(Nrow)]
2. findmax
/ getvalue
As mentionned in comments, the indentation of return
statement is off, but since the code works, I'm assuming that it's just a typo. Here:
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
get_value
is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value- set
bestloc
to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test forNone
in the whole loop. For this we have to handle the "loc
is empty" cornercase at start.
So rewrite function like:
def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]
for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]
if bestval < mvalue:
bestloc = i
bestval = mvalue
return (bestloc,bestval)
only one access to the matrix, no unnecessary test for None
, no function calls, faster index access.
get_subproblem
- Use chaining comparisons since
row
andcol
are tested twice (now you can group both conditions) - don't slice then return, return the slice
like this:
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
if you don't care about the print you could
unpack directly in loop:
for start_R,start_C,num_of_row,num_of_col in sub:
add a comment |Â
up vote
6
down vote
1. algo
I can see 3 areas of local improvements here:
for i in problem:
Ncol=len(i)
break
doing a loop to just set Ncol
for the first element is really ugly. Since your list isn't empty just do:
Ncol = len(problem[0])
Now why 3 lines for this:
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
when you can create your list directly:
subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]
and last thing, learn about list comprehensions for things like this:
loc=
for i in range(Nrow):
loc.append((i,mid))
that can be rewritten as
loc = [(i,mid) for i in range(Nrow)]
2. findmax
/ getvalue
As mentionned in comments, the indentation of return
statement is off, but since the code works, I'm assuming that it's just a typo. Here:
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
get_value
is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value- set
bestloc
to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test forNone
in the whole loop. For this we have to handle the "loc
is empty" cornercase at start.
So rewrite function like:
def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]
for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]
if bestval < mvalue:
bestloc = i
bestval = mvalue
return (bestloc,bestval)
only one access to the matrix, no unnecessary test for None
, no function calls, faster index access.
get_subproblem
- Use chaining comparisons since
row
andcol
are tested twice (now you can group both conditions) - don't slice then return, return the slice
like this:
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
if you don't care about the print you could
unpack directly in loop:
for start_R,start_C,num_of_row,num_of_col in sub:
add a comment |Â
up vote
6
down vote
up vote
6
down vote
1. algo
I can see 3 areas of local improvements here:
for i in problem:
Ncol=len(i)
break
doing a loop to just set Ncol
for the first element is really ugly. Since your list isn't empty just do:
Ncol = len(problem[0])
Now why 3 lines for this:
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
when you can create your list directly:
subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]
and last thing, learn about list comprehensions for things like this:
loc=
for i in range(Nrow):
loc.append((i,mid))
that can be rewritten as
loc = [(i,mid) for i in range(Nrow)]
2. findmax
/ getvalue
As mentionned in comments, the indentation of return
statement is off, but since the code works, I'm assuming that it's just a typo. Here:
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
get_value
is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value- set
bestloc
to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test forNone
in the whole loop. For this we have to handle the "loc
is empty" cornercase at start.
So rewrite function like:
def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]
for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]
if bestval < mvalue:
bestloc = i
bestval = mvalue
return (bestloc,bestval)
only one access to the matrix, no unnecessary test for None
, no function calls, faster index access.
get_subproblem
- Use chaining comparisons since
row
andcol
are tested twice (now you can group both conditions) - don't slice then return, return the slice
like this:
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
if you don't care about the print you could
unpack directly in loop:
for start_R,start_C,num_of_row,num_of_col in sub:
1. algo
I can see 3 areas of local improvements here:
for i in problem:
Ncol=len(i)
break
doing a loop to just set Ncol
for the first element is really ugly. Since your list isn't empty just do:
Ncol = len(problem[0])
Now why 3 lines for this:
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))
when you can create your list directly:
subproblem= [(substartR,substartc1,subnrow,subncol1),(substartR,substartc2,subnrow,subncol2)]
and last thing, learn about list comprehensions for things like this:
loc=
for i in range(Nrow):
loc.append((i,mid))
that can be rewritten as
loc = [(i,mid) for i in range(Nrow)]
2. findmax
/ getvalue
As mentionned in comments, the indentation of return
statement is off, but since the code works, I'm assuming that it's just a typo. Here:
for i in loc:
if bestloc == None or bestval < get_value(i,matrix) :
bestloc=i
bestval= get_value(i,matrix)
return (bestloc,bestval)
get_value
is a function created just for this method. It introduces a lot of overhead & doesn't improve readability either. Scrap it, "inline" & cache the value- set
bestloc
to the first value (and value is okay, but first one is the most logical choice) before the loop starts, so you don't have to test forNone
in the whole loop. For this we have to handle the "loc
is empty" cornercase at start.
So rewrite function like:
def findmax(loc,matrix):
if not loc:
# get rid of the "loc is empty" case
return (None,0)
bestloc,bestval = loc[0]
for i in loc:
x,y = i # unpack from i, no indice access
mvalue = matrix[x][y]
if bestval < mvalue:
bestloc = i
bestval = mvalue
return (bestloc,bestval)
only one access to the matrix, no unnecessary test for None
, no function calls, faster index access.
get_subproblem
- Use chaining comparisons since
row
andcol
are tested twice (now you can group both conditions) - don't slice then return, return the slice
like this:
for i in sub:
print(i)
(start_R,start_C,num_of_row,num_of_col)=i
if start_R <= row <start_R+num_of_row and start_C <= col <start_C+num_of_col:
return matrix1[start_R:num_of_row,start_C:num_of_col+start_C]
if you don't care about the print you could
unpack directly in loop:
for start_R,start_C,num_of_row,num_of_col in sub:
edited Apr 28 at 10:45
answered Apr 27 at 20:21
Jean-François Fabre
783210
783210
add a comment |Â
add a comment |Â
up vote
3
down vote
When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.
Now let's discuss the style first.
Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:
- Functions should be surrounded by two blank lines.
- Operators should be surrounded with whitespaces. Add whitespaces after commas as well.
- Both variable and function names should be lowercase, with words separated by underscores.
Comparison withNone
should be always done byis
oris not
, and never by equality operators.Continuation lines should be aligned vertically. This goes to your
matrix
declaration. It can be, for example, like this:matrix = np.array([[3, 2, 3],
[100,55,7],
[344,9,37]])
Apart from that I would also recommend you:
- Remove all commented code.
- Be consistent with blank lines. In
algo
function your code is pretty dense and not very readable, while infindmax
there are just too many of them. Don't use 2 blank lines inside functions. Use chained comparison. For example, your
if start_R<=row and row<start_R+num_of_row:
should become
if start_r <= row < start_r + num_of_row:
Use self-descriptive names for functions and variables. Some suggestions:
algo -> peak
problem -> matrix
nrow -> rows_count
ncol -> columns_count
mid -> middle_column_index
loc -> location
val -> value
i -> index
...Remove redundant parentheses around tuples. For example:
(x, y) = bestloc1 -> x, y = bestloc1
(bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
return (bestloc, bestval) -> return bestloc, bestval
...Use one-line assignments with a tuple. Instead of
y = i[0][1]
x = i[0][0]you should write
x, y = i[0]
Instead of indexing numpy arrays like this:
matrix[x][y]
use the following:
matrix[x, y]
They both produce the same result but the first one is more inefficient.
Wrap the call of the main function along with
matrix
declaration in the following construct:if __name__ == '__main__':
matrix = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
peak(matrix)For explanations see here.
Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".
As an example, this is how signature of yourget_value
will look like:def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:
But it won't look good for some other functions in your case, like for
bestneighbor
. This is not fault of type hints but of the way how you definedi
in your code before:def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
nrow: int,
ncol: int,
matrix: np.ndarray) -> Tuple[int, int]:After some refactoring it can be significantly simplified.
Don't forget thatList
,Tuple
andOptional
should be imported fromtyping
module.
Now, let's get to more interesting stuff.
First thing that you are doing is checking if there are no elements in the matrix
Nrow=len(problem)
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return NoneThis can be done much easier:
if matrix.size == 0:
return NoneGetting number of rows and columns can be done by accessing
shape
attribute of numpy array:rows_count, columns_count = matrix.shape
Here goes crazy staff:
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.
Left part:matrix[:, :middle_column_index]
Right part:
matrix[:, middle_column_index + 1:]
I suggest you to read carefully about indexing in numpy.
Here you are calculating list of indices of a middle column:
loc=
for i in range(Nrow):
loc.append((i,mid))But then later in
findmax
you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?
For example, let's get that middle column first:middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]And then get index of a maximum value by
np.argmax
and its value:max_index = np.argmax(middle_column)
max_value = middle_column[max_index]Next you are taking that maximum and compare it against its neighbors in
bestneighbour
. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".
Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by
get_subproblem
, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])where
np.amax
returns a maximum of array.
In the end after all refactoring this is what I've got:
from typing import Optional
import numpy as np
def peak(matrix: np.ndarray) -> Optional[int]:
"""
Finds the peak element in the matrix recursively
starting from the middle column.
:param matrix: input array
:return: peak value
"""
if matrix.size == 0:
return None
middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]
max_index = np.argmax(middle_column)
max_value = middle_column[max_index]
row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]
if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])
if __name__ == '__main__':
matrix_example = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
print(peak(matrix_example))
There is still some work left for you to do here:
- Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum,
np.argmax
should return all the indices. Will it break the program? - Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".
- Change the docstring. I was lazy to come up with something good there :)
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
add a comment |Â
up vote
3
down vote
When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.
Now let's discuss the style first.
Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:
- Functions should be surrounded by two blank lines.
- Operators should be surrounded with whitespaces. Add whitespaces after commas as well.
- Both variable and function names should be lowercase, with words separated by underscores.
Comparison withNone
should be always done byis
oris not
, and never by equality operators.Continuation lines should be aligned vertically. This goes to your
matrix
declaration. It can be, for example, like this:matrix = np.array([[3, 2, 3],
[100,55,7],
[344,9,37]])
Apart from that I would also recommend you:
- Remove all commented code.
- Be consistent with blank lines. In
algo
function your code is pretty dense and not very readable, while infindmax
there are just too many of them. Don't use 2 blank lines inside functions. Use chained comparison. For example, your
if start_R<=row and row<start_R+num_of_row:
should become
if start_r <= row < start_r + num_of_row:
Use self-descriptive names for functions and variables. Some suggestions:
algo -> peak
problem -> matrix
nrow -> rows_count
ncol -> columns_count
mid -> middle_column_index
loc -> location
val -> value
i -> index
...Remove redundant parentheses around tuples. For example:
(x, y) = bestloc1 -> x, y = bestloc1
(bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
return (bestloc, bestval) -> return bestloc, bestval
...Use one-line assignments with a tuple. Instead of
y = i[0][1]
x = i[0][0]you should write
x, y = i[0]
Instead of indexing numpy arrays like this:
matrix[x][y]
use the following:
matrix[x, y]
They both produce the same result but the first one is more inefficient.
Wrap the call of the main function along with
matrix
declaration in the following construct:if __name__ == '__main__':
matrix = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
peak(matrix)For explanations see here.
Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".
As an example, this is how signature of yourget_value
will look like:def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:
But it won't look good for some other functions in your case, like for
bestneighbor
. This is not fault of type hints but of the way how you definedi
in your code before:def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
nrow: int,
ncol: int,
matrix: np.ndarray) -> Tuple[int, int]:After some refactoring it can be significantly simplified.
Don't forget thatList
,Tuple
andOptional
should be imported fromtyping
module.
Now, let's get to more interesting stuff.
First thing that you are doing is checking if there are no elements in the matrix
Nrow=len(problem)
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return NoneThis can be done much easier:
if matrix.size == 0:
return NoneGetting number of rows and columns can be done by accessing
shape
attribute of numpy array:rows_count, columns_count = matrix.shape
Here goes crazy staff:
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.
Left part:matrix[:, :middle_column_index]
Right part:
matrix[:, middle_column_index + 1:]
I suggest you to read carefully about indexing in numpy.
Here you are calculating list of indices of a middle column:
loc=
for i in range(Nrow):
loc.append((i,mid))But then later in
findmax
you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?
For example, let's get that middle column first:middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]And then get index of a maximum value by
np.argmax
and its value:max_index = np.argmax(middle_column)
max_value = middle_column[max_index]Next you are taking that maximum and compare it against its neighbors in
bestneighbour
. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".
Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by
get_subproblem
, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])where
np.amax
returns a maximum of array.
In the end after all refactoring this is what I've got:
from typing import Optional
import numpy as np
def peak(matrix: np.ndarray) -> Optional[int]:
"""
Finds the peak element in the matrix recursively
starting from the middle column.
:param matrix: input array
:return: peak value
"""
if matrix.size == 0:
return None
middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]
max_index = np.argmax(middle_column)
max_value = middle_column[max_index]
row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]
if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])
if __name__ == '__main__':
matrix_example = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
print(peak(matrix_example))
There is still some work left for you to do here:
- Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum,
np.argmax
should return all the indices. Will it break the program? - Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".
- Change the docstring. I was lazy to come up with something good there :)
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
add a comment |Â
up vote
3
down vote
up vote
3
down vote
When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.
Now let's discuss the style first.
Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:
- Functions should be surrounded by two blank lines.
- Operators should be surrounded with whitespaces. Add whitespaces after commas as well.
- Both variable and function names should be lowercase, with words separated by underscores.
Comparison withNone
should be always done byis
oris not
, and never by equality operators.Continuation lines should be aligned vertically. This goes to your
matrix
declaration. It can be, for example, like this:matrix = np.array([[3, 2, 3],
[100,55,7],
[344,9,37]])
Apart from that I would also recommend you:
- Remove all commented code.
- Be consistent with blank lines. In
algo
function your code is pretty dense and not very readable, while infindmax
there are just too many of them. Don't use 2 blank lines inside functions. Use chained comparison. For example, your
if start_R<=row and row<start_R+num_of_row:
should become
if start_r <= row < start_r + num_of_row:
Use self-descriptive names for functions and variables. Some suggestions:
algo -> peak
problem -> matrix
nrow -> rows_count
ncol -> columns_count
mid -> middle_column_index
loc -> location
val -> value
i -> index
...Remove redundant parentheses around tuples. For example:
(x, y) = bestloc1 -> x, y = bestloc1
(bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
return (bestloc, bestval) -> return bestloc, bestval
...Use one-line assignments with a tuple. Instead of
y = i[0][1]
x = i[0][0]you should write
x, y = i[0]
Instead of indexing numpy arrays like this:
matrix[x][y]
use the following:
matrix[x, y]
They both produce the same result but the first one is more inefficient.
Wrap the call of the main function along with
matrix
declaration in the following construct:if __name__ == '__main__':
matrix = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
peak(matrix)For explanations see here.
Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".
As an example, this is how signature of yourget_value
will look like:def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:
But it won't look good for some other functions in your case, like for
bestneighbor
. This is not fault of type hints but of the way how you definedi
in your code before:def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
nrow: int,
ncol: int,
matrix: np.ndarray) -> Tuple[int, int]:After some refactoring it can be significantly simplified.
Don't forget thatList
,Tuple
andOptional
should be imported fromtyping
module.
Now, let's get to more interesting stuff.
First thing that you are doing is checking if there are no elements in the matrix
Nrow=len(problem)
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return NoneThis can be done much easier:
if matrix.size == 0:
return NoneGetting number of rows and columns can be done by accessing
shape
attribute of numpy array:rows_count, columns_count = matrix.shape
Here goes crazy staff:
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.
Left part:matrix[:, :middle_column_index]
Right part:
matrix[:, middle_column_index + 1:]
I suggest you to read carefully about indexing in numpy.
Here you are calculating list of indices of a middle column:
loc=
for i in range(Nrow):
loc.append((i,mid))But then later in
findmax
you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?
For example, let's get that middle column first:middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]And then get index of a maximum value by
np.argmax
and its value:max_index = np.argmax(middle_column)
max_value = middle_column[max_index]Next you are taking that maximum and compare it against its neighbors in
bestneighbour
. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".
Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by
get_subproblem
, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])where
np.amax
returns a maximum of array.
In the end after all refactoring this is what I've got:
from typing import Optional
import numpy as np
def peak(matrix: np.ndarray) -> Optional[int]:
"""
Finds the peak element in the matrix recursively
starting from the middle column.
:param matrix: input array
:return: peak value
"""
if matrix.size == 0:
return None
middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]
max_index = np.argmax(middle_column)
max_value = middle_column[max_index]
row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]
if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])
if __name__ == '__main__':
matrix_example = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
print(peak(matrix_example))
There is still some work left for you to do here:
- Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum,
np.argmax
should return all the indices. Will it break the program? - Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".
- Change the docstring. I was lazy to come up with something good there :)
When I ran your function it printed me a bunch of staff that I didn't understand. Looks like you left a lot of printing statements for debugging purposes. It's better to remove them and return only final result. It would be nice if you added docstring as well, so someone could read it to understand what the purpose of your function is.
Now let's discuss the style first.
Python has its style guide called PEP 8. Try to stick to it. Some points that you violated:
- Functions should be surrounded by two blank lines.
- Operators should be surrounded with whitespaces. Add whitespaces after commas as well.
- Both variable and function names should be lowercase, with words separated by underscores.
Comparison withNone
should be always done byis
oris not
, and never by equality operators.Continuation lines should be aligned vertically. This goes to your
matrix
declaration. It can be, for example, like this:matrix = np.array([[3, 2, 3],
[100,55,7],
[344,9,37]])
Apart from that I would also recommend you:
- Remove all commented code.
- Be consistent with blank lines. In
algo
function your code is pretty dense and not very readable, while infindmax
there are just too many of them. Don't use 2 blank lines inside functions. Use chained comparison. For example, your
if start_R<=row and row<start_R+num_of_row:
should become
if start_r <= row < start_r + num_of_row:
Use self-descriptive names for functions and variables. Some suggestions:
algo -> peak
problem -> matrix
nrow -> rows_count
ncol -> columns_count
mid -> middle_column_index
loc -> location
val -> value
i -> index
...Remove redundant parentheses around tuples. For example:
(x, y) = bestloc1 -> x, y = bestloc1
(bestloc, bestval) = (None, 0) -> bestloc, bestval = None, 0
return (bestloc, bestval) -> return bestloc, bestval
...Use one-line assignments with a tuple. Instead of
y = i[0][1]
x = i[0][0]you should write
x, y = i[0]
Instead of indexing numpy arrays like this:
matrix[x][y]
use the following:
matrix[x, y]
They both produce the same result but the first one is more inefficient.
Wrap the call of the main function along with
matrix
declaration in the following construct:if __name__ == '__main__':
matrix = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
peak(matrix)For explanations see here.
Use type hints. This is not mandatory. But by using it you will help yourself and other people to understand what type of data your functions accept as input and what they return. After all "explicit is better than implicit".
As an example, this is how signature of yourget_value
will look like:def get_value(i: Tuple[int, int], matrix: np.ndarray) -> int:
But it won't look good for some other functions in your case, like for
bestneighbor
. This is not fault of type hints but of the way how you definedi
in your code before:def bestneighbour(i: Tuple[Optional[Tuple[int, int]], int],
nrow: int,
ncol: int,
matrix: np.ndarray) -> Tuple[int, int]:After some refactoring it can be significantly simplified.
Don't forget thatList
,Tuple
andOptional
should be imported fromtyping
module.
Now, let's get to more interesting stuff.
First thing that you are doing is checking if there are no elements in the matrix
Nrow=len(problem)
for i in problem:
Ncol=len(i)
break
if Nrow <=0 or Ncol<=0:
return NoneThis can be done much easier:
if matrix.size == 0:
return NoneGetting number of rows and columns can be done by accessing
shape
attribute of numpy array:rows_count, columns_count = matrix.shape
Here goes crazy staff:
(substartR,subnrow)=(0,Nrow)
(substartc1,subncol1)=(0,mid)
(substartc2,subncol2)=(mid,Ncol-mid+1)
subproblem=
subproblem.append((substartR,substartc1,subnrow,subncol1))
subproblem.append((substartR,substartc2,subnrow,subncol2))No need to do these sophisticated manipulations with indices in order to obtain later left and right parts of a matrix. In fact it is much much easier than how you did it.
Left part:matrix[:, :middle_column_index]
Right part:
matrix[:, middle_column_index + 1:]
I suggest you to read carefully about indexing in numpy.
Here you are calculating list of indices of a middle column:
loc=
for i in range(Nrow):
loc.append((i,mid))But then later in
findmax
you use these indices in order to obtain values of that column and get its maximum value with corresponding index. My question is why not operate on the column values in the first place and avoid dealing with indices?
For example, let's get that middle column first:middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]And then get index of a maximum value by
np.argmax
and its value:max_index = np.argmax(middle_column)
max_value = middle_column[max_index]Next you are taking that maximum and compare it against its neighbors in
bestneighbour
. Here I see a problem with first two if's in that function. We already know that this value is maximum in this column, in this case we don't actually need to compare it again with an element one row up and an element one row down. We just have to compare it with neighbor elements on the same row. This is a bit more tricky as we want to take into account boundaries of the row. But still this can be implemented in a much simpler way:row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]This was based on this answer for a question of "how to get the neighboring elements in a numpy array with taking boundaries into account?".
Finally you check if the maximum of this neighbor elements is from the middle column, and if it is, you print the value. If it's not, you get a part of the matrix that has a maximum by
get_subproblem
, and call the function again. Taking into account everything that I mentioned before, I suggest the following implementation:if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])where
np.amax
returns a maximum of array.
In the end after all refactoring this is what I've got:
from typing import Optional
import numpy as np
def peak(matrix: np.ndarray) -> Optional[int]:
"""
Finds the peak element in the matrix recursively
starting from the middle column.
:param matrix: input array
:return: peak value
"""
if matrix.size == 0:
return None
middle_column_index = matrix.shape[1] // 2
middle_column = matrix[:, middle_column_index]
max_index = np.argmax(middle_column)
max_value = middle_column[max_index]
row = matrix[max_index]
neighbours = row[max(0, middle_column_index - 1)
: min(len(row), middle_column_index + 1) + 1]
if max_value == np.amax(neighbours):
return max_value
if neighbours[0] > neighbours[-1]:
return peak(matrix[:, :middle_column_index])
return peak(matrix[:, middle_column_index + 1:])
if __name__ == '__main__':
matrix_example = np.array([[3, 2, 3],
[100, 55, 7],
[344, 9, 37]])
print(peak(matrix_example))
There is still some work left for you to do here:
- Check different test cases: different dimensions, different values, edge cases, bad inputs. Some things may not work. For example, if there are 2 or more of the same values in a column which are maximum,
np.argmax
should return all the indices. Will it break the program? - Try replacing recursion with a while-loop or even a for-loop. Compare both versions in terms of readability and maintainability. I ask this because I think that if one wants to debug a program with recursion then it becomes quite an uneasy task, and after all "flat is better than nested".
- Change the docstring. I was lazy to come up with something good there :)
answered May 1 at 19:49
Georgy
5521415
5521415
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
add a comment |Â
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
1
1
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
Thx mate,it really helps me a lot
â sek fook tan
May 6 at 10:15
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f193067%2fpeak-finding-algorithm%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Is your data sorted?
â Peilonrayz
Apr 27 at 11:14
@Peilonrayz it can be either way,sorted or not sorted
â sek fook tan
Apr 27 at 12:10
@Georgy Sry,what I meant is 2d
â sek fook tan
May 6 at 10:15