Image template matching algorithm from scratch
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
10
down vote
favorite
I wanted to write a template matching algorithm from scratch.
The key area's I'm interesting in are readability, maintainability, DRY, and if the program performance/efficiency could be improved in any way.
Image scanning class:
import sys
from itertools import izip
class ScanImage:
def __init__(self, image, template, process='sad'):
self.image = image
self.temp = template
self.pro = process #sad or ssd
self.x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
self.y_axis_hgt = len(image) - len(template) # how far to scan on y axis
self.best_location = list() # will hold the coordinates of the final location
self.score = sys.maxint # similarity strength score (the less the better)
self.temp_matrix_holder = list() # holds a matrix of the same size as the template
self.calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
def _empty_list(self):
del self.temp_matrix_holder[:]
def run(self):
return self._scan_image(self.image, self.temp, self.pro)
def _scan_image(self, image, template, process):
"""main function: will iterate over the image one pixel at a time"""
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = self.calculate.run(
self.temp_matrix_holder, self.temp, process)
if new_score <= self.score:
self.score = new_score
self.best_location = [j, i]
self._empty_list()
return self.score, self.best_location
Score calculation class:
class ImageCalculation:
def __init__(self):
self.score = 0.0
def _reset_score(self):
self.score = 0.0
def _minus(self, atuple):
return abs(atuple[0]-atuple[1])
def _sad(self, atuple):
"""sum of absolute difference"""
# this is not nessesary but keeps the code a little neater
# and also shows the explicit difference between sad and ssd
return self._minus(atuple)
def _ssd(self, atuple):
"""sum of squared difference"""
diff = self._minus(atuple)
return diff * diff
def _block_template_comparison(self, image_block, template, process):
if (len(image_block) != len(template)) or
(len(image_block[0]) != len(template[0])):
return 'Error'
for i in xrange(len(image_block)):
for j in xrange(len(image_block[i])):
one_pixel_score = 0.0
for k in izip(image_block[i][j], template[i][j]):
if process == 'sad':
one_pixel_score += self._sad(k)
elif process == 'ssd':
one_pixel_score += self._ssd(k)
self.score += (one_pixel_score/3.0) # average out all 3 (RGB) channels for each pixle
return self.score
def run(self, image_block, template, process='sad'):
if process == 'ssd':
process = 'ssd'
elif process == 'sad':
process = 'sad'
else:
raise ValueError, 'Proccess must be either sad or ssd'
if not self.score == 0.0:
self._reset_score()
return self._block_template_comparison(image_block, template, process)
Outputs:
if __name__ == '__main__':
p_i = [
[[0], [1], [2], [10]],
[[3], [4], [5], [11]],
[[6], [7], [8], [12]],
]
t_i = [
[[4], [5]],
[[7], [12]],
]
s = ScanImage(p_i, t_i)
print s.run()
# ssd outputs : 5.333 (sength score), [1,1] (x, y coords)
# sad outputs : 1.3333, [1, 1]
Template matching is taking a small 'image' and scanning it over a larger image one pixel at a time until a match is found.
During each iteration of the scan the template and a block of the image, which is the same size as the template, are compared. They are given a score. The closer the score (in this specific scoring metric) to 0 the better. A perfect match gets a score of 0.
Special note:
This program is designed to work with pixels with 3 channels each (RGB), this is just a dummy example but it also works for fully built out 'images'.
python algorithm image
add a comment |Â
up vote
10
down vote
favorite
I wanted to write a template matching algorithm from scratch.
The key area's I'm interesting in are readability, maintainability, DRY, and if the program performance/efficiency could be improved in any way.
Image scanning class:
import sys
from itertools import izip
class ScanImage:
def __init__(self, image, template, process='sad'):
self.image = image
self.temp = template
self.pro = process #sad or ssd
self.x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
self.y_axis_hgt = len(image) - len(template) # how far to scan on y axis
self.best_location = list() # will hold the coordinates of the final location
self.score = sys.maxint # similarity strength score (the less the better)
self.temp_matrix_holder = list() # holds a matrix of the same size as the template
self.calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
def _empty_list(self):
del self.temp_matrix_holder[:]
def run(self):
return self._scan_image(self.image, self.temp, self.pro)
def _scan_image(self, image, template, process):
"""main function: will iterate over the image one pixel at a time"""
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = self.calculate.run(
self.temp_matrix_holder, self.temp, process)
if new_score <= self.score:
self.score = new_score
self.best_location = [j, i]
self._empty_list()
return self.score, self.best_location
Score calculation class:
class ImageCalculation:
def __init__(self):
self.score = 0.0
def _reset_score(self):
self.score = 0.0
def _minus(self, atuple):
return abs(atuple[0]-atuple[1])
def _sad(self, atuple):
"""sum of absolute difference"""
# this is not nessesary but keeps the code a little neater
# and also shows the explicit difference between sad and ssd
return self._minus(atuple)
def _ssd(self, atuple):
"""sum of squared difference"""
diff = self._minus(atuple)
return diff * diff
def _block_template_comparison(self, image_block, template, process):
if (len(image_block) != len(template)) or
(len(image_block[0]) != len(template[0])):
return 'Error'
for i in xrange(len(image_block)):
for j in xrange(len(image_block[i])):
one_pixel_score = 0.0
for k in izip(image_block[i][j], template[i][j]):
if process == 'sad':
one_pixel_score += self._sad(k)
elif process == 'ssd':
one_pixel_score += self._ssd(k)
self.score += (one_pixel_score/3.0) # average out all 3 (RGB) channels for each pixle
return self.score
def run(self, image_block, template, process='sad'):
if process == 'ssd':
process = 'ssd'
elif process == 'sad':
process = 'sad'
else:
raise ValueError, 'Proccess must be either sad or ssd'
if not self.score == 0.0:
self._reset_score()
return self._block_template_comparison(image_block, template, process)
Outputs:
if __name__ == '__main__':
p_i = [
[[0], [1], [2], [10]],
[[3], [4], [5], [11]],
[[6], [7], [8], [12]],
]
t_i = [
[[4], [5]],
[[7], [12]],
]
s = ScanImage(p_i, t_i)
print s.run()
# ssd outputs : 5.333 (sength score), [1,1] (x, y coords)
# sad outputs : 1.3333, [1, 1]
Template matching is taking a small 'image' and scanning it over a larger image one pixel at a time until a match is found.
During each iteration of the scan the template and a block of the image, which is the same size as the template, are compared. They are given a score. The closer the score (in this specific scoring metric) to 0 the better. A perfect match gets a score of 0.
Special note:
This program is designed to work with pixels with 3 channels each (RGB), this is just a dummy example but it also works for fully built out 'images'.
python algorithm image
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34
add a comment |Â
up vote
10
down vote
favorite
up vote
10
down vote
favorite
I wanted to write a template matching algorithm from scratch.
The key area's I'm interesting in are readability, maintainability, DRY, and if the program performance/efficiency could be improved in any way.
Image scanning class:
import sys
from itertools import izip
class ScanImage:
def __init__(self, image, template, process='sad'):
self.image = image
self.temp = template
self.pro = process #sad or ssd
self.x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
self.y_axis_hgt = len(image) - len(template) # how far to scan on y axis
self.best_location = list() # will hold the coordinates of the final location
self.score = sys.maxint # similarity strength score (the less the better)
self.temp_matrix_holder = list() # holds a matrix of the same size as the template
self.calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
def _empty_list(self):
del self.temp_matrix_holder[:]
def run(self):
return self._scan_image(self.image, self.temp, self.pro)
def _scan_image(self, image, template, process):
"""main function: will iterate over the image one pixel at a time"""
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = self.calculate.run(
self.temp_matrix_holder, self.temp, process)
if new_score <= self.score:
self.score = new_score
self.best_location = [j, i]
self._empty_list()
return self.score, self.best_location
Score calculation class:
class ImageCalculation:
def __init__(self):
self.score = 0.0
def _reset_score(self):
self.score = 0.0
def _minus(self, atuple):
return abs(atuple[0]-atuple[1])
def _sad(self, atuple):
"""sum of absolute difference"""
# this is not nessesary but keeps the code a little neater
# and also shows the explicit difference between sad and ssd
return self._minus(atuple)
def _ssd(self, atuple):
"""sum of squared difference"""
diff = self._minus(atuple)
return diff * diff
def _block_template_comparison(self, image_block, template, process):
if (len(image_block) != len(template)) or
(len(image_block[0]) != len(template[0])):
return 'Error'
for i in xrange(len(image_block)):
for j in xrange(len(image_block[i])):
one_pixel_score = 0.0
for k in izip(image_block[i][j], template[i][j]):
if process == 'sad':
one_pixel_score += self._sad(k)
elif process == 'ssd':
one_pixel_score += self._ssd(k)
self.score += (one_pixel_score/3.0) # average out all 3 (RGB) channels for each pixle
return self.score
def run(self, image_block, template, process='sad'):
if process == 'ssd':
process = 'ssd'
elif process == 'sad':
process = 'sad'
else:
raise ValueError, 'Proccess must be either sad or ssd'
if not self.score == 0.0:
self._reset_score()
return self._block_template_comparison(image_block, template, process)
Outputs:
if __name__ == '__main__':
p_i = [
[[0], [1], [2], [10]],
[[3], [4], [5], [11]],
[[6], [7], [8], [12]],
]
t_i = [
[[4], [5]],
[[7], [12]],
]
s = ScanImage(p_i, t_i)
print s.run()
# ssd outputs : 5.333 (sength score), [1,1] (x, y coords)
# sad outputs : 1.3333, [1, 1]
Template matching is taking a small 'image' and scanning it over a larger image one pixel at a time until a match is found.
During each iteration of the scan the template and a block of the image, which is the same size as the template, are compared. They are given a score. The closer the score (in this specific scoring metric) to 0 the better. A perfect match gets a score of 0.
Special note:
This program is designed to work with pixels with 3 channels each (RGB), this is just a dummy example but it also works for fully built out 'images'.
python algorithm image
I wanted to write a template matching algorithm from scratch.
The key area's I'm interesting in are readability, maintainability, DRY, and if the program performance/efficiency could be improved in any way.
Image scanning class:
import sys
from itertools import izip
class ScanImage:
def __init__(self, image, template, process='sad'):
self.image = image
self.temp = template
self.pro = process #sad or ssd
self.x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
self.y_axis_hgt = len(image) - len(template) # how far to scan on y axis
self.best_location = list() # will hold the coordinates of the final location
self.score = sys.maxint # similarity strength score (the less the better)
self.temp_matrix_holder = list() # holds a matrix of the same size as the template
self.calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
def _empty_list(self):
del self.temp_matrix_holder[:]
def run(self):
return self._scan_image(self.image, self.temp, self.pro)
def _scan_image(self, image, template, process):
"""main function: will iterate over the image one pixel at a time"""
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = self.calculate.run(
self.temp_matrix_holder, self.temp, process)
if new_score <= self.score:
self.score = new_score
self.best_location = [j, i]
self._empty_list()
return self.score, self.best_location
Score calculation class:
class ImageCalculation:
def __init__(self):
self.score = 0.0
def _reset_score(self):
self.score = 0.0
def _minus(self, atuple):
return abs(atuple[0]-atuple[1])
def _sad(self, atuple):
"""sum of absolute difference"""
# this is not nessesary but keeps the code a little neater
# and also shows the explicit difference between sad and ssd
return self._minus(atuple)
def _ssd(self, atuple):
"""sum of squared difference"""
diff = self._minus(atuple)
return diff * diff
def _block_template_comparison(self, image_block, template, process):
if (len(image_block) != len(template)) or
(len(image_block[0]) != len(template[0])):
return 'Error'
for i in xrange(len(image_block)):
for j in xrange(len(image_block[i])):
one_pixel_score = 0.0
for k in izip(image_block[i][j], template[i][j]):
if process == 'sad':
one_pixel_score += self._sad(k)
elif process == 'ssd':
one_pixel_score += self._ssd(k)
self.score += (one_pixel_score/3.0) # average out all 3 (RGB) channels for each pixle
return self.score
def run(self, image_block, template, process='sad'):
if process == 'ssd':
process = 'ssd'
elif process == 'sad':
process = 'sad'
else:
raise ValueError, 'Proccess must be either sad or ssd'
if not self.score == 0.0:
self._reset_score()
return self._block_template_comparison(image_block, template, process)
Outputs:
if __name__ == '__main__':
p_i = [
[[0], [1], [2], [10]],
[[3], [4], [5], [11]],
[[6], [7], [8], [12]],
]
t_i = [
[[4], [5]],
[[7], [12]],
]
s = ScanImage(p_i, t_i)
print s.run()
# ssd outputs : 5.333 (sength score), [1,1] (x, y coords)
# sad outputs : 1.3333, [1, 1]
Template matching is taking a small 'image' and scanning it over a larger image one pixel at a time until a match is found.
During each iteration of the scan the template and a block of the image, which is the same size as the template, are compared. They are given a score. The closer the score (in this specific scoring metric) to 0 the better. A perfect match gets a score of 0.
Special note:
This program is designed to work with pixels with 3 channels each (RGB), this is just a dummy example but it also works for fully built out 'images'.
python algorithm image
edited Jun 9 at 22:31
Jamalâ¦
30.1k11114225
30.1k11114225
asked Jun 7 at 21:17
arm93
956
956
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34
add a comment |Â
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34
add a comment |Â
1 Answer
1
active
oldest
votes
up vote
4
down vote
accepted
I am not very experienced in Python, so this review might miss some Python-specific issues. But I am very experienced with this type of algorithm, and so will primarily discuss the algorithmic and computational aspects of the code.
Image storage
If I understand correctly, the image here is stored as a list of lists. This is not an efficient storage model, because each image line needs its own allocation, and is potentially stored in disparate memory locations. It is always better to store an image as a single memory block, so that all pixels are consecutive in memory. This leads to fewer cache misses. There is also one fewer level of indirection (looking up the pointer to the image line (list) in the outer list, then looking up the pixel in the inner list).
And looking through the code again, it might even be the case that the image is stored as a list of lists of lists, where each pixel is a list containing the RGB triplet. This is even worse, as each pixel is potentially in a different location in memory.
Using a NumPy array to store the image should lead to a significant improvement in speed just because of the data locality and fewer levels of indirection.
Also, extracting the image block can be done in a single statement. The resulting NumPy array references the data in the original array, meaning that no data copy is necessary.
Additional advantages to using NumPy are the possibility to compute things such as done in ImageCalculation._block_template_comparison
with a single statement rather than three nested loops. This is both faster (the looping happens in compiled code, which is much faster than loops in Python), and easier to maintain (less code).
Generalization
These two lines imply that the template always has exactly two image lines:
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
It would be a good idea to add another loop here to copy an image block of arbitrary size, to be able to match templates of arbitrary size.
Loop limits
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
You are looping over all pixels, but not doing anything for the pixels at the right and bottom edges. Why not directly loop only over those pixels that you need to address?
for i in xrange(self.y_axis_hgt + 1):
for j in xrange(self.x_axis_leng + 1):
Square difference
The function ImageCalculation._ssd
computes the square of the absolute difference, as self._minus
calls abs
. That call to abs
is redundant (unless you have complex-valued images).
Short-cut
If you compute a score of 0, you can stop the algorithm, as it won't find a better match.
Method selection
In ImageCalculation._block_template_comparison
, you test for which of the two process
values is given within the double loop. This is not at all efficient, you should do this test outside. In fact, for every block being compared, the process
value is tested for validity as well, which needs to be done only once.
Instead, the ImageCalculation
class could take the process
value in its constructor, and store a pointer to the method it should use in computation. This way, no further string comparisons are needed during computation.
Readability and maintainability
You specifically ask for these. I think the code is readable, you use clear variable names and add comments where necessary.
However (and this is probably more about me than your code), I think that the classes here make the code more convoluted than necessary. I've never been a fan of the classes that just have a constructor and a run
method. I don't see the advantage over a normal function, and only see unnecessary complication, both in usage:
s = ScanImage(p_i, t_i)
print s.run()
vs
print scan_image(p_i, t_i)
and in the code itself. Your first block of code would be identical, but with lots of lines deleted:
def scan_image(image, template, process='sad'):
x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
y_axis_hgt = len(image) - len(template) # how far to scan on y axis
best_location = list() # will hold the coordinates of the final location
score = sys.maxint # similarity strength score (the less the better)
calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= x_axis_leng) and
(i <= y_axis_hgt):
temp_matrix_holder = list()
temp_matrix_holder.append(
image[i][j:j+len(template[0])])
temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = calculate.run(
temp_matrix_holder, template, process)
if new_score <= score:
score = new_score
best_location = [j, i]
return score, best_location
Because there are fewer lines of code, it's more readable and more maintainable. It's also easier to follow the data throughout the code, since there are no variables being shared across different methods.
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
add a comment |Â
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
4
down vote
accepted
I am not very experienced in Python, so this review might miss some Python-specific issues. But I am very experienced with this type of algorithm, and so will primarily discuss the algorithmic and computational aspects of the code.
Image storage
If I understand correctly, the image here is stored as a list of lists. This is not an efficient storage model, because each image line needs its own allocation, and is potentially stored in disparate memory locations. It is always better to store an image as a single memory block, so that all pixels are consecutive in memory. This leads to fewer cache misses. There is also one fewer level of indirection (looking up the pointer to the image line (list) in the outer list, then looking up the pixel in the inner list).
And looking through the code again, it might even be the case that the image is stored as a list of lists of lists, where each pixel is a list containing the RGB triplet. This is even worse, as each pixel is potentially in a different location in memory.
Using a NumPy array to store the image should lead to a significant improvement in speed just because of the data locality and fewer levels of indirection.
Also, extracting the image block can be done in a single statement. The resulting NumPy array references the data in the original array, meaning that no data copy is necessary.
Additional advantages to using NumPy are the possibility to compute things such as done in ImageCalculation._block_template_comparison
with a single statement rather than three nested loops. This is both faster (the looping happens in compiled code, which is much faster than loops in Python), and easier to maintain (less code).
Generalization
These two lines imply that the template always has exactly two image lines:
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
It would be a good idea to add another loop here to copy an image block of arbitrary size, to be able to match templates of arbitrary size.
Loop limits
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
You are looping over all pixels, but not doing anything for the pixels at the right and bottom edges. Why not directly loop only over those pixels that you need to address?
for i in xrange(self.y_axis_hgt + 1):
for j in xrange(self.x_axis_leng + 1):
Square difference
The function ImageCalculation._ssd
computes the square of the absolute difference, as self._minus
calls abs
. That call to abs
is redundant (unless you have complex-valued images).
Short-cut
If you compute a score of 0, you can stop the algorithm, as it won't find a better match.
Method selection
In ImageCalculation._block_template_comparison
, you test for which of the two process
values is given within the double loop. This is not at all efficient, you should do this test outside. In fact, for every block being compared, the process
value is tested for validity as well, which needs to be done only once.
Instead, the ImageCalculation
class could take the process
value in its constructor, and store a pointer to the method it should use in computation. This way, no further string comparisons are needed during computation.
Readability and maintainability
You specifically ask for these. I think the code is readable, you use clear variable names and add comments where necessary.
However (and this is probably more about me than your code), I think that the classes here make the code more convoluted than necessary. I've never been a fan of the classes that just have a constructor and a run
method. I don't see the advantage over a normal function, and only see unnecessary complication, both in usage:
s = ScanImage(p_i, t_i)
print s.run()
vs
print scan_image(p_i, t_i)
and in the code itself. Your first block of code would be identical, but with lots of lines deleted:
def scan_image(image, template, process='sad'):
x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
y_axis_hgt = len(image) - len(template) # how far to scan on y axis
best_location = list() # will hold the coordinates of the final location
score = sys.maxint # similarity strength score (the less the better)
calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= x_axis_leng) and
(i <= y_axis_hgt):
temp_matrix_holder = list()
temp_matrix_holder.append(
image[i][j:j+len(template[0])])
temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = calculate.run(
temp_matrix_holder, template, process)
if new_score <= score:
score = new_score
best_location = [j, i]
return score, best_location
Because there are fewer lines of code, it's more readable and more maintainable. It's also easier to follow the data throughout the code, since there are no variables being shared across different methods.
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
add a comment |Â
up vote
4
down vote
accepted
I am not very experienced in Python, so this review might miss some Python-specific issues. But I am very experienced with this type of algorithm, and so will primarily discuss the algorithmic and computational aspects of the code.
Image storage
If I understand correctly, the image here is stored as a list of lists. This is not an efficient storage model, because each image line needs its own allocation, and is potentially stored in disparate memory locations. It is always better to store an image as a single memory block, so that all pixels are consecutive in memory. This leads to fewer cache misses. There is also one fewer level of indirection (looking up the pointer to the image line (list) in the outer list, then looking up the pixel in the inner list).
And looking through the code again, it might even be the case that the image is stored as a list of lists of lists, where each pixel is a list containing the RGB triplet. This is even worse, as each pixel is potentially in a different location in memory.
Using a NumPy array to store the image should lead to a significant improvement in speed just because of the data locality and fewer levels of indirection.
Also, extracting the image block can be done in a single statement. The resulting NumPy array references the data in the original array, meaning that no data copy is necessary.
Additional advantages to using NumPy are the possibility to compute things such as done in ImageCalculation._block_template_comparison
with a single statement rather than three nested loops. This is both faster (the looping happens in compiled code, which is much faster than loops in Python), and easier to maintain (less code).
Generalization
These two lines imply that the template always has exactly two image lines:
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
It would be a good idea to add another loop here to copy an image block of arbitrary size, to be able to match templates of arbitrary size.
Loop limits
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
You are looping over all pixels, but not doing anything for the pixels at the right and bottom edges. Why not directly loop only over those pixels that you need to address?
for i in xrange(self.y_axis_hgt + 1):
for j in xrange(self.x_axis_leng + 1):
Square difference
The function ImageCalculation._ssd
computes the square of the absolute difference, as self._minus
calls abs
. That call to abs
is redundant (unless you have complex-valued images).
Short-cut
If you compute a score of 0, you can stop the algorithm, as it won't find a better match.
Method selection
In ImageCalculation._block_template_comparison
, you test for which of the two process
values is given within the double loop. This is not at all efficient, you should do this test outside. In fact, for every block being compared, the process
value is tested for validity as well, which needs to be done only once.
Instead, the ImageCalculation
class could take the process
value in its constructor, and store a pointer to the method it should use in computation. This way, no further string comparisons are needed during computation.
Readability and maintainability
You specifically ask for these. I think the code is readable, you use clear variable names and add comments where necessary.
However (and this is probably more about me than your code), I think that the classes here make the code more convoluted than necessary. I've never been a fan of the classes that just have a constructor and a run
method. I don't see the advantage over a normal function, and only see unnecessary complication, both in usage:
s = ScanImage(p_i, t_i)
print s.run()
vs
print scan_image(p_i, t_i)
and in the code itself. Your first block of code would be identical, but with lots of lines deleted:
def scan_image(image, template, process='sad'):
x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
y_axis_hgt = len(image) - len(template) # how far to scan on y axis
best_location = list() # will hold the coordinates of the final location
score = sys.maxint # similarity strength score (the less the better)
calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= x_axis_leng) and
(i <= y_axis_hgt):
temp_matrix_holder = list()
temp_matrix_holder.append(
image[i][j:j+len(template[0])])
temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = calculate.run(
temp_matrix_holder, template, process)
if new_score <= score:
score = new_score
best_location = [j, i]
return score, best_location
Because there are fewer lines of code, it's more readable and more maintainable. It's also easier to follow the data throughout the code, since there are no variables being shared across different methods.
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
add a comment |Â
up vote
4
down vote
accepted
up vote
4
down vote
accepted
I am not very experienced in Python, so this review might miss some Python-specific issues. But I am very experienced with this type of algorithm, and so will primarily discuss the algorithmic and computational aspects of the code.
Image storage
If I understand correctly, the image here is stored as a list of lists. This is not an efficient storage model, because each image line needs its own allocation, and is potentially stored in disparate memory locations. It is always better to store an image as a single memory block, so that all pixels are consecutive in memory. This leads to fewer cache misses. There is also one fewer level of indirection (looking up the pointer to the image line (list) in the outer list, then looking up the pixel in the inner list).
And looking through the code again, it might even be the case that the image is stored as a list of lists of lists, where each pixel is a list containing the RGB triplet. This is even worse, as each pixel is potentially in a different location in memory.
Using a NumPy array to store the image should lead to a significant improvement in speed just because of the data locality and fewer levels of indirection.
Also, extracting the image block can be done in a single statement. The resulting NumPy array references the data in the original array, meaning that no data copy is necessary.
Additional advantages to using NumPy are the possibility to compute things such as done in ImageCalculation._block_template_comparison
with a single statement rather than three nested loops. This is both faster (the looping happens in compiled code, which is much faster than loops in Python), and easier to maintain (less code).
Generalization
These two lines imply that the template always has exactly two image lines:
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
It would be a good idea to add another loop here to copy an image block of arbitrary size, to be able to match templates of arbitrary size.
Loop limits
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
You are looping over all pixels, but not doing anything for the pixels at the right and bottom edges. Why not directly loop only over those pixels that you need to address?
for i in xrange(self.y_axis_hgt + 1):
for j in xrange(self.x_axis_leng + 1):
Square difference
The function ImageCalculation._ssd
computes the square of the absolute difference, as self._minus
calls abs
. That call to abs
is redundant (unless you have complex-valued images).
Short-cut
If you compute a score of 0, you can stop the algorithm, as it won't find a better match.
Method selection
In ImageCalculation._block_template_comparison
, you test for which of the two process
values is given within the double loop. This is not at all efficient, you should do this test outside. In fact, for every block being compared, the process
value is tested for validity as well, which needs to be done only once.
Instead, the ImageCalculation
class could take the process
value in its constructor, and store a pointer to the method it should use in computation. This way, no further string comparisons are needed during computation.
Readability and maintainability
You specifically ask for these. I think the code is readable, you use clear variable names and add comments where necessary.
However (and this is probably more about me than your code), I think that the classes here make the code more convoluted than necessary. I've never been a fan of the classes that just have a constructor and a run
method. I don't see the advantage over a normal function, and only see unnecessary complication, both in usage:
s = ScanImage(p_i, t_i)
print s.run()
vs
print scan_image(p_i, t_i)
and in the code itself. Your first block of code would be identical, but with lots of lines deleted:
def scan_image(image, template, process='sad'):
x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
y_axis_hgt = len(image) - len(template) # how far to scan on y axis
best_location = list() # will hold the coordinates of the final location
score = sys.maxint # similarity strength score (the less the better)
calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= x_axis_leng) and
(i <= y_axis_hgt):
temp_matrix_holder = list()
temp_matrix_holder.append(
image[i][j:j+len(template[0])])
temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = calculate.run(
temp_matrix_holder, template, process)
if new_score <= score:
score = new_score
best_location = [j, i]
return score, best_location
Because there are fewer lines of code, it's more readable and more maintainable. It's also easier to follow the data throughout the code, since there are no variables being shared across different methods.
I am not very experienced in Python, so this review might miss some Python-specific issues. But I am very experienced with this type of algorithm, and so will primarily discuss the algorithmic and computational aspects of the code.
Image storage
If I understand correctly, the image here is stored as a list of lists. This is not an efficient storage model, because each image line needs its own allocation, and is potentially stored in disparate memory locations. It is always better to store an image as a single memory block, so that all pixels are consecutive in memory. This leads to fewer cache misses. There is also one fewer level of indirection (looking up the pointer to the image line (list) in the outer list, then looking up the pixel in the inner list).
And looking through the code again, it might even be the case that the image is stored as a list of lists of lists, where each pixel is a list containing the RGB triplet. This is even worse, as each pixel is potentially in a different location in memory.
Using a NumPy array to store the image should lead to a significant improvement in speed just because of the data locality and fewer levels of indirection.
Also, extracting the image block can be done in a single statement. The resulting NumPy array references the data in the original array, meaning that no data copy is necessary.
Additional advantages to using NumPy are the possibility to compute things such as done in ImageCalculation._block_template_comparison
with a single statement rather than three nested loops. This is both faster (the looping happens in compiled code, which is much faster than loops in Python), and easier to maintain (less code).
Generalization
These two lines imply that the template always has exactly two image lines:
self.temp_matrix_holder.append(
image[i][j:j+len(template[0])])
self.temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
It would be a good idea to add another loop here to copy an image block of arbitrary size, to be able to match templates of arbitrary size.
Loop limits
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= self.x_axis_leng) and
(i <= self.y_axis_hgt):
You are looping over all pixels, but not doing anything for the pixels at the right and bottom edges. Why not directly loop only over those pixels that you need to address?
for i in xrange(self.y_axis_hgt + 1):
for j in xrange(self.x_axis_leng + 1):
Square difference
The function ImageCalculation._ssd
computes the square of the absolute difference, as self._minus
calls abs
. That call to abs
is redundant (unless you have complex-valued images).
Short-cut
If you compute a score of 0, you can stop the algorithm, as it won't find a better match.
Method selection
In ImageCalculation._block_template_comparison
, you test for which of the two process
values is given within the double loop. This is not at all efficient, you should do this test outside. In fact, for every block being compared, the process
value is tested for validity as well, which needs to be done only once.
Instead, the ImageCalculation
class could take the process
value in its constructor, and store a pointer to the method it should use in computation. This way, no further string comparisons are needed during computation.
Readability and maintainability
You specifically ask for these. I think the code is readable, you use clear variable names and add comments where necessary.
However (and this is probably more about me than your code), I think that the classes here make the code more convoluted than necessary. I've never been a fan of the classes that just have a constructor and a run
method. I don't see the advantage over a normal function, and only see unnecessary complication, both in usage:
s = ScanImage(p_i, t_i)
print s.run()
vs
print scan_image(p_i, t_i)
and in the code itself. Your first block of code would be identical, but with lots of lines deleted:
def scan_image(image, template, process='sad'):
x_axis_leng = len(image[0]) - len(template[0]) # how far to scan on x axis of image
y_axis_hgt = len(image) - len(template) # how far to scan on y axis
best_location = list() # will hold the coordinates of the final location
score = sys.maxint # similarity strength score (the less the better)
calculate = ImageCalculation() # class that is used to calculate strength score using ssd or sad
for i in xrange(len(image)):
for j in xrange(len(image[i])):
if (j <= x_axis_leng) and
(i <= y_axis_hgt):
temp_matrix_holder = list()
temp_matrix_holder.append(
image[i][j:j+len(template[0])])
temp_matrix_holder.append(
image[i + 1][j:j+len(template[0])])
new_score = calculate.run(
temp_matrix_holder, template, process)
if new_score <= score:
score = new_score
best_location = [j, i]
return score, best_location
Because there are fewer lines of code, it's more readable and more maintainable. It's also easier to follow the data throughout the code, since there are no variables being shared across different methods.
answered Jun 11 at 4:30
Cris Luengo
1,877215
1,877215
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
add a comment |Â
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
Thank you very much. This is the exact sort of thing I was looking for. Your SSD point out was such a face palm moment :)
â arm93
Jun 11 at 15:38
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f196069%2fimage-template-matching-algorithm-from-scratch%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Can you edit your question and describe in a few words what template matching is and how it works?
â Mathias Ettinger
Jun 8 at 12:57
Is Python2 required, or are you open to using Python3?
â scnerd
Jun 8 at 14:27
@scnerd python 2.7.9
â arm93
Jun 8 at 14:34