Python: wild card pattern matching with memoization
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
5
down vote
favorite
Here is my take on wild card pattern matching with memoization.
I would appreciate comments on clarity of the code, as well as suggested ways to improve readability and maintainability (for bigger projects).
Also, comments on how to change the algorithm to a better one (do it bottom to top instead for example) will also be very welcome.
Last thing I'm looking for are more test cases that will lead to greater code coverage, especially of the edge cases/boundary conditions. Thanks
The commented sections are for collecting and printing out information that I used while debugging. The code is correct and those sections are no longer needed.
class wild_card_pattern_matching:
def __init__(self, p, s):
self.m = [[None for _ in range(len(p) + 1)] for _ in range(len(s) + 1)]
self.p = p
self.s = s
self.c = 0
def match(self):
def f(p,s,p_idx,s_idx):
self.c += 1
# end condition 1
if p_idx == s_idx == 0:
self.m[s_idx][p_idx] = True
# end condition 2
elif s_idx == 0:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) and p[p_idx-1] == '*'
# memo table init
elif p_idx == 0:
self.m[s_idx][p_idx] = False
# use memo if possible
elif self.m[s_idx][p_idx] != None:
return self.m[s_idx][p_idx]
# choose between ignoring * and "accepting" it
elif p[p_idx-1] == '*':
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) or f(p,s,p_idx,s_idx-1)
# matching characters
elif p[p_idx-1] == '?' or p[p_idx-1] == s[s_idx-1]:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx-1)
# pattern != string
else:
self.m[s_idx][p_idx] = False
return self.m[s_idx][p_idx]
f(self.p,self.s,len(self.p), len(self.s))
return self.m[-1][-1]
obj_ls = [wild_card_pattern_matching('*', '')]
obj_ls += [wild_card_pattern_matching('*', 'a')]
obj_ls += [wild_card_pattern_matching('*', 'b')]
obj_ls += [wild_card_pattern_matching('a*', 'ab')]
obj_ls += [wild_card_pattern_matching('*b', 'a')]
obj_ls += [wild_card_pattern_matching('a*a', 'aba')]
obj_ls += [wild_card_pattern_matching('*?*?', 'aa')]
obj_ls += [wild_card_pattern_matching('c*d?*', 'cda')]
obj_ls += [wild_card_pattern_matching('?*?*?*', 'xx0')]
obj_ls += [wild_card_pattern_matching('a', 'a')]
obj_ls += [wild_card_pattern_matching('', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'aa')]
obj_ls += [wild_card_pattern_matching('******?*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a**', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaab')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaa')]
print ' string pattern result counter'
print '='*55
for w in obj_ls:
print '%15s%15s%10r%10d' % (w.s,w.p,w.match(),w.c)
# for l in w.m:
# print l
python python-2.7 regex memoization
add a comment |Â
up vote
5
down vote
favorite
Here is my take on wild card pattern matching with memoization.
I would appreciate comments on clarity of the code, as well as suggested ways to improve readability and maintainability (for bigger projects).
Also, comments on how to change the algorithm to a better one (do it bottom to top instead for example) will also be very welcome.
Last thing I'm looking for are more test cases that will lead to greater code coverage, especially of the edge cases/boundary conditions. Thanks
The commented sections are for collecting and printing out information that I used while debugging. The code is correct and those sections are no longer needed.
class wild_card_pattern_matching:
def __init__(self, p, s):
self.m = [[None for _ in range(len(p) + 1)] for _ in range(len(s) + 1)]
self.p = p
self.s = s
self.c = 0
def match(self):
def f(p,s,p_idx,s_idx):
self.c += 1
# end condition 1
if p_idx == s_idx == 0:
self.m[s_idx][p_idx] = True
# end condition 2
elif s_idx == 0:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) and p[p_idx-1] == '*'
# memo table init
elif p_idx == 0:
self.m[s_idx][p_idx] = False
# use memo if possible
elif self.m[s_idx][p_idx] != None:
return self.m[s_idx][p_idx]
# choose between ignoring * and "accepting" it
elif p[p_idx-1] == '*':
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) or f(p,s,p_idx,s_idx-1)
# matching characters
elif p[p_idx-1] == '?' or p[p_idx-1] == s[s_idx-1]:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx-1)
# pattern != string
else:
self.m[s_idx][p_idx] = False
return self.m[s_idx][p_idx]
f(self.p,self.s,len(self.p), len(self.s))
return self.m[-1][-1]
obj_ls = [wild_card_pattern_matching('*', '')]
obj_ls += [wild_card_pattern_matching('*', 'a')]
obj_ls += [wild_card_pattern_matching('*', 'b')]
obj_ls += [wild_card_pattern_matching('a*', 'ab')]
obj_ls += [wild_card_pattern_matching('*b', 'a')]
obj_ls += [wild_card_pattern_matching('a*a', 'aba')]
obj_ls += [wild_card_pattern_matching('*?*?', 'aa')]
obj_ls += [wild_card_pattern_matching('c*d?*', 'cda')]
obj_ls += [wild_card_pattern_matching('?*?*?*', 'xx0')]
obj_ls += [wild_card_pattern_matching('a', 'a')]
obj_ls += [wild_card_pattern_matching('', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'aa')]
obj_ls += [wild_card_pattern_matching('******?*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a**', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaab')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaa')]
print ' string pattern result counter'
print '='*55
for w in obj_ls:
print '%15s%15s%10r%10d' % (w.s,w.p,w.match(),w.c)
# for l in w.m:
# print l
python python-2.7 regex memoization
add a comment |Â
up vote
5
down vote
favorite
up vote
5
down vote
favorite
Here is my take on wild card pattern matching with memoization.
I would appreciate comments on clarity of the code, as well as suggested ways to improve readability and maintainability (for bigger projects).
Also, comments on how to change the algorithm to a better one (do it bottom to top instead for example) will also be very welcome.
Last thing I'm looking for are more test cases that will lead to greater code coverage, especially of the edge cases/boundary conditions. Thanks
The commented sections are for collecting and printing out information that I used while debugging. The code is correct and those sections are no longer needed.
class wild_card_pattern_matching:
def __init__(self, p, s):
self.m = [[None for _ in range(len(p) + 1)] for _ in range(len(s) + 1)]
self.p = p
self.s = s
self.c = 0
def match(self):
def f(p,s,p_idx,s_idx):
self.c += 1
# end condition 1
if p_idx == s_idx == 0:
self.m[s_idx][p_idx] = True
# end condition 2
elif s_idx == 0:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) and p[p_idx-1] == '*'
# memo table init
elif p_idx == 0:
self.m[s_idx][p_idx] = False
# use memo if possible
elif self.m[s_idx][p_idx] != None:
return self.m[s_idx][p_idx]
# choose between ignoring * and "accepting" it
elif p[p_idx-1] == '*':
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) or f(p,s,p_idx,s_idx-1)
# matching characters
elif p[p_idx-1] == '?' or p[p_idx-1] == s[s_idx-1]:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx-1)
# pattern != string
else:
self.m[s_idx][p_idx] = False
return self.m[s_idx][p_idx]
f(self.p,self.s,len(self.p), len(self.s))
return self.m[-1][-1]
obj_ls = [wild_card_pattern_matching('*', '')]
obj_ls += [wild_card_pattern_matching('*', 'a')]
obj_ls += [wild_card_pattern_matching('*', 'b')]
obj_ls += [wild_card_pattern_matching('a*', 'ab')]
obj_ls += [wild_card_pattern_matching('*b', 'a')]
obj_ls += [wild_card_pattern_matching('a*a', 'aba')]
obj_ls += [wild_card_pattern_matching('*?*?', 'aa')]
obj_ls += [wild_card_pattern_matching('c*d?*', 'cda')]
obj_ls += [wild_card_pattern_matching('?*?*?*', 'xx0')]
obj_ls += [wild_card_pattern_matching('a', 'a')]
obj_ls += [wild_card_pattern_matching('', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'aa')]
obj_ls += [wild_card_pattern_matching('******?*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a**', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaab')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaa')]
print ' string pattern result counter'
print '='*55
for w in obj_ls:
print '%15s%15s%10r%10d' % (w.s,w.p,w.match(),w.c)
# for l in w.m:
# print l
python python-2.7 regex memoization
Here is my take on wild card pattern matching with memoization.
I would appreciate comments on clarity of the code, as well as suggested ways to improve readability and maintainability (for bigger projects).
Also, comments on how to change the algorithm to a better one (do it bottom to top instead for example) will also be very welcome.
Last thing I'm looking for are more test cases that will lead to greater code coverage, especially of the edge cases/boundary conditions. Thanks
The commented sections are for collecting and printing out information that I used while debugging. The code is correct and those sections are no longer needed.
class wild_card_pattern_matching:
def __init__(self, p, s):
self.m = [[None for _ in range(len(p) + 1)] for _ in range(len(s) + 1)]
self.p = p
self.s = s
self.c = 0
def match(self):
def f(p,s,p_idx,s_idx):
self.c += 1
# end condition 1
if p_idx == s_idx == 0:
self.m[s_idx][p_idx] = True
# end condition 2
elif s_idx == 0:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) and p[p_idx-1] == '*'
# memo table init
elif p_idx == 0:
self.m[s_idx][p_idx] = False
# use memo if possible
elif self.m[s_idx][p_idx] != None:
return self.m[s_idx][p_idx]
# choose between ignoring * and "accepting" it
elif p[p_idx-1] == '*':
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx) or f(p,s,p_idx,s_idx-1)
# matching characters
elif p[p_idx-1] == '?' or p[p_idx-1] == s[s_idx-1]:
self.m[s_idx][p_idx] = f(p,s,p_idx-1,s_idx-1)
# pattern != string
else:
self.m[s_idx][p_idx] = False
return self.m[s_idx][p_idx]
f(self.p,self.s,len(self.p), len(self.s))
return self.m[-1][-1]
obj_ls = [wild_card_pattern_matching('*', '')]
obj_ls += [wild_card_pattern_matching('*', 'a')]
obj_ls += [wild_card_pattern_matching('*', 'b')]
obj_ls += [wild_card_pattern_matching('a*', 'ab')]
obj_ls += [wild_card_pattern_matching('*b', 'a')]
obj_ls += [wild_card_pattern_matching('a*a', 'aba')]
obj_ls += [wild_card_pattern_matching('*?*?', 'aa')]
obj_ls += [wild_card_pattern_matching('c*d?*', 'cda')]
obj_ls += [wild_card_pattern_matching('?*?*?*', 'xx0')]
obj_ls += [wild_card_pattern_matching('a', 'a')]
obj_ls += [wild_card_pattern_matching('', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a', 'aa')]
obj_ls += [wild_card_pattern_matching('******?*a', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a**', 'a')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaab')]
obj_ls += [wild_card_pattern_matching('******a*a*', 'ababaa')]
print ' string pattern result counter'
print '='*55
for w in obj_ls:
print '%15s%15s%10r%10d' % (w.s,w.p,w.match(),w.c)
# for l in w.m:
# print l
python python-2.7 regex memoization
edited Feb 6 at 20:16
Oscar Smith
2,625922
2,625922
asked Feb 6 at 16:36
CIsForCookies
22014
22014
add a comment |Â
add a comment |Â
1 Answer
1
active
oldest
votes
up vote
3
down vote
accepted
Take a look at styling and naming guide, namely; PEP-8. Class names should be CamelCase
d and method/attributes should be snake_case
. Your variables are named like s
, p
, c
etc. which does not make it clear at first glance what they represent, nor is there a docstring available for reference.
That aside, running your code gives me:
string pattern result counter
=======================================================
* True 2
a * True 4
b * True 4
ab a* True 5
a *b False 1
aba a*a True 6
aa *?*? True 5
cda c*d?* True 6
xx0 ?*?*?* True 7
a a True 2
a False 1
a ******a*a False 10
aa ******a*a True 10
a ******?*a False 10
a ******a*a** False 35
ababaab ******a*a* True 21
ababaa ******a*a* True 19
where I noticed that your counter reached $35$ for the pattern ******a*a**
. In this case (and a few more similar cases), you can simply return False
if your patter, after removing all *
characters is still longer than the given string. Because then your string would never be able to cover the pattern.
As right now, you only have *
and ?
characters; for any length of characters and exactly one character, the above method reduces your recursion by a lot in several given cases:
def match(self):
if len(self.s) < len(self.p.replace('*', '')):
return False
and the result:
string pattern result counter
=======================================================
[=======================SAME======================]
a False 1
a ******a*a False 0
aa ******a*a True 10
a ******?*a False 0
a ******a*a** False 0
ababaab ******a*a* True 21
ababaa ******a*a* True 19
The above snippet can also be placed inside your recursive f
call. I still have to figure out under which section will it make more sense, but basically for any certain match fixed from pattern onto your string, if the remaining pattern satisfies the above length restriction, you can simply skip the rest of iterations.
For tests, make use of the python's own unittest
module to create mocks. Have a set containing patterns and strings, and iteratively run the assertions on expected values.
add a comment |Â
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
3
down vote
accepted
Take a look at styling and naming guide, namely; PEP-8. Class names should be CamelCase
d and method/attributes should be snake_case
. Your variables are named like s
, p
, c
etc. which does not make it clear at first glance what they represent, nor is there a docstring available for reference.
That aside, running your code gives me:
string pattern result counter
=======================================================
* True 2
a * True 4
b * True 4
ab a* True 5
a *b False 1
aba a*a True 6
aa *?*? True 5
cda c*d?* True 6
xx0 ?*?*?* True 7
a a True 2
a False 1
a ******a*a False 10
aa ******a*a True 10
a ******?*a False 10
a ******a*a** False 35
ababaab ******a*a* True 21
ababaa ******a*a* True 19
where I noticed that your counter reached $35$ for the pattern ******a*a**
. In this case (and a few more similar cases), you can simply return False
if your patter, after removing all *
characters is still longer than the given string. Because then your string would never be able to cover the pattern.
As right now, you only have *
and ?
characters; for any length of characters and exactly one character, the above method reduces your recursion by a lot in several given cases:
def match(self):
if len(self.s) < len(self.p.replace('*', '')):
return False
and the result:
string pattern result counter
=======================================================
[=======================SAME======================]
a False 1
a ******a*a False 0
aa ******a*a True 10
a ******?*a False 0
a ******a*a** False 0
ababaab ******a*a* True 21
ababaa ******a*a* True 19
The above snippet can also be placed inside your recursive f
call. I still have to figure out under which section will it make more sense, but basically for any certain match fixed from pattern onto your string, if the remaining pattern satisfies the above length restriction, you can simply skip the rest of iterations.
For tests, make use of the python's own unittest
module to create mocks. Have a set containing patterns and strings, and iteratively run the assertions on expected values.
add a comment |Â
up vote
3
down vote
accepted
Take a look at styling and naming guide, namely; PEP-8. Class names should be CamelCase
d and method/attributes should be snake_case
. Your variables are named like s
, p
, c
etc. which does not make it clear at first glance what they represent, nor is there a docstring available for reference.
That aside, running your code gives me:
string pattern result counter
=======================================================
* True 2
a * True 4
b * True 4
ab a* True 5
a *b False 1
aba a*a True 6
aa *?*? True 5
cda c*d?* True 6
xx0 ?*?*?* True 7
a a True 2
a False 1
a ******a*a False 10
aa ******a*a True 10
a ******?*a False 10
a ******a*a** False 35
ababaab ******a*a* True 21
ababaa ******a*a* True 19
where I noticed that your counter reached $35$ for the pattern ******a*a**
. In this case (and a few more similar cases), you can simply return False
if your patter, after removing all *
characters is still longer than the given string. Because then your string would never be able to cover the pattern.
As right now, you only have *
and ?
characters; for any length of characters and exactly one character, the above method reduces your recursion by a lot in several given cases:
def match(self):
if len(self.s) < len(self.p.replace('*', '')):
return False
and the result:
string pattern result counter
=======================================================
[=======================SAME======================]
a False 1
a ******a*a False 0
aa ******a*a True 10
a ******?*a False 0
a ******a*a** False 0
ababaab ******a*a* True 21
ababaa ******a*a* True 19
The above snippet can also be placed inside your recursive f
call. I still have to figure out under which section will it make more sense, but basically for any certain match fixed from pattern onto your string, if the remaining pattern satisfies the above length restriction, you can simply skip the rest of iterations.
For tests, make use of the python's own unittest
module to create mocks. Have a set containing patterns and strings, and iteratively run the assertions on expected values.
add a comment |Â
up vote
3
down vote
accepted
up vote
3
down vote
accepted
Take a look at styling and naming guide, namely; PEP-8. Class names should be CamelCase
d and method/attributes should be snake_case
. Your variables are named like s
, p
, c
etc. which does not make it clear at first glance what they represent, nor is there a docstring available for reference.
That aside, running your code gives me:
string pattern result counter
=======================================================
* True 2
a * True 4
b * True 4
ab a* True 5
a *b False 1
aba a*a True 6
aa *?*? True 5
cda c*d?* True 6
xx0 ?*?*?* True 7
a a True 2
a False 1
a ******a*a False 10
aa ******a*a True 10
a ******?*a False 10
a ******a*a** False 35
ababaab ******a*a* True 21
ababaa ******a*a* True 19
where I noticed that your counter reached $35$ for the pattern ******a*a**
. In this case (and a few more similar cases), you can simply return False
if your patter, after removing all *
characters is still longer than the given string. Because then your string would never be able to cover the pattern.
As right now, you only have *
and ?
characters; for any length of characters and exactly one character, the above method reduces your recursion by a lot in several given cases:
def match(self):
if len(self.s) < len(self.p.replace('*', '')):
return False
and the result:
string pattern result counter
=======================================================
[=======================SAME======================]
a False 1
a ******a*a False 0
aa ******a*a True 10
a ******?*a False 0
a ******a*a** False 0
ababaab ******a*a* True 21
ababaa ******a*a* True 19
The above snippet can also be placed inside your recursive f
call. I still have to figure out under which section will it make more sense, but basically for any certain match fixed from pattern onto your string, if the remaining pattern satisfies the above length restriction, you can simply skip the rest of iterations.
For tests, make use of the python's own unittest
module to create mocks. Have a set containing patterns and strings, and iteratively run the assertions on expected values.
Take a look at styling and naming guide, namely; PEP-8. Class names should be CamelCase
d and method/attributes should be snake_case
. Your variables are named like s
, p
, c
etc. which does not make it clear at first glance what they represent, nor is there a docstring available for reference.
That aside, running your code gives me:
string pattern result counter
=======================================================
* True 2
a * True 4
b * True 4
ab a* True 5
a *b False 1
aba a*a True 6
aa *?*? True 5
cda c*d?* True 6
xx0 ?*?*?* True 7
a a True 2
a False 1
a ******a*a False 10
aa ******a*a True 10
a ******?*a False 10
a ******a*a** False 35
ababaab ******a*a* True 21
ababaa ******a*a* True 19
where I noticed that your counter reached $35$ for the pattern ******a*a**
. In this case (and a few more similar cases), you can simply return False
if your patter, after removing all *
characters is still longer than the given string. Because then your string would never be able to cover the pattern.
As right now, you only have *
and ?
characters; for any length of characters and exactly one character, the above method reduces your recursion by a lot in several given cases:
def match(self):
if len(self.s) < len(self.p.replace('*', '')):
return False
and the result:
string pattern result counter
=======================================================
[=======================SAME======================]
a False 1
a ******a*a False 0
aa ******a*a True 10
a ******?*a False 0
a ******a*a** False 0
ababaab ******a*a* True 21
ababaa ******a*a* True 19
The above snippet can also be placed inside your recursive f
call. I still have to figure out under which section will it make more sense, but basically for any certain match fixed from pattern onto your string, if the remaining pattern satisfies the above length restriction, you can simply skip the rest of iterations.
For tests, make use of the python's own unittest
module to create mocks. Have a set containing patterns and strings, and iteratively run the assertions on expected values.
answered Feb 7 at 0:02
hjpotter92
4,97611539
4,97611539
add a comment |Â
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f186929%2fpython-wild-card-pattern-matching-with-memoization%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password