Trie implementation with Python 3

Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
7
down vote
favorite
This is just a classic implementation of a trie using python 3.6, with types. I am a PHP developer, so these are my first attempts with Python.
I know I can improve the readability of my code, it's a bit messy, but I am struggling finding the right way of doing it. I'm sure I am missing a lot of the Pythonic ways in here :-)
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
is_end_of_word = False
for i in range(0, len(word)):
char = word[i]
if i == len(word) - 1:
is_end_of_word = True
if not current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
continue
if char not in current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
else:
current = current.children[char]
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if not current.children:
return False
if char in current.children:
current = current.children[char]
continue
else:
return False
if not current.is_word:
return False
return True
def remove(self, word: str) -> None:
current = self.root
for i in range(0, len(word)):
char = word[i]
is_end_of_word = False
if i == len(word) - 1:
is_end_of_word = True
if char in current.children:
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
else:
return
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word = word + child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
continue
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
python python-3.x trie
add a comment |Â
up vote
7
down vote
favorite
This is just a classic implementation of a trie using python 3.6, with types. I am a PHP developer, so these are my first attempts with Python.
I know I can improve the readability of my code, it's a bit messy, but I am struggling finding the right way of doing it. I'm sure I am missing a lot of the Pythonic ways in here :-)
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
is_end_of_word = False
for i in range(0, len(word)):
char = word[i]
if i == len(word) - 1:
is_end_of_word = True
if not current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
continue
if char not in current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
else:
current = current.children[char]
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if not current.children:
return False
if char in current.children:
current = current.children[char]
continue
else:
return False
if not current.is_word:
return False
return True
def remove(self, word: str) -> None:
current = self.root
for i in range(0, len(word)):
char = word[i]
is_end_of_word = False
if i == len(word) - 1:
is_end_of_word = True
if char in current.children:
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
else:
return
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word = word + child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
continue
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
python python-3.x trie
1
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
1
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37
add a comment |Â
up vote
7
down vote
favorite
up vote
7
down vote
favorite
This is just a classic implementation of a trie using python 3.6, with types. I am a PHP developer, so these are my first attempts with Python.
I know I can improve the readability of my code, it's a bit messy, but I am struggling finding the right way of doing it. I'm sure I am missing a lot of the Pythonic ways in here :-)
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
is_end_of_word = False
for i in range(0, len(word)):
char = word[i]
if i == len(word) - 1:
is_end_of_word = True
if not current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
continue
if char not in current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
else:
current = current.children[char]
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if not current.children:
return False
if char in current.children:
current = current.children[char]
continue
else:
return False
if not current.is_word:
return False
return True
def remove(self, word: str) -> None:
current = self.root
for i in range(0, len(word)):
char = word[i]
is_end_of_word = False
if i == len(word) - 1:
is_end_of_word = True
if char in current.children:
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
else:
return
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word = word + child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
continue
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
python python-3.x trie
This is just a classic implementation of a trie using python 3.6, with types. I am a PHP developer, so these are my first attempts with Python.
I know I can improve the readability of my code, it's a bit messy, but I am struggling finding the right way of doing it. I'm sure I am missing a lot of the Pythonic ways in here :-)
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
is_end_of_word = False
for i in range(0, len(word)):
char = word[i]
if i == len(word) - 1:
is_end_of_word = True
if not current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
continue
if char not in current.children:
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
else:
current = current.children[char]
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if not current.children:
return False
if char in current.children:
current = current.children[char]
continue
else:
return False
if not current.is_word:
return False
return True
def remove(self, word: str) -> None:
current = self.root
for i in range(0, len(word)):
char = word[i]
is_end_of_word = False
if i == len(word) - 1:
is_end_of_word = True
if char in current.children:
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
else:
return
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word = word + child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
continue
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
python python-3.x trie
edited Mar 25 at 15:44
200_success
123k14142399
123k14142399
asked Mar 25 at 11:40
kekko12
383
383
1
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
1
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37
add a comment |Â
1
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
1
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37
1
1
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
1
1
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37
add a comment |Â
2 Answers
2
active
oldest
votes
up vote
4
down vote
accepted
Your code looks very good and the type annotation is a really nice touch
Also, your code is well organised and easy to understand. Some documentation could be a good idea (but it's better to have no doc rather than bad doc). Also, you could consider writing unit tests.
Let's try to see what could be improved/made more Pythonic.
Loop like a native
I highly recommand Ned Batchelder's excellent talk "loop like a native". One of the idea is that whenever you are using for i in range(len(iterable)), you are probably doing it wrong. In your case, you could use for i, char in enumerate(word):.
End of word
The end-of-word detection could be done in a single statement: is_end_of_word = i == len(word) - 1. Also, you can get rid of the definition before the loop and even in the loops, sometimes, you could define it only behind the if char in current.children: because you use it only there.
Reorganise the logic
Sometimes, you check if something is empty and then if it contains a particular element. This can be factorised out:
Also, simplifying the code if (cond) return False else return True, you could rewrite contains:
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
Then in _retrieve_all_words, you can get rid of continue by using a simple else which makes things more explicit. Then, you can actually factorise out the common code at the end of the 2 branches and get the more simple:
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
Finally, you can use += to simplify word = word + child.char into word += child.char.
At this stage, the code looks like:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char in current.children:
current = current.children[char]
else:
is_end_of_word = i == len(word) - 1
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
def remove(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char not in current.children:
return
is_end_of_word = i == len(word) - 1
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word += child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
Storing things in a different format
Instead of maintaining a is_word attribute, maybe you could add store a sentinel like None to say that we have a full word here. This could be written like this:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None)
def add(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char in current.children:
current = current.children[char]
else:
node_to_insert = Node(char)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return False
current = current.children[char]
return True
def remove(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return
elif char is None:
del current.children[char]
else:
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
else:
word += child.char
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str]):
self.char = char
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
t.remove("toto")
print(t.retrieve_all_words())
print(t.contains("toto"))
Back to word = word + child.char
Something I should have mentionned earlier but spotted on the late.
In _retrieve_all_words, you add a char to word only to remove it afterward. It is much clearer (and more efficient?) to just write word + char in the places where you actually need to word with the added character.
Then, the code becomes:
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
elif child.children:
self._retrieve_all_words(child, word + child.char, words)
return words
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration isis_end_of_word = i == len(word) - 1inside the loop, which wouldn't be needed outside and after the loop.
â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
@Josay: I know that. As an alternative, you could simply callcurrent.is_word = Trueafter the loop, and removeis_word: boolfromNode.__init__. Here's the code
â Eric Duminil
Mar 26 at 12:13
 |Â
show 3 more comments
up vote
4
down vote
this is a small thing, but the API you provide isn't very pythonic. rather than using a containsmethod, you should overload in. similarly, rather than defining return_all_words, you should define iteration, so you can loop directly through it, and then the list conversion will just be list(tried). these may seem insignificant, but this type of consistency is what makes python feel nice to use.
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
add a comment |Â
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
4
down vote
accepted
Your code looks very good and the type annotation is a really nice touch
Also, your code is well organised and easy to understand. Some documentation could be a good idea (but it's better to have no doc rather than bad doc). Also, you could consider writing unit tests.
Let's try to see what could be improved/made more Pythonic.
Loop like a native
I highly recommand Ned Batchelder's excellent talk "loop like a native". One of the idea is that whenever you are using for i in range(len(iterable)), you are probably doing it wrong. In your case, you could use for i, char in enumerate(word):.
End of word
The end-of-word detection could be done in a single statement: is_end_of_word = i == len(word) - 1. Also, you can get rid of the definition before the loop and even in the loops, sometimes, you could define it only behind the if char in current.children: because you use it only there.
Reorganise the logic
Sometimes, you check if something is empty and then if it contains a particular element. This can be factorised out:
Also, simplifying the code if (cond) return False else return True, you could rewrite contains:
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
Then in _retrieve_all_words, you can get rid of continue by using a simple else which makes things more explicit. Then, you can actually factorise out the common code at the end of the 2 branches and get the more simple:
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
Finally, you can use += to simplify word = word + child.char into word += child.char.
At this stage, the code looks like:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char in current.children:
current = current.children[char]
else:
is_end_of_word = i == len(word) - 1
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
def remove(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char not in current.children:
return
is_end_of_word = i == len(word) - 1
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word += child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
Storing things in a different format
Instead of maintaining a is_word attribute, maybe you could add store a sentinel like None to say that we have a full word here. This could be written like this:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None)
def add(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char in current.children:
current = current.children[char]
else:
node_to_insert = Node(char)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return False
current = current.children[char]
return True
def remove(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return
elif char is None:
del current.children[char]
else:
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
else:
word += child.char
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str]):
self.char = char
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
t.remove("toto")
print(t.retrieve_all_words())
print(t.contains("toto"))
Back to word = word + child.char
Something I should have mentionned earlier but spotted on the late.
In _retrieve_all_words, you add a char to word only to remove it afterward. It is much clearer (and more efficient?) to just write word + char in the places where you actually need to word with the added character.
Then, the code becomes:
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
elif child.children:
self._retrieve_all_words(child, word + child.char, words)
return words
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration isis_end_of_word = i == len(word) - 1inside the loop, which wouldn't be needed outside and after the loop.
â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
@Josay: I know that. As an alternative, you could simply callcurrent.is_word = Trueafter the loop, and removeis_word: boolfromNode.__init__. Here's the code
â Eric Duminil
Mar 26 at 12:13
 |Â
show 3 more comments
up vote
4
down vote
accepted
Your code looks very good and the type annotation is a really nice touch
Also, your code is well organised and easy to understand. Some documentation could be a good idea (but it's better to have no doc rather than bad doc). Also, you could consider writing unit tests.
Let's try to see what could be improved/made more Pythonic.
Loop like a native
I highly recommand Ned Batchelder's excellent talk "loop like a native". One of the idea is that whenever you are using for i in range(len(iterable)), you are probably doing it wrong. In your case, you could use for i, char in enumerate(word):.
End of word
The end-of-word detection could be done in a single statement: is_end_of_word = i == len(word) - 1. Also, you can get rid of the definition before the loop and even in the loops, sometimes, you could define it only behind the if char in current.children: because you use it only there.
Reorganise the logic
Sometimes, you check if something is empty and then if it contains a particular element. This can be factorised out:
Also, simplifying the code if (cond) return False else return True, you could rewrite contains:
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
Then in _retrieve_all_words, you can get rid of continue by using a simple else which makes things more explicit. Then, you can actually factorise out the common code at the end of the 2 branches and get the more simple:
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
Finally, you can use += to simplify word = word + child.char into word += child.char.
At this stage, the code looks like:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char in current.children:
current = current.children[char]
else:
is_end_of_word = i == len(word) - 1
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
def remove(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char not in current.children:
return
is_end_of_word = i == len(word) - 1
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word += child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
Storing things in a different format
Instead of maintaining a is_word attribute, maybe you could add store a sentinel like None to say that we have a full word here. This could be written like this:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None)
def add(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char in current.children:
current = current.children[char]
else:
node_to_insert = Node(char)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return False
current = current.children[char]
return True
def remove(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return
elif char is None:
del current.children[char]
else:
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
else:
word += child.char
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str]):
self.char = char
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
t.remove("toto")
print(t.retrieve_all_words())
print(t.contains("toto"))
Back to word = word + child.char
Something I should have mentionned earlier but spotted on the late.
In _retrieve_all_words, you add a char to word only to remove it afterward. It is much clearer (and more efficient?) to just write word + char in the places where you actually need to word with the added character.
Then, the code becomes:
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
elif child.children:
self._retrieve_all_words(child, word + child.char, words)
return words
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration isis_end_of_word = i == len(word) - 1inside the loop, which wouldn't be needed outside and after the loop.
â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
@Josay: I know that. As an alternative, you could simply callcurrent.is_word = Trueafter the loop, and removeis_word: boolfromNode.__init__. Here's the code
â Eric Duminil
Mar 26 at 12:13
 |Â
show 3 more comments
up vote
4
down vote
accepted
up vote
4
down vote
accepted
Your code looks very good and the type annotation is a really nice touch
Also, your code is well organised and easy to understand. Some documentation could be a good idea (but it's better to have no doc rather than bad doc). Also, you could consider writing unit tests.
Let's try to see what could be improved/made more Pythonic.
Loop like a native
I highly recommand Ned Batchelder's excellent talk "loop like a native". One of the idea is that whenever you are using for i in range(len(iterable)), you are probably doing it wrong. In your case, you could use for i, char in enumerate(word):.
End of word
The end-of-word detection could be done in a single statement: is_end_of_word = i == len(word) - 1. Also, you can get rid of the definition before the loop and even in the loops, sometimes, you could define it only behind the if char in current.children: because you use it only there.
Reorganise the logic
Sometimes, you check if something is empty and then if it contains a particular element. This can be factorised out:
Also, simplifying the code if (cond) return False else return True, you could rewrite contains:
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
Then in _retrieve_all_words, you can get rid of continue by using a simple else which makes things more explicit. Then, you can actually factorise out the common code at the end of the 2 branches and get the more simple:
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
Finally, you can use += to simplify word = word + child.char into word += child.char.
At this stage, the code looks like:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char in current.children:
current = current.children[char]
else:
is_end_of_word = i == len(word) - 1
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
def remove(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char not in current.children:
return
is_end_of_word = i == len(word) - 1
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word += child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
Storing things in a different format
Instead of maintaining a is_word attribute, maybe you could add store a sentinel like None to say that we have a full word here. This could be written like this:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None)
def add(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char in current.children:
current = current.children[char]
else:
node_to_insert = Node(char)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return False
current = current.children[char]
return True
def remove(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return
elif char is None:
del current.children[char]
else:
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
else:
word += child.char
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str]):
self.char = char
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
t.remove("toto")
print(t.retrieve_all_words())
print(t.contains("toto"))
Back to word = word + child.char
Something I should have mentionned earlier but spotted on the late.
In _retrieve_all_words, you add a char to word only to remove it afterward. It is much clearer (and more efficient?) to just write word + char in the places where you actually need to word with the added character.
Then, the code becomes:
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
elif child.children:
self._retrieve_all_words(child, word + child.char, words)
return words
Your code looks very good and the type annotation is a really nice touch
Also, your code is well organised and easy to understand. Some documentation could be a good idea (but it's better to have no doc rather than bad doc). Also, you could consider writing unit tests.
Let's try to see what could be improved/made more Pythonic.
Loop like a native
I highly recommand Ned Batchelder's excellent talk "loop like a native". One of the idea is that whenever you are using for i in range(len(iterable)), you are probably doing it wrong. In your case, you could use for i, char in enumerate(word):.
End of word
The end-of-word detection could be done in a single statement: is_end_of_word = i == len(word) - 1. Also, you can get rid of the definition before the loop and even in the loops, sometimes, you could define it only behind the if char in current.children: because you use it only there.
Reorganise the logic
Sometimes, you check if something is empty and then if it contains a particular element. This can be factorised out:
Also, simplifying the code if (cond) return False else return True, you could rewrite contains:
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
Then in _retrieve_all_words, you can get rid of continue by using a simple else which makes things more explicit. Then, you can actually factorise out the common code at the end of the 2 branches and get the more simple:
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
Finally, you can use += to simplify word = word + child.char into word += child.char.
At this stage, the code looks like:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None, is_word=False)
def add(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char in current.children:
current = current.children[char]
else:
is_end_of_word = i == len(word) - 1
node_to_insert = Node(char, is_end_of_word)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in word:
if char not in current.children:
return False
current = current.children[char]
return current.is_word
def remove(self, word: str) -> None:
current = self.root
for i, char in enumerate(word):
if char not in current.children:
return
is_end_of_word = i == len(word) - 1
if current.children[char].is_word and is_end_of_word:
current.children[char].is_word = False
return
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
word += child.char
if child.is_word:
words.append(word)
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str], is_word: bool):
self.char = char
self.is_word = is_word
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
Storing things in a different format
Instead of maintaining a is_word attribute, maybe you could add store a sentinel like None to say that we have a full word here. This could be written like this:
from typing import Optional
class Trie:
def __init__(self):
self.root = Node(char=None)
def add(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char in current.children:
current = current.children[char]
else:
node_to_insert = Node(char)
current.add_child(node_to_insert)
current = node_to_insert
def contains(self, word: str) -> bool:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return False
current = current.children[char]
return True
def remove(self, word: str) -> None:
current = self.root
for char in list(word) + [None]:
if char not in current.children:
return
elif char is None:
del current.children[char]
else:
current = current.children[char]
def retrieve_all_words(self) -> list:
return self._retrieve_all_words(self.root, '', )
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
else:
word += child.char
if child.children:
self._retrieve_all_words(child, word, words)
word = word[:-1]
return words
class Node:
def __init__(self, char: Optional[str]):
self.char = char
self.children =
def add_child(self, node: 'Node'):
self.children[node.char] = node
t = Trie()
t.add("toto")
t.add("tutu")
t.add("foobar")
print(t)
print(t.retrieve_all_words())
print(t.contains("tot"))
print(t.contains("toto"))
print(t.contains("totot"))
t.remove("toto")
print(t.retrieve_all_words())
print(t.contains("toto"))
Back to word = word + child.char
Something I should have mentionned earlier but spotted on the late.
In _retrieve_all_words, you add a char to word only to remove it afterward. It is much clearer (and more efficient?) to just write word + char in the places where you actually need to word with the added character.
Then, the code becomes:
def _retrieve_all_words(self, current: 'Node', word: str, words: list) -> list:
for child in current.children.values():
if child.char is None:
words.append(word)
elif child.children:
self._retrieve_all_words(child, word + child.char, words)
return words
edited Mar 26 at 11:18
answered Mar 25 at 17:42
Josay
23.8k13580
23.8k13580
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration isis_end_of_word = i == len(word) - 1inside the loop, which wouldn't be needed outside and after the loop.
â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
@Josay: I know that. As an alternative, you could simply callcurrent.is_word = Trueafter the loop, and removeis_word: boolfromNode.__init__. Here's the code
â Eric Duminil
Mar 26 at 12:13
 |Â
show 3 more comments
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration isis_end_of_word = i == len(word) - 1inside the loop, which wouldn't be needed outside and after the loop.
â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
@Josay: I know that. As an alternative, you could simply callcurrent.is_word = Trueafter the loop, and removeis_word: boolfromNode.__init__. Here's the code
â Eric Duminil
Mar 26 at 12:13
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
I tested your proposal, it works nicely, so you can remove the disclaimer :)
â kekko12
Mar 25 at 19:58
Nice. The only thing that looks weird to me in the first iteration is
is_end_of_word = i == len(word) - 1 inside the loop, which wouldn't be needed outside and after the loop.â Eric Duminil
Mar 26 at 9:15
Nice. The only thing that looks weird to me in the first iteration is
is_end_of_word = i == len(word) - 1 inside the loop, which wouldn't be needed outside and after the loop.â Eric Duminil
Mar 26 at 9:15
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@EricDuminil I am not sure to understand, why would it be needed outside/after the loop? We define it just before using it and never use it afterward. We could even get rid of it but the variable has a name which helps understand what is going on...
â Josay
Mar 26 at 9:30
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
@Josay: It doesn't seem to make sense to ask inside the loop if the loop is finished. After the loop, you know it's finished.
â Eric Duminil
Mar 26 at 9:46
1
1
@Josay: I know that. As an alternative, you could simply call
current.is_word = Trueafter the loop, and remove is_word: bool from Node.__init__. Here's the codeâ Eric Duminil
Mar 26 at 12:13
@Josay: I know that. As an alternative, you could simply call
current.is_word = Trueafter the loop, and remove is_word: bool from Node.__init__. Here's the codeâ Eric Duminil
Mar 26 at 12:13
 |Â
show 3 more comments
up vote
4
down vote
this is a small thing, but the API you provide isn't very pythonic. rather than using a containsmethod, you should overload in. similarly, rather than defining return_all_words, you should define iteration, so you can loop directly through it, and then the list conversion will just be list(tried). these may seem insignificant, but this type of consistency is what makes python feel nice to use.
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
add a comment |Â
up vote
4
down vote
this is a small thing, but the API you provide isn't very pythonic. rather than using a containsmethod, you should overload in. similarly, rather than defining return_all_words, you should define iteration, so you can loop directly through it, and then the list conversion will just be list(tried). these may seem insignificant, but this type of consistency is what makes python feel nice to use.
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
add a comment |Â
up vote
4
down vote
up vote
4
down vote
this is a small thing, but the API you provide isn't very pythonic. rather than using a containsmethod, you should overload in. similarly, rather than defining return_all_words, you should define iteration, so you can loop directly through it, and then the list conversion will just be list(tried). these may seem insignificant, but this type of consistency is what makes python feel nice to use.
this is a small thing, but the API you provide isn't very pythonic. rather than using a containsmethod, you should overload in. similarly, rather than defining return_all_words, you should define iteration, so you can loop directly through it, and then the list conversion will just be list(tried). these may seem insignificant, but this type of consistency is what makes python feel nice to use.
answered Mar 25 at 23:37
Oscar Smith
2,625922
2,625922
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
add a comment |Â
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
Great observations, that is the kind of feedback that makes whoever approaches a new language feeling more confident about things. The syntax of a language is the easiest thing to learn. The hard bit comes with specific concepts implicitly used throughout the community. Thanks!
â kekko12
Mar 26 at 12:32
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f190433%2ftrie-implementation-with-python-3%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
1
Welcome to CodeReview.SE! Your question looks nice and the type annotation is a very nice touch. An example with the expected output could make it even better :-)
â Josay
Mar 25 at 16:05
Hey, thank you @Josay, I wrote some BDD scenarios for testing, using Behave. I wasn't sure that including those scenarios was ok for the codereview.stackexcange platform. Would you suggest me to add also the gherkin files and the implementation of it?
â kekko12
Mar 25 at 19:51
1
Great @kekko12 , you actually seem to know a lot more than I do about BDD. I was more thinking about small examples like the one I provided in my answer. Because your code was clear enough, I way able to use it without any example but usually it helps for the review to have examples.
â Josay
Mar 25 at 20:37