Autocomplete implementation in Python

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
3
down vote

favorite
1












I have an autocomplete implementation in Python, and it passed all of the unit tests that I have included at the bottom.



#!python

import random
import time


def get_words(filename):
with open(filename, 'r') as f:
words_list = f.read().split()
return words_list
print(words_list)


def binarysearch(list_, item, left=None, right=None):
if left is None and right is None:
left = 0
right = len(list_) - 1
middle = (right + left) // 2
item_length = len(item)
middle_item = list_[middle]
if middle_item[:item_length].lower() == item.lower():
return find_all_words(list_, item, middle)
elif middle_item[:item_length].lower() < item.lower():
return binarysearch(list_, item, middle + 1, right)
elif middle_item[:item_length].lower() > item.lower():
return binarysearch(list_, item, left, middle - 1)
elif left > right:
return None


def find_all_words(list_, item, middle):
predicted_words = [list_[middle]]
item_length = len(item)
left_match = True
right_match = False
left_index = middle
right_index = middle
while left_match:
left_index -= 1
if left_index < 0:
left_match = False
else:
word = list_[left_index]
if word[:item_length] == item:
predicted_words.append(word)
else:
left_match = False
while right_match:
right_index += 1
if right_index >= len(list_):
right_match = False
else:
word = list_[right_index]
if word[:item_length] == item:
predicted_words.append(word)
else:
right_match = False
return predicted_words

def autocomplete(prefix=""):
filename = "/usr/share/dict/words"
words_list = get_words(filename)
predicted_words = binarysearch(words_list, prefix)
return predicted_words

def benchmark(all_prefixes):
t1 = time.time()
for prefix in all_prefixes:
autocomplete(prefix)
t2 = time.time()
return t2 - t1


def main():
all_words = get_words('/usr/share/dict/words')
all_prefixes = set([word[:len(word)//2] for word in all_words])
time = benchmark(all_prefixes)
print('Took seconds to benchmark prefixes on words'.format(time, len(all_prefixes), len(all_words)))
# import sys
# prefix = sys.argv[1]
# words = autocomplete(prefix)
# print(words)

if __name__ == '__main__':
main()

# autocomplete_test.py


#!python

from autocomplete import insert_word
from tries import Trie
import unittest


class TrieTest(unittest.TestCase):

def test_init(self):
trie = Trie()
data, is_word = trie.root.data
assert data == ""
assert is_word is False
assert trie.root.next ==

def test_insert_with_3_items(self):
trie = Trie()
assert trie.root.data == ('', False)
assert trie.root.next ==
trie.insert("a")
assert trie.root.next['a'] is not None
assert len(trie.root.next) == 1
node = trie.root.next['a']
assert node.data == ('a', True)
assert node.next ==
trie.insert("a")
assert trie.root.next['a'] is not None
assert len(trie.root.next) == 1
node = trie.root.next['a']
assert node.data == ('a', True)
assert node.next ==
trie.insert("b")
assert trie.root.next['b'] is not None
assert len(trie.root.next) == 2
node = trie.root.next['b']
assert node.data == ('b', True)
assert node.next ==
trie.insert("c")
assert trie.root.next['c'] is not None
assert len(trie.root.next) == 3
node = trie.root.next['c']
assert node.data == ('c', True)
assert node.next ==

def test_inset_with_7_items(self):
trie = Trie()
words_list = ['a', 'app', 'apple', 'be', 'bee', 'beat', 'c']
for word in words_list:
trie.insert(word)
assert trie.root.data == ('', False)
assert len(trie.root.next) == 3

assert trie.root.next['a'] is not None
node = trie.root.next['a']
assert node.data == ('a', True)
assert len(node.next) == 1
assert node.next['p'] is not None
node = node.next['p']
assert node.data == ('ap', False)
assert len(node.next) == 1
assert node.next['p'] is not None
node = node.next['p']
assert node.data == ('app', True)
assert len(node.next) == 1
assert node.next['l'] is not None
node = node.next['l']
assert node.data == ('appl', False)
assert len(node.next) == 1
assert node.next['e'] is not None
node = node.next['e']
assert node.data == ('apple', True)
assert len(node.next) == 0

assert trie.root.next['b'] is not None
node = trie.root.next['b']
assert node.data == ('b', False)
assert len(node.next) == 1
assert node.next['e'] is not None
node = node.next['e']
assert node.data == ('be', True)
assert len(node.next) == 2
assert node.next['e'] is not None
node1 = node.next['e']
assert node1.data == ('bee', True)
assert len(node1.next) == 0
assert node.next['a'] is not None
node2 = node.next['a']
assert node2.data == ('bea', False)
assert len(node2.next) == 1
assert node2.next['t'] is not None
node = node2.next['t']
assert node.data == ('beat', True)
assert len(node.next) == 0

assert trie.root.next['c'] is not None
node = trie.root.next['c']
assert node.data == ('c', True)
assert len(node.next) == 0


class AutocompleteTest(unittest.TestCase):

def test_insert_word():






share|improve this question



























    up vote
    3
    down vote

    favorite
    1












    I have an autocomplete implementation in Python, and it passed all of the unit tests that I have included at the bottom.



    #!python

    import random
    import time


    def get_words(filename):
    with open(filename, 'r') as f:
    words_list = f.read().split()
    return words_list
    print(words_list)


    def binarysearch(list_, item, left=None, right=None):
    if left is None and right is None:
    left = 0
    right = len(list_) - 1
    middle = (right + left) // 2
    item_length = len(item)
    middle_item = list_[middle]
    if middle_item[:item_length].lower() == item.lower():
    return find_all_words(list_, item, middle)
    elif middle_item[:item_length].lower() < item.lower():
    return binarysearch(list_, item, middle + 1, right)
    elif middle_item[:item_length].lower() > item.lower():
    return binarysearch(list_, item, left, middle - 1)
    elif left > right:
    return None


    def find_all_words(list_, item, middle):
    predicted_words = [list_[middle]]
    item_length = len(item)
    left_match = True
    right_match = False
    left_index = middle
    right_index = middle
    while left_match:
    left_index -= 1
    if left_index < 0:
    left_match = False
    else:
    word = list_[left_index]
    if word[:item_length] == item:
    predicted_words.append(word)
    else:
    left_match = False
    while right_match:
    right_index += 1
    if right_index >= len(list_):
    right_match = False
    else:
    word = list_[right_index]
    if word[:item_length] == item:
    predicted_words.append(word)
    else:
    right_match = False
    return predicted_words

    def autocomplete(prefix=""):
    filename = "/usr/share/dict/words"
    words_list = get_words(filename)
    predicted_words = binarysearch(words_list, prefix)
    return predicted_words

    def benchmark(all_prefixes):
    t1 = time.time()
    for prefix in all_prefixes:
    autocomplete(prefix)
    t2 = time.time()
    return t2 - t1


    def main():
    all_words = get_words('/usr/share/dict/words')
    all_prefixes = set([word[:len(word)//2] for word in all_words])
    time = benchmark(all_prefixes)
    print('Took seconds to benchmark prefixes on words'.format(time, len(all_prefixes), len(all_words)))
    # import sys
    # prefix = sys.argv[1]
    # words = autocomplete(prefix)
    # print(words)

    if __name__ == '__main__':
    main()

    # autocomplete_test.py


    #!python

    from autocomplete import insert_word
    from tries import Trie
    import unittest


    class TrieTest(unittest.TestCase):

    def test_init(self):
    trie = Trie()
    data, is_word = trie.root.data
    assert data == ""
    assert is_word is False
    assert trie.root.next ==

    def test_insert_with_3_items(self):
    trie = Trie()
    assert trie.root.data == ('', False)
    assert trie.root.next ==
    trie.insert("a")
    assert trie.root.next['a'] is not None
    assert len(trie.root.next) == 1
    node = trie.root.next['a']
    assert node.data == ('a', True)
    assert node.next ==
    trie.insert("a")
    assert trie.root.next['a'] is not None
    assert len(trie.root.next) == 1
    node = trie.root.next['a']
    assert node.data == ('a', True)
    assert node.next ==
    trie.insert("b")
    assert trie.root.next['b'] is not None
    assert len(trie.root.next) == 2
    node = trie.root.next['b']
    assert node.data == ('b', True)
    assert node.next ==
    trie.insert("c")
    assert trie.root.next['c'] is not None
    assert len(trie.root.next) == 3
    node = trie.root.next['c']
    assert node.data == ('c', True)
    assert node.next ==

    def test_inset_with_7_items(self):
    trie = Trie()
    words_list = ['a', 'app', 'apple', 'be', 'bee', 'beat', 'c']
    for word in words_list:
    trie.insert(word)
    assert trie.root.data == ('', False)
    assert len(trie.root.next) == 3

    assert trie.root.next['a'] is not None
    node = trie.root.next['a']
    assert node.data == ('a', True)
    assert len(node.next) == 1
    assert node.next['p'] is not None
    node = node.next['p']
    assert node.data == ('ap', False)
    assert len(node.next) == 1
    assert node.next['p'] is not None
    node = node.next['p']
    assert node.data == ('app', True)
    assert len(node.next) == 1
    assert node.next['l'] is not None
    node = node.next['l']
    assert node.data == ('appl', False)
    assert len(node.next) == 1
    assert node.next['e'] is not None
    node = node.next['e']
    assert node.data == ('apple', True)
    assert len(node.next) == 0

    assert trie.root.next['b'] is not None
    node = trie.root.next['b']
    assert node.data == ('b', False)
    assert len(node.next) == 1
    assert node.next['e'] is not None
    node = node.next['e']
    assert node.data == ('be', True)
    assert len(node.next) == 2
    assert node.next['e'] is not None
    node1 = node.next['e']
    assert node1.data == ('bee', True)
    assert len(node1.next) == 0
    assert node.next['a'] is not None
    node2 = node.next['a']
    assert node2.data == ('bea', False)
    assert len(node2.next) == 1
    assert node2.next['t'] is not None
    node = node2.next['t']
    assert node.data == ('beat', True)
    assert len(node.next) == 0

    assert trie.root.next['c'] is not None
    node = trie.root.next['c']
    assert node.data == ('c', True)
    assert len(node.next) == 0


    class AutocompleteTest(unittest.TestCase):

    def test_insert_word():






    share|improve this question























      up vote
      3
      down vote

      favorite
      1









      up vote
      3
      down vote

      favorite
      1






      1





      I have an autocomplete implementation in Python, and it passed all of the unit tests that I have included at the bottom.



      #!python

      import random
      import time


      def get_words(filename):
      with open(filename, 'r') as f:
      words_list = f.read().split()
      return words_list
      print(words_list)


      def binarysearch(list_, item, left=None, right=None):
      if left is None and right is None:
      left = 0
      right = len(list_) - 1
      middle = (right + left) // 2
      item_length = len(item)
      middle_item = list_[middle]
      if middle_item[:item_length].lower() == item.lower():
      return find_all_words(list_, item, middle)
      elif middle_item[:item_length].lower() < item.lower():
      return binarysearch(list_, item, middle + 1, right)
      elif middle_item[:item_length].lower() > item.lower():
      return binarysearch(list_, item, left, middle - 1)
      elif left > right:
      return None


      def find_all_words(list_, item, middle):
      predicted_words = [list_[middle]]
      item_length = len(item)
      left_match = True
      right_match = False
      left_index = middle
      right_index = middle
      while left_match:
      left_index -= 1
      if left_index < 0:
      left_match = False
      else:
      word = list_[left_index]
      if word[:item_length] == item:
      predicted_words.append(word)
      else:
      left_match = False
      while right_match:
      right_index += 1
      if right_index >= len(list_):
      right_match = False
      else:
      word = list_[right_index]
      if word[:item_length] == item:
      predicted_words.append(word)
      else:
      right_match = False
      return predicted_words

      def autocomplete(prefix=""):
      filename = "/usr/share/dict/words"
      words_list = get_words(filename)
      predicted_words = binarysearch(words_list, prefix)
      return predicted_words

      def benchmark(all_prefixes):
      t1 = time.time()
      for prefix in all_prefixes:
      autocomplete(prefix)
      t2 = time.time()
      return t2 - t1


      def main():
      all_words = get_words('/usr/share/dict/words')
      all_prefixes = set([word[:len(word)//2] for word in all_words])
      time = benchmark(all_prefixes)
      print('Took seconds to benchmark prefixes on words'.format(time, len(all_prefixes), len(all_words)))
      # import sys
      # prefix = sys.argv[1]
      # words = autocomplete(prefix)
      # print(words)

      if __name__ == '__main__':
      main()

      # autocomplete_test.py


      #!python

      from autocomplete import insert_word
      from tries import Trie
      import unittest


      class TrieTest(unittest.TestCase):

      def test_init(self):
      trie = Trie()
      data, is_word = trie.root.data
      assert data == ""
      assert is_word is False
      assert trie.root.next ==

      def test_insert_with_3_items(self):
      trie = Trie()
      assert trie.root.data == ('', False)
      assert trie.root.next ==
      trie.insert("a")
      assert trie.root.next['a'] is not None
      assert len(trie.root.next) == 1
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert node.next ==
      trie.insert("a")
      assert trie.root.next['a'] is not None
      assert len(trie.root.next) == 1
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert node.next ==
      trie.insert("b")
      assert trie.root.next['b'] is not None
      assert len(trie.root.next) == 2
      node = trie.root.next['b']
      assert node.data == ('b', True)
      assert node.next ==
      trie.insert("c")
      assert trie.root.next['c'] is not None
      assert len(trie.root.next) == 3
      node = trie.root.next['c']
      assert node.data == ('c', True)
      assert node.next ==

      def test_inset_with_7_items(self):
      trie = Trie()
      words_list = ['a', 'app', 'apple', 'be', 'bee', 'beat', 'c']
      for word in words_list:
      trie.insert(word)
      assert trie.root.data == ('', False)
      assert len(trie.root.next) == 3

      assert trie.root.next['a'] is not None
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert len(node.next) == 1
      assert node.next['p'] is not None
      node = node.next['p']
      assert node.data == ('ap', False)
      assert len(node.next) == 1
      assert node.next['p'] is not None
      node = node.next['p']
      assert node.data == ('app', True)
      assert len(node.next) == 1
      assert node.next['l'] is not None
      node = node.next['l']
      assert node.data == ('appl', False)
      assert len(node.next) == 1
      assert node.next['e'] is not None
      node = node.next['e']
      assert node.data == ('apple', True)
      assert len(node.next) == 0

      assert trie.root.next['b'] is not None
      node = trie.root.next['b']
      assert node.data == ('b', False)
      assert len(node.next) == 1
      assert node.next['e'] is not None
      node = node.next['e']
      assert node.data == ('be', True)
      assert len(node.next) == 2
      assert node.next['e'] is not None
      node1 = node.next['e']
      assert node1.data == ('bee', True)
      assert len(node1.next) == 0
      assert node.next['a'] is not None
      node2 = node.next['a']
      assert node2.data == ('bea', False)
      assert len(node2.next) == 1
      assert node2.next['t'] is not None
      node = node2.next['t']
      assert node.data == ('beat', True)
      assert len(node.next) == 0

      assert trie.root.next['c'] is not None
      node = trie.root.next['c']
      assert node.data == ('c', True)
      assert len(node.next) == 0


      class AutocompleteTest(unittest.TestCase):

      def test_insert_word():






      share|improve this question













      I have an autocomplete implementation in Python, and it passed all of the unit tests that I have included at the bottom.



      #!python

      import random
      import time


      def get_words(filename):
      with open(filename, 'r') as f:
      words_list = f.read().split()
      return words_list
      print(words_list)


      def binarysearch(list_, item, left=None, right=None):
      if left is None and right is None:
      left = 0
      right = len(list_) - 1
      middle = (right + left) // 2
      item_length = len(item)
      middle_item = list_[middle]
      if middle_item[:item_length].lower() == item.lower():
      return find_all_words(list_, item, middle)
      elif middle_item[:item_length].lower() < item.lower():
      return binarysearch(list_, item, middle + 1, right)
      elif middle_item[:item_length].lower() > item.lower():
      return binarysearch(list_, item, left, middle - 1)
      elif left > right:
      return None


      def find_all_words(list_, item, middle):
      predicted_words = [list_[middle]]
      item_length = len(item)
      left_match = True
      right_match = False
      left_index = middle
      right_index = middle
      while left_match:
      left_index -= 1
      if left_index < 0:
      left_match = False
      else:
      word = list_[left_index]
      if word[:item_length] == item:
      predicted_words.append(word)
      else:
      left_match = False
      while right_match:
      right_index += 1
      if right_index >= len(list_):
      right_match = False
      else:
      word = list_[right_index]
      if word[:item_length] == item:
      predicted_words.append(word)
      else:
      right_match = False
      return predicted_words

      def autocomplete(prefix=""):
      filename = "/usr/share/dict/words"
      words_list = get_words(filename)
      predicted_words = binarysearch(words_list, prefix)
      return predicted_words

      def benchmark(all_prefixes):
      t1 = time.time()
      for prefix in all_prefixes:
      autocomplete(prefix)
      t2 = time.time()
      return t2 - t1


      def main():
      all_words = get_words('/usr/share/dict/words')
      all_prefixes = set([word[:len(word)//2] for word in all_words])
      time = benchmark(all_prefixes)
      print('Took seconds to benchmark prefixes on words'.format(time, len(all_prefixes), len(all_words)))
      # import sys
      # prefix = sys.argv[1]
      # words = autocomplete(prefix)
      # print(words)

      if __name__ == '__main__':
      main()

      # autocomplete_test.py


      #!python

      from autocomplete import insert_word
      from tries import Trie
      import unittest


      class TrieTest(unittest.TestCase):

      def test_init(self):
      trie = Trie()
      data, is_word = trie.root.data
      assert data == ""
      assert is_word is False
      assert trie.root.next ==

      def test_insert_with_3_items(self):
      trie = Trie()
      assert trie.root.data == ('', False)
      assert trie.root.next ==
      trie.insert("a")
      assert trie.root.next['a'] is not None
      assert len(trie.root.next) == 1
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert node.next ==
      trie.insert("a")
      assert trie.root.next['a'] is not None
      assert len(trie.root.next) == 1
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert node.next ==
      trie.insert("b")
      assert trie.root.next['b'] is not None
      assert len(trie.root.next) == 2
      node = trie.root.next['b']
      assert node.data == ('b', True)
      assert node.next ==
      trie.insert("c")
      assert trie.root.next['c'] is not None
      assert len(trie.root.next) == 3
      node = trie.root.next['c']
      assert node.data == ('c', True)
      assert node.next ==

      def test_inset_with_7_items(self):
      trie = Trie()
      words_list = ['a', 'app', 'apple', 'be', 'bee', 'beat', 'c']
      for word in words_list:
      trie.insert(word)
      assert trie.root.data == ('', False)
      assert len(trie.root.next) == 3

      assert trie.root.next['a'] is not None
      node = trie.root.next['a']
      assert node.data == ('a', True)
      assert len(node.next) == 1
      assert node.next['p'] is not None
      node = node.next['p']
      assert node.data == ('ap', False)
      assert len(node.next) == 1
      assert node.next['p'] is not None
      node = node.next['p']
      assert node.data == ('app', True)
      assert len(node.next) == 1
      assert node.next['l'] is not None
      node = node.next['l']
      assert node.data == ('appl', False)
      assert len(node.next) == 1
      assert node.next['e'] is not None
      node = node.next['e']
      assert node.data == ('apple', True)
      assert len(node.next) == 0

      assert trie.root.next['b'] is not None
      node = trie.root.next['b']
      assert node.data == ('b', False)
      assert len(node.next) == 1
      assert node.next['e'] is not None
      node = node.next['e']
      assert node.data == ('be', True)
      assert len(node.next) == 2
      assert node.next['e'] is not None
      node1 = node.next['e']
      assert node1.data == ('bee', True)
      assert len(node1.next) == 0
      assert node.next['a'] is not None
      node2 = node.next['a']
      assert node2.data == ('bea', False)
      assert len(node2.next) == 1
      assert node2.next['t'] is not None
      node = node2.next['t']
      assert node.data == ('beat', True)
      assert len(node.next) == 0

      assert trie.root.next['c'] is not None
      node = trie.root.next['c']
      assert node.data == ('c', True)
      assert len(node.next) == 0


      class AutocompleteTest(unittest.TestCase):

      def test_insert_word():








      share|improve this question












      share|improve this question




      share|improve this question








      edited Jun 28 at 1:45









      Jamal♦

      30.1k11114225




      30.1k11114225









      asked Jun 27 at 22:58









      NinjaG

      654221




      654221




















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          5
          down vote



          accepted
          +25










          You don't seem to have used the random module, so that import can be deleted. In your first function get_words(), you have put a print(words_list) immediately after an unconditional return, which is unreachable. Unless this is for debugging, it is best to either put it before the return statement or delete it altogether.




          binarysearch is composed of two words, and should be separated with an underscore for consistency. The conditional for the first if statement is a bit redundant, since whenever one index is None, the other must also be the same. If you do want to check both, you should probably use an or rather than and, since neither is allowed to be None.



          The final elif of the function is again unreachable, this time because one of the previous comparisons (==, <, >) will always be true. This implementation means your binary search will never terminate correctly if the search term is not present in the dictionary — instead, check if the search should terminate at the beginning of the function.



          You can also do the slicing and lowercasing of your variables immediately instead of repeating for each if statement.



          def binary_search(list_, item, left=None, right=None):
          if left is None or right is None:
          left = 0
          right = len(list_) - 1
          elif left > right:
          return None
          item = item.lower()
          middle = (right + left) // 2
          sliced_middle_item = list_[middle][:len(item)].lower()
          if sliced_middle_item == item:
          return find_all_words(list_, item, middle)
          if sliced_middle_item < item:
          return binary_search(list_, item, middle + 1, right)
          if sliced_middle_item > item:
          return binary_search(list_, item, left, middle - 1)


          Since this is a single branch problem and Python does not optimise tail calls, it may be more efficent to impement your binary search iteratively using a loop instead of recursively.



          def binary_search(list_, item):
          left = 0
          right = len(list_) - 1
          item = item.lower()
          while True:
          if left > right:
          return None
          middle = (left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          elif sliced_middle_item > item:
          right = middle - 1
          elif sliced_middle_item == item:
          return find_all_words(list_, item, middle)



          In find_all_words, you have initialized right_match to False, which means the second loop will never run, and you will not find any values to the right of the first match. Also, using a flag variable that controls the loop is really no different from using a break statement, except you won't need to put the second half of your loop in an else block.



          Instead of building your list one at a time using .append(), it is probably more efficent to find the first match and last match and then using the indcies to slice the word list. As a bonus this will also maintain the lexical ordering of the list. This will also mean you can use a binary search to find the first and last matches instead of a linear search. It would be best to continue from the previous search rather than starting from scratch, taking left and right as parameters as well.



          def binary_find_all(list_, item, left, middle, right):
          half_right = middle
          half_left = middle
          while left < half_right:
          middle = (left + half_right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          else:
          half_right = middle
          while half_left < right:
          middle = (half_left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item == item:
          half_left = middle + 1
          else:
          right = middle
          return list_[left:right]



          It is really unnessary and inefficent to reopen the file and reset the word list for every invocation of autocomplete() — on my machine, 1000 invocations of the binary search takes around 60 ms for most search terms, while 1000 invocations of autocomplete() as it is written would take over 30 seconds: 500 times slower! Just delete it, or make it an alias of binary_search() if you have to.




          For your unit tests, I think your tests for the above code has been cut off, either by the site or during cutting and pasting. I only see the tests for the Trie data type. I'm not sure why you're testing it, or why you haven't used it in your main code, but the tests themselves look fine.






          share|improve this answer





















          • i don't think I miss any unit testings of my file. you can find my file here at github
            – NinjaG
            Jul 4 at 3:19










          • @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
            – Alpha3031
            Jul 4 at 6:59










          Your Answer




          StackExchange.ifUsing("editor", function ()
          return StackExchange.using("mathjaxEditing", function ()
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          );
          );
          , "mathjax-editing");

          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "196"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: false,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );








           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f197389%2fautocomplete-implementation-in-python%23new-answer', 'question_page');

          );

          Post as a guest






























          1 Answer
          1






          active

          oldest

          votes








          1 Answer
          1






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes








          up vote
          5
          down vote



          accepted
          +25










          You don't seem to have used the random module, so that import can be deleted. In your first function get_words(), you have put a print(words_list) immediately after an unconditional return, which is unreachable. Unless this is for debugging, it is best to either put it before the return statement or delete it altogether.




          binarysearch is composed of two words, and should be separated with an underscore for consistency. The conditional for the first if statement is a bit redundant, since whenever one index is None, the other must also be the same. If you do want to check both, you should probably use an or rather than and, since neither is allowed to be None.



          The final elif of the function is again unreachable, this time because one of the previous comparisons (==, <, >) will always be true. This implementation means your binary search will never terminate correctly if the search term is not present in the dictionary — instead, check if the search should terminate at the beginning of the function.



          You can also do the slicing and lowercasing of your variables immediately instead of repeating for each if statement.



          def binary_search(list_, item, left=None, right=None):
          if left is None or right is None:
          left = 0
          right = len(list_) - 1
          elif left > right:
          return None
          item = item.lower()
          middle = (right + left) // 2
          sliced_middle_item = list_[middle][:len(item)].lower()
          if sliced_middle_item == item:
          return find_all_words(list_, item, middle)
          if sliced_middle_item < item:
          return binary_search(list_, item, middle + 1, right)
          if sliced_middle_item > item:
          return binary_search(list_, item, left, middle - 1)


          Since this is a single branch problem and Python does not optimise tail calls, it may be more efficent to impement your binary search iteratively using a loop instead of recursively.



          def binary_search(list_, item):
          left = 0
          right = len(list_) - 1
          item = item.lower()
          while True:
          if left > right:
          return None
          middle = (left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          elif sliced_middle_item > item:
          right = middle - 1
          elif sliced_middle_item == item:
          return find_all_words(list_, item, middle)



          In find_all_words, you have initialized right_match to False, which means the second loop will never run, and you will not find any values to the right of the first match. Also, using a flag variable that controls the loop is really no different from using a break statement, except you won't need to put the second half of your loop in an else block.



          Instead of building your list one at a time using .append(), it is probably more efficent to find the first match and last match and then using the indcies to slice the word list. As a bonus this will also maintain the lexical ordering of the list. This will also mean you can use a binary search to find the first and last matches instead of a linear search. It would be best to continue from the previous search rather than starting from scratch, taking left and right as parameters as well.



          def binary_find_all(list_, item, left, middle, right):
          half_right = middle
          half_left = middle
          while left < half_right:
          middle = (left + half_right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          else:
          half_right = middle
          while half_left < right:
          middle = (half_left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item == item:
          half_left = middle + 1
          else:
          right = middle
          return list_[left:right]



          It is really unnessary and inefficent to reopen the file and reset the word list for every invocation of autocomplete() — on my machine, 1000 invocations of the binary search takes around 60 ms for most search terms, while 1000 invocations of autocomplete() as it is written would take over 30 seconds: 500 times slower! Just delete it, or make it an alias of binary_search() if you have to.




          For your unit tests, I think your tests for the above code has been cut off, either by the site or during cutting and pasting. I only see the tests for the Trie data type. I'm not sure why you're testing it, or why you haven't used it in your main code, but the tests themselves look fine.






          share|improve this answer





















          • i don't think I miss any unit testings of my file. you can find my file here at github
            – NinjaG
            Jul 4 at 3:19










          • @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
            – Alpha3031
            Jul 4 at 6:59














          up vote
          5
          down vote



          accepted
          +25










          You don't seem to have used the random module, so that import can be deleted. In your first function get_words(), you have put a print(words_list) immediately after an unconditional return, which is unreachable. Unless this is for debugging, it is best to either put it before the return statement or delete it altogether.




          binarysearch is composed of two words, and should be separated with an underscore for consistency. The conditional for the first if statement is a bit redundant, since whenever one index is None, the other must also be the same. If you do want to check both, you should probably use an or rather than and, since neither is allowed to be None.



          The final elif of the function is again unreachable, this time because one of the previous comparisons (==, <, >) will always be true. This implementation means your binary search will never terminate correctly if the search term is not present in the dictionary — instead, check if the search should terminate at the beginning of the function.



          You can also do the slicing and lowercasing of your variables immediately instead of repeating for each if statement.



          def binary_search(list_, item, left=None, right=None):
          if left is None or right is None:
          left = 0
          right = len(list_) - 1
          elif left > right:
          return None
          item = item.lower()
          middle = (right + left) // 2
          sliced_middle_item = list_[middle][:len(item)].lower()
          if sliced_middle_item == item:
          return find_all_words(list_, item, middle)
          if sliced_middle_item < item:
          return binary_search(list_, item, middle + 1, right)
          if sliced_middle_item > item:
          return binary_search(list_, item, left, middle - 1)


          Since this is a single branch problem and Python does not optimise tail calls, it may be more efficent to impement your binary search iteratively using a loop instead of recursively.



          def binary_search(list_, item):
          left = 0
          right = len(list_) - 1
          item = item.lower()
          while True:
          if left > right:
          return None
          middle = (left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          elif sliced_middle_item > item:
          right = middle - 1
          elif sliced_middle_item == item:
          return find_all_words(list_, item, middle)



          In find_all_words, you have initialized right_match to False, which means the second loop will never run, and you will not find any values to the right of the first match. Also, using a flag variable that controls the loop is really no different from using a break statement, except you won't need to put the second half of your loop in an else block.



          Instead of building your list one at a time using .append(), it is probably more efficent to find the first match and last match and then using the indcies to slice the word list. As a bonus this will also maintain the lexical ordering of the list. This will also mean you can use a binary search to find the first and last matches instead of a linear search. It would be best to continue from the previous search rather than starting from scratch, taking left and right as parameters as well.



          def binary_find_all(list_, item, left, middle, right):
          half_right = middle
          half_left = middle
          while left < half_right:
          middle = (left + half_right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          else:
          half_right = middle
          while half_left < right:
          middle = (half_left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item == item:
          half_left = middle + 1
          else:
          right = middle
          return list_[left:right]



          It is really unnessary and inefficent to reopen the file and reset the word list for every invocation of autocomplete() — on my machine, 1000 invocations of the binary search takes around 60 ms for most search terms, while 1000 invocations of autocomplete() as it is written would take over 30 seconds: 500 times slower! Just delete it, or make it an alias of binary_search() if you have to.




          For your unit tests, I think your tests for the above code has been cut off, either by the site or during cutting and pasting. I only see the tests for the Trie data type. I'm not sure why you're testing it, or why you haven't used it in your main code, but the tests themselves look fine.






          share|improve this answer





















          • i don't think I miss any unit testings of my file. you can find my file here at github
            – NinjaG
            Jul 4 at 3:19










          • @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
            – Alpha3031
            Jul 4 at 6:59












          up vote
          5
          down vote



          accepted
          +25







          up vote
          5
          down vote



          accepted
          +25




          +25




          You don't seem to have used the random module, so that import can be deleted. In your first function get_words(), you have put a print(words_list) immediately after an unconditional return, which is unreachable. Unless this is for debugging, it is best to either put it before the return statement or delete it altogether.




          binarysearch is composed of two words, and should be separated with an underscore for consistency. The conditional for the first if statement is a bit redundant, since whenever one index is None, the other must also be the same. If you do want to check both, you should probably use an or rather than and, since neither is allowed to be None.



          The final elif of the function is again unreachable, this time because one of the previous comparisons (==, <, >) will always be true. This implementation means your binary search will never terminate correctly if the search term is not present in the dictionary — instead, check if the search should terminate at the beginning of the function.



          You can also do the slicing and lowercasing of your variables immediately instead of repeating for each if statement.



          def binary_search(list_, item, left=None, right=None):
          if left is None or right is None:
          left = 0
          right = len(list_) - 1
          elif left > right:
          return None
          item = item.lower()
          middle = (right + left) // 2
          sliced_middle_item = list_[middle][:len(item)].lower()
          if sliced_middle_item == item:
          return find_all_words(list_, item, middle)
          if sliced_middle_item < item:
          return binary_search(list_, item, middle + 1, right)
          if sliced_middle_item > item:
          return binary_search(list_, item, left, middle - 1)


          Since this is a single branch problem and Python does not optimise tail calls, it may be more efficent to impement your binary search iteratively using a loop instead of recursively.



          def binary_search(list_, item):
          left = 0
          right = len(list_) - 1
          item = item.lower()
          while True:
          if left > right:
          return None
          middle = (left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          elif sliced_middle_item > item:
          right = middle - 1
          elif sliced_middle_item == item:
          return find_all_words(list_, item, middle)



          In find_all_words, you have initialized right_match to False, which means the second loop will never run, and you will not find any values to the right of the first match. Also, using a flag variable that controls the loop is really no different from using a break statement, except you won't need to put the second half of your loop in an else block.



          Instead of building your list one at a time using .append(), it is probably more efficent to find the first match and last match and then using the indcies to slice the word list. As a bonus this will also maintain the lexical ordering of the list. This will also mean you can use a binary search to find the first and last matches instead of a linear search. It would be best to continue from the previous search rather than starting from scratch, taking left and right as parameters as well.



          def binary_find_all(list_, item, left, middle, right):
          half_right = middle
          half_left = middle
          while left < half_right:
          middle = (left + half_right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          else:
          half_right = middle
          while half_left < right:
          middle = (half_left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item == item:
          half_left = middle + 1
          else:
          right = middle
          return list_[left:right]



          It is really unnessary and inefficent to reopen the file and reset the word list for every invocation of autocomplete() — on my machine, 1000 invocations of the binary search takes around 60 ms for most search terms, while 1000 invocations of autocomplete() as it is written would take over 30 seconds: 500 times slower! Just delete it, or make it an alias of binary_search() if you have to.




          For your unit tests, I think your tests for the above code has been cut off, either by the site or during cutting and pasting. I only see the tests for the Trie data type. I'm not sure why you're testing it, or why you haven't used it in your main code, but the tests themselves look fine.






          share|improve this answer













          You don't seem to have used the random module, so that import can be deleted. In your first function get_words(), you have put a print(words_list) immediately after an unconditional return, which is unreachable. Unless this is for debugging, it is best to either put it before the return statement or delete it altogether.




          binarysearch is composed of two words, and should be separated with an underscore for consistency. The conditional for the first if statement is a bit redundant, since whenever one index is None, the other must also be the same. If you do want to check both, you should probably use an or rather than and, since neither is allowed to be None.



          The final elif of the function is again unreachable, this time because one of the previous comparisons (==, <, >) will always be true. This implementation means your binary search will never terminate correctly if the search term is not present in the dictionary — instead, check if the search should terminate at the beginning of the function.



          You can also do the slicing and lowercasing of your variables immediately instead of repeating for each if statement.



          def binary_search(list_, item, left=None, right=None):
          if left is None or right is None:
          left = 0
          right = len(list_) - 1
          elif left > right:
          return None
          item = item.lower()
          middle = (right + left) // 2
          sliced_middle_item = list_[middle][:len(item)].lower()
          if sliced_middle_item == item:
          return find_all_words(list_, item, middle)
          if sliced_middle_item < item:
          return binary_search(list_, item, middle + 1, right)
          if sliced_middle_item > item:
          return binary_search(list_, item, left, middle - 1)


          Since this is a single branch problem and Python does not optimise tail calls, it may be more efficent to impement your binary search iteratively using a loop instead of recursively.



          def binary_search(list_, item):
          left = 0
          right = len(list_) - 1
          item = item.lower()
          while True:
          if left > right:
          return None
          middle = (left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          elif sliced_middle_item > item:
          right = middle - 1
          elif sliced_middle_item == item:
          return find_all_words(list_, item, middle)



          In find_all_words, you have initialized right_match to False, which means the second loop will never run, and you will not find any values to the right of the first match. Also, using a flag variable that controls the loop is really no different from using a break statement, except you won't need to put the second half of your loop in an else block.



          Instead of building your list one at a time using .append(), it is probably more efficent to find the first match and last match and then using the indcies to slice the word list. As a bonus this will also maintain the lexical ordering of the list. This will also mean you can use a binary search to find the first and last matches instead of a linear search. It would be best to continue from the previous search rather than starting from scratch, taking left and right as parameters as well.



          def binary_find_all(list_, item, left, middle, right):
          half_right = middle
          half_left = middle
          while left < half_right:
          middle = (left + half_right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item < item:
          left = middle + 1
          else:
          half_right = middle
          while half_left < right:
          middle = (half_left + right) // 2
          sliced_middle_item = list_[middle][:len(item)]
          if sliced_middle_item == item:
          half_left = middle + 1
          else:
          right = middle
          return list_[left:right]



          It is really unnessary and inefficent to reopen the file and reset the word list for every invocation of autocomplete() — on my machine, 1000 invocations of the binary search takes around 60 ms for most search terms, while 1000 invocations of autocomplete() as it is written would take over 30 seconds: 500 times slower! Just delete it, or make it an alias of binary_search() if you have to.




          For your unit tests, I think your tests for the above code has been cut off, either by the site or during cutting and pasting. I only see the tests for the Trie data type. I'm not sure why you're testing it, or why you haven't used it in your main code, but the tests themselves look fine.







          share|improve this answer













          share|improve this answer



          share|improve this answer











          answered Jul 3 at 13:09









          Alpha3031

          21819




          21819











          • i don't think I miss any unit testings of my file. you can find my file here at github
            – NinjaG
            Jul 4 at 3:19










          • @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
            – Alpha3031
            Jul 4 at 6:59
















          • i don't think I miss any unit testings of my file. you can find my file here at github
            – NinjaG
            Jul 4 at 3:19










          • @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
            – Alpha3031
            Jul 4 at 6:59















          i don't think I miss any unit testings of my file. you can find my file here at github
          – NinjaG
          Jul 4 at 3:19




          i don't think I miss any unit testings of my file. you can find my file here at github
          – NinjaG
          Jul 4 at 3:19












          @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
          – Alpha3031
          Jul 4 at 6:59




          @NinjaG, I'm afraid that I am unable to find any public repository of gist containing what appears to be your autocomplete linked to the GitHub username in your profile (Jeffchiucp). Nor can I see any code in the class AutocompleteTest, except the line def test_insert_word(): with nothing in the function. I think one more thing I forgot to mention is that, depending on use case, it might be a good idea to add docstrings to your functions. Also, binary search might return instead of None. I'll edit it in only when there's something else significant to add though.
          – Alpha3031
          Jul 4 at 6:59












           

          draft saved


          draft discarded


























           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f197389%2fautocomplete-implementation-in-python%23new-answer', 'question_page');

          );

          Post as a guest













































































          Popular posts from this blog

          Python Lists

          Aion

          JavaScript Array Iteration Methods