Expression calculator in Python

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
4
down vote

favorite












This calculator accepts four operators: +, -, *, /, and works with parentheses.
Feedback on how I could make it easier to read and or more effective would be much appreciated.



This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.



from sys import exit


#Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
def is_number(item):
try:
float(item)
return True
except ValueError:
return False


def set_up_list():
#First part gets string and deletes whitespace
astring = raw_input("Calculation: ")
astring = astring.replace(" ", "")
#Next it will check if there are any unsupported characters in the string
for item in astring:
if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
print ("Unsupported Character: " + item)
exit()
#Then it creates the list and adds each individual character to the list
a_list =
for item in astring:
a_list.append(item)
count = 0
#Finally it combines individual numbers into actual numbers based on user input
while count < len(a_list) - 1:
if is_number(a_list[count]) and a_list[count + 1] == "(":
print ("Program does not accept parentheses directly after number, must have operator in between.")
exit()
if is_number(a_list[count]) and is_number(a_list[count + 1]):
a_list[count] += a_list[count + 1]
del a_list[count + 1]
elif is_number(a_list[count]) and a_list[count + 1] == ".":
try:
x = a_list[count+2]
except IndexError:
print ("Your formatting is off somehow.")
exit()
if is_number(a_list[count + 2]):
a_list[count] += a_list[count + 1] + a_list[count + 2]
del a_list[count + 2]
del a_list[count + 1]
else:
count += 1
return a_list


def perform_operation(n1, operand, n2):
if operand == "+":
return str(float(n1) + float(n2))
elif operand == "-":
return str(float(n1) - float(n2))
elif operand == "*":
return str(float(n1) * float(n2))
elif operand == "/":
try:
n = str(float(n1) / float(n2))
return n
except ZeroDivisionError:
print ("You tried to divide by 0.")
print ("Just for that I am going to terminate myself")
exit()


expression = set_up_list()
emergency_count = 0
#Makes code shorter, short for parentheses
P = ["(", ")"]
#If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
while len(expression) != 1:
#If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
#Will check to see if the first parentheses exists first so that it does not throw an index error
count = 0
while count < len(expression) - 1:
if expression[count] == "(":
if expression[count + 2] == ")":
del expression[count + 2]
del expression[count]
count += 1
#After that is done, it will multiply and divide what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#Then it will add and subtact what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
emergency_count += 1
if emergency_count >= 1000:
print ("Operation was too long or was bugged")
exit()

print (float(expression[0]))






share|improve this question



























    up vote
    4
    down vote

    favorite












    This calculator accepts four operators: +, -, *, /, and works with parentheses.
    Feedback on how I could make it easier to read and or more effective would be much appreciated.



    This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.



    from sys import exit


    #Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
    def is_number(item):
    try:
    float(item)
    return True
    except ValueError:
    return False


    def set_up_list():
    #First part gets string and deletes whitespace
    astring = raw_input("Calculation: ")
    astring = astring.replace(" ", "")
    #Next it will check if there are any unsupported characters in the string
    for item in astring:
    if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
    print ("Unsupported Character: " + item)
    exit()
    #Then it creates the list and adds each individual character to the list
    a_list =
    for item in astring:
    a_list.append(item)
    count = 0
    #Finally it combines individual numbers into actual numbers based on user input
    while count < len(a_list) - 1:
    if is_number(a_list[count]) and a_list[count + 1] == "(":
    print ("Program does not accept parentheses directly after number, must have operator in between.")
    exit()
    if is_number(a_list[count]) and is_number(a_list[count + 1]):
    a_list[count] += a_list[count + 1]
    del a_list[count + 1]
    elif is_number(a_list[count]) and a_list[count + 1] == ".":
    try:
    x = a_list[count+2]
    except IndexError:
    print ("Your formatting is off somehow.")
    exit()
    if is_number(a_list[count + 2]):
    a_list[count] += a_list[count + 1] + a_list[count + 2]
    del a_list[count + 2]
    del a_list[count + 1]
    else:
    count += 1
    return a_list


    def perform_operation(n1, operand, n2):
    if operand == "+":
    return str(float(n1) + float(n2))
    elif operand == "-":
    return str(float(n1) - float(n2))
    elif operand == "*":
    return str(float(n1) * float(n2))
    elif operand == "/":
    try:
    n = str(float(n1) / float(n2))
    return n
    except ZeroDivisionError:
    print ("You tried to divide by 0.")
    print ("Just for that I am going to terminate myself")
    exit()


    expression = set_up_list()
    emergency_count = 0
    #Makes code shorter, short for parentheses
    P = ["(", ")"]
    #If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
    while len(expression) != 1:
    #If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
    #Will check to see if the first parentheses exists first so that it does not throw an index error
    count = 0
    while count < len(expression) - 1:
    if expression[count] == "(":
    if expression[count + 2] == ")":
    del expression[count + 2]
    del expression[count]
    count += 1
    #After that is done, it will multiply and divide what it can
    count = 0
    while count < len(expression) - 1:
    if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
    expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
    del expression[count + 1]
    del expression[count]
    count += 1
    #Then it will add and subtact what it can
    count = 0
    while count < len(expression) - 1:
    if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
    expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
    del expression[count + 1]
    del expression[count]
    count += 1
    #The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
    emergency_count += 1
    if emergency_count >= 1000:
    print ("Operation was too long or was bugged")
    exit()

    print (float(expression[0]))






    share|improve this question























      up vote
      4
      down vote

      favorite









      up vote
      4
      down vote

      favorite











      This calculator accepts four operators: +, -, *, /, and works with parentheses.
      Feedback on how I could make it easier to read and or more effective would be much appreciated.



      This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.



      from sys import exit


      #Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
      def is_number(item):
      try:
      float(item)
      return True
      except ValueError:
      return False


      def set_up_list():
      #First part gets string and deletes whitespace
      astring = raw_input("Calculation: ")
      astring = astring.replace(" ", "")
      #Next it will check if there are any unsupported characters in the string
      for item in astring:
      if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
      print ("Unsupported Character: " + item)
      exit()
      #Then it creates the list and adds each individual character to the list
      a_list =
      for item in astring:
      a_list.append(item)
      count = 0
      #Finally it combines individual numbers into actual numbers based on user input
      while count < len(a_list) - 1:
      if is_number(a_list[count]) and a_list[count + 1] == "(":
      print ("Program does not accept parentheses directly after number, must have operator in between.")
      exit()
      if is_number(a_list[count]) and is_number(a_list[count + 1]):
      a_list[count] += a_list[count + 1]
      del a_list[count + 1]
      elif is_number(a_list[count]) and a_list[count + 1] == ".":
      try:
      x = a_list[count+2]
      except IndexError:
      print ("Your formatting is off somehow.")
      exit()
      if is_number(a_list[count + 2]):
      a_list[count] += a_list[count + 1] + a_list[count + 2]
      del a_list[count + 2]
      del a_list[count + 1]
      else:
      count += 1
      return a_list


      def perform_operation(n1, operand, n2):
      if operand == "+":
      return str(float(n1) + float(n2))
      elif operand == "-":
      return str(float(n1) - float(n2))
      elif operand == "*":
      return str(float(n1) * float(n2))
      elif operand == "/":
      try:
      n = str(float(n1) / float(n2))
      return n
      except ZeroDivisionError:
      print ("You tried to divide by 0.")
      print ("Just for that I am going to terminate myself")
      exit()


      expression = set_up_list()
      emergency_count = 0
      #Makes code shorter, short for parentheses
      P = ["(", ")"]
      #If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
      while len(expression) != 1:
      #If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
      #Will check to see if the first parentheses exists first so that it does not throw an index error
      count = 0
      while count < len(expression) - 1:
      if expression[count] == "(":
      if expression[count + 2] == ")":
      del expression[count + 2]
      del expression[count]
      count += 1
      #After that is done, it will multiply and divide what it can
      count = 0
      while count < len(expression) - 1:
      if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
      expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
      del expression[count + 1]
      del expression[count]
      count += 1
      #Then it will add and subtact what it can
      count = 0
      while count < len(expression) - 1:
      if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
      expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
      del expression[count + 1]
      del expression[count]
      count += 1
      #The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
      emergency_count += 1
      if emergency_count >= 1000:
      print ("Operation was too long or was bugged")
      exit()

      print (float(expression[0]))






      share|improve this question













      This calculator accepts four operators: +, -, *, /, and works with parentheses.
      Feedback on how I could make it easier to read and or more effective would be much appreciated.



      This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.



      from sys import exit


      #Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
      def is_number(item):
      try:
      float(item)
      return True
      except ValueError:
      return False


      def set_up_list():
      #First part gets string and deletes whitespace
      astring = raw_input("Calculation: ")
      astring = astring.replace(" ", "")
      #Next it will check if there are any unsupported characters in the string
      for item in astring:
      if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
      print ("Unsupported Character: " + item)
      exit()
      #Then it creates the list and adds each individual character to the list
      a_list =
      for item in astring:
      a_list.append(item)
      count = 0
      #Finally it combines individual numbers into actual numbers based on user input
      while count < len(a_list) - 1:
      if is_number(a_list[count]) and a_list[count + 1] == "(":
      print ("Program does not accept parentheses directly after number, must have operator in between.")
      exit()
      if is_number(a_list[count]) and is_number(a_list[count + 1]):
      a_list[count] += a_list[count + 1]
      del a_list[count + 1]
      elif is_number(a_list[count]) and a_list[count + 1] == ".":
      try:
      x = a_list[count+2]
      except IndexError:
      print ("Your formatting is off somehow.")
      exit()
      if is_number(a_list[count + 2]):
      a_list[count] += a_list[count + 1] + a_list[count + 2]
      del a_list[count + 2]
      del a_list[count + 1]
      else:
      count += 1
      return a_list


      def perform_operation(n1, operand, n2):
      if operand == "+":
      return str(float(n1) + float(n2))
      elif operand == "-":
      return str(float(n1) - float(n2))
      elif operand == "*":
      return str(float(n1) * float(n2))
      elif operand == "/":
      try:
      n = str(float(n1) / float(n2))
      return n
      except ZeroDivisionError:
      print ("You tried to divide by 0.")
      print ("Just for that I am going to terminate myself")
      exit()


      expression = set_up_list()
      emergency_count = 0
      #Makes code shorter, short for parentheses
      P = ["(", ")"]
      #If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
      while len(expression) != 1:
      #If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
      #Will check to see if the first parentheses exists first so that it does not throw an index error
      count = 0
      while count < len(expression) - 1:
      if expression[count] == "(":
      if expression[count + 2] == ")":
      del expression[count + 2]
      del expression[count]
      count += 1
      #After that is done, it will multiply and divide what it can
      count = 0
      while count < len(expression) - 1:
      if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
      expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
      del expression[count + 1]
      del expression[count]
      count += 1
      #Then it will add and subtact what it can
      count = 0
      while count < len(expression) - 1:
      if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
      expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
      del expression[count + 1]
      del expression[count]
      count += 1
      #The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
      emergency_count += 1
      if emergency_count >= 1000:
      print ("Operation was too long or was bugged")
      exit()

      print (float(expression[0]))








      share|improve this question












      share|improve this question




      share|improve this question








      edited Mar 22 at 2:03









      Jamal♦

      30.1k11114225




      30.1k11114225









      asked Mar 21 at 6:52









      Chuck Bartowski

      213




      213




















          2 Answers
          2






          active

          oldest

          votes

















          up vote
          3
          down vote













          Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)



          • Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.

            • Comments need one space after the #

            • There's some required spaces between phrases you're missing

            • No space after print!

            • Wrap lines to 79 columns


          • The comment before is_number should be a """Doc comment."""


          • set_up_list isn't a good name. I had to read the code and see it used in context to understand what it did. read_expression might be better.

          • You have a lot of use of while with some sort of counter (ie. while count < len(expression) - 1:). In python, we almost exclusively spell this as for e in expression:. If you need the index, then for i, e in enumerate(expression).

          • We typically use 'single quoted strings'. "Double quotes" are usually reserved for format strings (ex. "1 + 1 is ".format(1 + 1)).

          • Your use of in is nice. But, did you know instead of that big long list you can just do item not in '01234567890+-*/.()'? Strings are behave like lists in this way.

          • Do the same thing for P = ['(', ')']. P is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But, x in '()' is nearly as short and gets the point across much better.

          • There is no need for your creates the list and adds each individual character to the list (no need for a_list, just use astring and give it a better name!). Strings are sequences so you can do len(astring), astring[23], etc.

          • You have print()s and exit()s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function like calculate(), which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extending Exception for things like UnclosedParenError or UnexpectedCharacterError) in error conditions. This means you can test it like so:

          from unittest import main, TestCase
          from yourcalculator import calculate


          class TestCalculator(TestCase):
          def test_basic_sums(self):
          self.assertEqual(42, calculate('40 + 2'))


          if __name__ == '__main__':
          main()


          • Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a calculate function. You should use a main() function for this and use the standard if __name__ == '__main__': main():

          def main():
          try:
          while True:
          try:
          print(calculate(input('Enter an expression: ')))
          except CalculatorError as e:
          print(e)
          except KeyboardInterrupt:
          break


          if __name__ == '__main__':
          main()


          • You probably want better errors than Your formatting is off somehow

          • In perform_operation, you use float() (presumably on numbers that you've already called is_number on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things like if foo_wont_raise(bar): foo(bar)

          • I see a lot of del. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list)

          • There's really no need for emergency_count. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised)

          • I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html

          With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).



          You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).



          Intrinsically, we have two different concerns here:



          1. We want to be able to handle lots of arbitrary spacing (ie. 1+1 is the same as 1 + 1 is the same as 1 + 1) and ensure that there are no invalid characters (or bad numbers like 123.456.789)

          2. Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it

          In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:



          • Scanner: remove excess spacing and ensure all characters are valid c in '01234567890+-*/.()' and from each valid character produce a "Token" (ex. in the expression 2 * 20 + 2 the tokens are 2, '*', 20, '+', 2)

          • Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)

          Let's tackle the scanner first. It has three main things it needs to do:



          1. Ignore whitespace (although this has it's limits, we probably don't want 123 456 to become 123456--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that 123 and 456)

          2. Turn symbols into single tokens (ex c in '+-/*()')

          3. Parse numbers (may be multiple characters, can be ill formatted as in 123.456.789)

          Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)



          The scanner could look something like this:



          class Scanner(BaseParser):
          """Scanner scans an input string for calculator tokens and yields them.

          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']
          """

          def scan(self):
          """Yields all tokens in the input."""
          while not self._done():
          # Ignore any whitespace that may be next
          self._consume_whitespace()

          # Emit any symbol tokens that may be next
          yield from self._take(lambda char: char in '+-*/()')

          # Emit any number token that may be next
          yield from self._take_number()

          def _consume_whitespace(self):
          """_take()s whitespace characters, but does not yield them."""
          # Note we need the list here to force evaluation of the generator
          list(self._take(lambda char: char.isspace()))

          def _take_number(self):
          """Yields a single number if there is one next in the input."""

          # Gather up the digits/. forming the next number in the input
          number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))

          # If number is empty, we didn't scan digits, so don't try to float it
          if number:
          try:
          yield float(number)
          except ValueError:
          raise BadNumberError(number)


          Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan(). It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.



          In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:



          • The need for self._done() (we'd optimally like one of the functions we use to handle this for us, but unfortunately their StopIterations are consumed when we yield from them)

          • The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a _check_for_invalid_chars() that does something like self._take(lambda c: c not in '01234567890.+-/*()') and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.

          Now, hopefully you were able to figure out what _take() does (it's provided by BaseParser). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):



          class BaseParser:
          """A base class containing utilities useful for a Parser."""

          def __init__(self, items):
          self._items = PeekableIterator(items)

          def _take(self, predicate):
          """
          Yields a contiguous group of items from the items being parsed for
          which the predicate returns True.

          >>> p = BaseParser([2, 4, 3])
          >>> list(p._take(lambda x: x % 2 == 0))
          [2, 4]
          """
          while predicate(self._items.peek()):
          yield next(self._items)

          def _done(self):
          """Returns True if the underlying items have been fully consumed."""
          try:
          self._items.peek()
          return False
          except StopIteration:
          return True


          class PeekableIterator:
          """An iterator that supports 1-lookahead (peek)."""

          def __init__(self, iterable):
          self._iterator = iter(iterable)

          # NOTE: We use None here to denote that we haven't peeked yet. This
          # doesn't work if None can occur in the iterable (so this
          # doesn't generalize!), but for our purposes here it's fine!
          self._next_item = None

          def peek(self):
          """
          Return the next item that will be returned by the iterator without
          advancing the iterator. Raises StopIteration if the iterator is done.

          >>> i = PeekableIterator([1, 2, 3])
          >>> i.peek()
          1
          >>> i.peek()
          1
          >>> next(i)
          1
          >>> next(i)
          2
          >>> i.peek()
          3
          """
          if self._next_item is None:
          self._next_item = next(self._iterator)

          return self._next_item

          def __next__(self):
          if self._next_item is not None:
          next_item = self._next_item
          self._next_item = None
          return next_item

          return next(self._iterator)

          def __iter__(self):
          return self


          I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.



          Now we have a working scanner that we can use like so:



          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']


          See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.



          Now, let's talk about the parser. The reason why we are going to reuse BaseParser is because, in effect, Scanner is a parser that takes an iterable (specifically a string—an iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single float—or it will raise an exception).



          Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:



          Expression = Term ("+" .
          Term = Factor ("*" Factor} .
          Factor = number | "(" Expression ")" .


          Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-") means we need either a + or a - next. The indicate that whatever is inside them can occur zero or more times. So, an Expression could be just a Term, but it could also be a Term "+" Term "-" Term "+" Term.



          If you try some examples like 1 * (2 - 3) + 4, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression and Factor group things so order of operations works (the deepest in the nesting are the * and /, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression by evaluating all its Terms and then adding/subtracting the results as appropriate, evaluate a Term by evaluating all of its Factors and then multiplying/dividing the results as appropriate, and evaluate a Factor by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html



          With these in mind, let's write a Parser capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.



          import operator


          class Parser(BaseParser):
          """Parser for tokenized calculator inputs."""

          def parse(self):
          """Parse calculator input and return the result of evaluating it.

          >>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
          5
          """
          return self._parse_expression()

          def _parse_expression(self):
          """Parse an Expression and return the result of evaluating it.

          >>> Parser([1, '+', 2])._parse_expression()
          3
          """

          # Parse the first (required) Term
          terms = [self._parse_term()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '+-'
          terms += flatten((op, self._parse_term()) for op in self._take(op))

          return evaluate(terms)

          def _parse_term(self):
          """Parse a Term and return the result of evaluating it.

          >>> Parser([1, '*', 2])._parse_term()
          2
          """

          # Parse the first (required) Factor
          factors = [self._parse_factor()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '*/'
          factors += flatten((op, self._parse_factor()) for op in self._take(op))

          return evaluate(factors)

          def _parse_factor(self):
          """Parse a Factor and return the result of evaluating it.

          >>> Parser([1])._parse_factor()
          1

          >>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
          7
          """

          # NOTE: Here's where Python gets a little cumbersome. This isn't really
          # a for, we're just using it to handle the case where it doesn't
          # find a number (gracefully skip). If it finds one, we return the
          # number.
          for n in self._take(lambda t: isinstance(t, float)):
          return n

          # If we failed to parse a number, then try to find a '('
          for _ in self._take(lambda t: t == '('):
          # If we found a '(', parse the subexpression
          value = self._parse_expression()
          # Make sure the subexpression is followed by a ')'
          self._expect(')')

          # Both parsing the number and subexpresion failed
          raise self._unexpected('number', '(')

          def _expect(self, char):
          """Expect a certain character, or raise if it is not next."""
          for _ in self._take(lambda t: t == char):
          return

          raise self._unexpected(char)

          def _unexpected(self, *expected):
          """Create an exception for an unexpected character."""
          try:
          return UnexpectedCharacterError(self._items.peek(), expected)
          except StopIteration:
          return UnexpectedEndError(expected)


          def evaluate(items):
          """
          Evaluate a list of floats separated by operators (at the same level of
          precedence). Returns the result.

          >>> evaluate([3, '*', 4, '/', 2])
          6
          """

          assert items, 'cannot evaluate empty list'
          # x, x + x, x + x + x, etc. all have an odd number of tokens
          assert len(items) % 2 == 1, 'list must be of odd length'

          while len(items) > 1:
          items[-3:] = [_evaluate_binary(*items[-3:])]

          return items[0]


          def _evaluate_binary(lhs, op, rhs):
          """Evalutates a single binary operation op where lhs and rhs are floats."""
          ops = '+': operator.add,
          '-': operator.sub,
          '*': operator.mul,
          '/': operator.truediv

          return ops[op](lhs, rhs)


          There's a lot to unpack here! First, take a look at parse. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression we parse a term , collect any number of ("+"|"-") Term that follow it, and evaluate out that math. We do something similar for Term. For Factor we either try to parse a number (just a float) or a "(" (subexpression) and if we can't find that we raise an error.



          Notice how the actual work of doing the math in done in evaluate and _evaluate_binary. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate intentionally cannot handle [1, '+', 2, '*', 3]. The parser handles this. It would first parse a Term and call evaluate([2, '*', 3]). This would be returned and then it could finish parsing the Expression and call evaluate([1, '+', 6]).



          For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.



          This code needs the following helper:



          def flatten(iterable):
          """Flattens a nested iterable by one nesting layer.

          >>> flatten([[1,2], [3]])
          [1, 2, 3]
          """
          return [x for l in iterable for x in l]


          Now all that's left is to wire the scanner and parser together as we discussed before:



          def calculate(expression):
          """Evaluates a mathematical expression and returns the result.

          >>> calculate('3 * (1 + 6 / 3)')
          9
          """
          return Parser(Scanner(expression).scan()).parse()


          And we're done!



          Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:



          • Support hexadecimal numbers (you only need to modify the scanner!)

          • Support mathematical constants like pi and e (you only need to modify the scanner!)

          • Support leading negative signs (you only need to modify the parser!)

          • Support new operations like % (modulo)

          Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f






          share|improve this answer























          • Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
            – Chuck Bartowski
            Mar 26 at 15:52

















          up vote
          2
          down vote













          Heres some quick suggestions...




          edit: as pointed out, isdigit() only works on integers


          Your is_number(item) function could be the built-in isdigit():



          if a_list[count].isdigit() and ...


          You could chain together method calls, doesn't make it less readable imo:



          astring = raw_input("Calculation: ").replace(" ", "")


          The first two loops you do could be condensed into one:



          # Next it will add only supported characters to the list
          a_list =
          for item in astring:
          if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
          print ("Unsupported Character: " + item)
          exit()
          a_list.append(item)



          Also, try to use better variable names than astring and a_list.






          share|improve this answer



















          • 1




            Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
            – Chuck Bartowski
            Mar 22 at 16:55







          • 1




            Nice call! Overlooked that one.
            – Ullauri
            Mar 22 at 17:37










          Your Answer




          StackExchange.ifUsing("editor", function ()
          return StackExchange.using("mathjaxEditing", function ()
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          );
          );
          , "mathjax-editing");

          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "196"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: false,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );








           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f190101%2fexpression-calculator-in-python%23new-answer', 'question_page');

          );

          Post as a guest






























          2 Answers
          2






          active

          oldest

          votes








          2 Answers
          2






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes








          up vote
          3
          down vote













          Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)



          • Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.

            • Comments need one space after the #

            • There's some required spaces between phrases you're missing

            • No space after print!

            • Wrap lines to 79 columns


          • The comment before is_number should be a """Doc comment."""


          • set_up_list isn't a good name. I had to read the code and see it used in context to understand what it did. read_expression might be better.

          • You have a lot of use of while with some sort of counter (ie. while count < len(expression) - 1:). In python, we almost exclusively spell this as for e in expression:. If you need the index, then for i, e in enumerate(expression).

          • We typically use 'single quoted strings'. "Double quotes" are usually reserved for format strings (ex. "1 + 1 is ".format(1 + 1)).

          • Your use of in is nice. But, did you know instead of that big long list you can just do item not in '01234567890+-*/.()'? Strings are behave like lists in this way.

          • Do the same thing for P = ['(', ')']. P is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But, x in '()' is nearly as short and gets the point across much better.

          • There is no need for your creates the list and adds each individual character to the list (no need for a_list, just use astring and give it a better name!). Strings are sequences so you can do len(astring), astring[23], etc.

          • You have print()s and exit()s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function like calculate(), which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extending Exception for things like UnclosedParenError or UnexpectedCharacterError) in error conditions. This means you can test it like so:

          from unittest import main, TestCase
          from yourcalculator import calculate


          class TestCalculator(TestCase):
          def test_basic_sums(self):
          self.assertEqual(42, calculate('40 + 2'))


          if __name__ == '__main__':
          main()


          • Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a calculate function. You should use a main() function for this and use the standard if __name__ == '__main__': main():

          def main():
          try:
          while True:
          try:
          print(calculate(input('Enter an expression: ')))
          except CalculatorError as e:
          print(e)
          except KeyboardInterrupt:
          break


          if __name__ == '__main__':
          main()


          • You probably want better errors than Your formatting is off somehow

          • In perform_operation, you use float() (presumably on numbers that you've already called is_number on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things like if foo_wont_raise(bar): foo(bar)

          • I see a lot of del. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list)

          • There's really no need for emergency_count. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised)

          • I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html

          With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).



          You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).



          Intrinsically, we have two different concerns here:



          1. We want to be able to handle lots of arbitrary spacing (ie. 1+1 is the same as 1 + 1 is the same as 1 + 1) and ensure that there are no invalid characters (or bad numbers like 123.456.789)

          2. Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it

          In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:



          • Scanner: remove excess spacing and ensure all characters are valid c in '01234567890+-*/.()' and from each valid character produce a "Token" (ex. in the expression 2 * 20 + 2 the tokens are 2, '*', 20, '+', 2)

          • Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)

          Let's tackle the scanner first. It has three main things it needs to do:



          1. Ignore whitespace (although this has it's limits, we probably don't want 123 456 to become 123456--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that 123 and 456)

          2. Turn symbols into single tokens (ex c in '+-/*()')

          3. Parse numbers (may be multiple characters, can be ill formatted as in 123.456.789)

          Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)



          The scanner could look something like this:



          class Scanner(BaseParser):
          """Scanner scans an input string for calculator tokens and yields them.

          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']
          """

          def scan(self):
          """Yields all tokens in the input."""
          while not self._done():
          # Ignore any whitespace that may be next
          self._consume_whitespace()

          # Emit any symbol tokens that may be next
          yield from self._take(lambda char: char in '+-*/()')

          # Emit any number token that may be next
          yield from self._take_number()

          def _consume_whitespace(self):
          """_take()s whitespace characters, but does not yield them."""
          # Note we need the list here to force evaluation of the generator
          list(self._take(lambda char: char.isspace()))

          def _take_number(self):
          """Yields a single number if there is one next in the input."""

          # Gather up the digits/. forming the next number in the input
          number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))

          # If number is empty, we didn't scan digits, so don't try to float it
          if number:
          try:
          yield float(number)
          except ValueError:
          raise BadNumberError(number)


          Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan(). It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.



          In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:



          • The need for self._done() (we'd optimally like one of the functions we use to handle this for us, but unfortunately their StopIterations are consumed when we yield from them)

          • The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a _check_for_invalid_chars() that does something like self._take(lambda c: c not in '01234567890.+-/*()') and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.

          Now, hopefully you were able to figure out what _take() does (it's provided by BaseParser). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):



          class BaseParser:
          """A base class containing utilities useful for a Parser."""

          def __init__(self, items):
          self._items = PeekableIterator(items)

          def _take(self, predicate):
          """
          Yields a contiguous group of items from the items being parsed for
          which the predicate returns True.

          >>> p = BaseParser([2, 4, 3])
          >>> list(p._take(lambda x: x % 2 == 0))
          [2, 4]
          """
          while predicate(self._items.peek()):
          yield next(self._items)

          def _done(self):
          """Returns True if the underlying items have been fully consumed."""
          try:
          self._items.peek()
          return False
          except StopIteration:
          return True


          class PeekableIterator:
          """An iterator that supports 1-lookahead (peek)."""

          def __init__(self, iterable):
          self._iterator = iter(iterable)

          # NOTE: We use None here to denote that we haven't peeked yet. This
          # doesn't work if None can occur in the iterable (so this
          # doesn't generalize!), but for our purposes here it's fine!
          self._next_item = None

          def peek(self):
          """
          Return the next item that will be returned by the iterator without
          advancing the iterator. Raises StopIteration if the iterator is done.

          >>> i = PeekableIterator([1, 2, 3])
          >>> i.peek()
          1
          >>> i.peek()
          1
          >>> next(i)
          1
          >>> next(i)
          2
          >>> i.peek()
          3
          """
          if self._next_item is None:
          self._next_item = next(self._iterator)

          return self._next_item

          def __next__(self):
          if self._next_item is not None:
          next_item = self._next_item
          self._next_item = None
          return next_item

          return next(self._iterator)

          def __iter__(self):
          return self


          I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.



          Now we have a working scanner that we can use like so:



          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']


          See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.



          Now, let's talk about the parser. The reason why we are going to reuse BaseParser is because, in effect, Scanner is a parser that takes an iterable (specifically a string—an iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single float—or it will raise an exception).



          Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:



          Expression = Term ("+" .
          Term = Factor ("*" Factor} .
          Factor = number | "(" Expression ")" .


          Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-") means we need either a + or a - next. The indicate that whatever is inside them can occur zero or more times. So, an Expression could be just a Term, but it could also be a Term "+" Term "-" Term "+" Term.



          If you try some examples like 1 * (2 - 3) + 4, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression and Factor group things so order of operations works (the deepest in the nesting are the * and /, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression by evaluating all its Terms and then adding/subtracting the results as appropriate, evaluate a Term by evaluating all of its Factors and then multiplying/dividing the results as appropriate, and evaluate a Factor by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html



          With these in mind, let's write a Parser capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.



          import operator


          class Parser(BaseParser):
          """Parser for tokenized calculator inputs."""

          def parse(self):
          """Parse calculator input and return the result of evaluating it.

          >>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
          5
          """
          return self._parse_expression()

          def _parse_expression(self):
          """Parse an Expression and return the result of evaluating it.

          >>> Parser([1, '+', 2])._parse_expression()
          3
          """

          # Parse the first (required) Term
          terms = [self._parse_term()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '+-'
          terms += flatten((op, self._parse_term()) for op in self._take(op))

          return evaluate(terms)

          def _parse_term(self):
          """Parse a Term and return the result of evaluating it.

          >>> Parser([1, '*', 2])._parse_term()
          2
          """

          # Parse the first (required) Factor
          factors = [self._parse_factor()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '*/'
          factors += flatten((op, self._parse_factor()) for op in self._take(op))

          return evaluate(factors)

          def _parse_factor(self):
          """Parse a Factor and return the result of evaluating it.

          >>> Parser([1])._parse_factor()
          1

          >>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
          7
          """

          # NOTE: Here's where Python gets a little cumbersome. This isn't really
          # a for, we're just using it to handle the case where it doesn't
          # find a number (gracefully skip). If it finds one, we return the
          # number.
          for n in self._take(lambda t: isinstance(t, float)):
          return n

          # If we failed to parse a number, then try to find a '('
          for _ in self._take(lambda t: t == '('):
          # If we found a '(', parse the subexpression
          value = self._parse_expression()
          # Make sure the subexpression is followed by a ')'
          self._expect(')')

          # Both parsing the number and subexpresion failed
          raise self._unexpected('number', '(')

          def _expect(self, char):
          """Expect a certain character, or raise if it is not next."""
          for _ in self._take(lambda t: t == char):
          return

          raise self._unexpected(char)

          def _unexpected(self, *expected):
          """Create an exception for an unexpected character."""
          try:
          return UnexpectedCharacterError(self._items.peek(), expected)
          except StopIteration:
          return UnexpectedEndError(expected)


          def evaluate(items):
          """
          Evaluate a list of floats separated by operators (at the same level of
          precedence). Returns the result.

          >>> evaluate([3, '*', 4, '/', 2])
          6
          """

          assert items, 'cannot evaluate empty list'
          # x, x + x, x + x + x, etc. all have an odd number of tokens
          assert len(items) % 2 == 1, 'list must be of odd length'

          while len(items) > 1:
          items[-3:] = [_evaluate_binary(*items[-3:])]

          return items[0]


          def _evaluate_binary(lhs, op, rhs):
          """Evalutates a single binary operation op where lhs and rhs are floats."""
          ops = '+': operator.add,
          '-': operator.sub,
          '*': operator.mul,
          '/': operator.truediv

          return ops[op](lhs, rhs)


          There's a lot to unpack here! First, take a look at parse. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression we parse a term , collect any number of ("+"|"-") Term that follow it, and evaluate out that math. We do something similar for Term. For Factor we either try to parse a number (just a float) or a "(" (subexpression) and if we can't find that we raise an error.



          Notice how the actual work of doing the math in done in evaluate and _evaluate_binary. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate intentionally cannot handle [1, '+', 2, '*', 3]. The parser handles this. It would first parse a Term and call evaluate([2, '*', 3]). This would be returned and then it could finish parsing the Expression and call evaluate([1, '+', 6]).



          For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.



          This code needs the following helper:



          def flatten(iterable):
          """Flattens a nested iterable by one nesting layer.

          >>> flatten([[1,2], [3]])
          [1, 2, 3]
          """
          return [x for l in iterable for x in l]


          Now all that's left is to wire the scanner and parser together as we discussed before:



          def calculate(expression):
          """Evaluates a mathematical expression and returns the result.

          >>> calculate('3 * (1 + 6 / 3)')
          9
          """
          return Parser(Scanner(expression).scan()).parse()


          And we're done!



          Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:



          • Support hexadecimal numbers (you only need to modify the scanner!)

          • Support mathematical constants like pi and e (you only need to modify the scanner!)

          • Support leading negative signs (you only need to modify the parser!)

          • Support new operations like % (modulo)

          Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f






          share|improve this answer























          • Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
            – Chuck Bartowski
            Mar 26 at 15:52














          up vote
          3
          down vote













          Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)



          • Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.

            • Comments need one space after the #

            • There's some required spaces between phrases you're missing

            • No space after print!

            • Wrap lines to 79 columns


          • The comment before is_number should be a """Doc comment."""


          • set_up_list isn't a good name. I had to read the code and see it used in context to understand what it did. read_expression might be better.

          • You have a lot of use of while with some sort of counter (ie. while count < len(expression) - 1:). In python, we almost exclusively spell this as for e in expression:. If you need the index, then for i, e in enumerate(expression).

          • We typically use 'single quoted strings'. "Double quotes" are usually reserved for format strings (ex. "1 + 1 is ".format(1 + 1)).

          • Your use of in is nice. But, did you know instead of that big long list you can just do item not in '01234567890+-*/.()'? Strings are behave like lists in this way.

          • Do the same thing for P = ['(', ')']. P is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But, x in '()' is nearly as short and gets the point across much better.

          • There is no need for your creates the list and adds each individual character to the list (no need for a_list, just use astring and give it a better name!). Strings are sequences so you can do len(astring), astring[23], etc.

          • You have print()s and exit()s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function like calculate(), which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extending Exception for things like UnclosedParenError or UnexpectedCharacterError) in error conditions. This means you can test it like so:

          from unittest import main, TestCase
          from yourcalculator import calculate


          class TestCalculator(TestCase):
          def test_basic_sums(self):
          self.assertEqual(42, calculate('40 + 2'))


          if __name__ == '__main__':
          main()


          • Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a calculate function. You should use a main() function for this and use the standard if __name__ == '__main__': main():

          def main():
          try:
          while True:
          try:
          print(calculate(input('Enter an expression: ')))
          except CalculatorError as e:
          print(e)
          except KeyboardInterrupt:
          break


          if __name__ == '__main__':
          main()


          • You probably want better errors than Your formatting is off somehow

          • In perform_operation, you use float() (presumably on numbers that you've already called is_number on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things like if foo_wont_raise(bar): foo(bar)

          • I see a lot of del. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list)

          • There's really no need for emergency_count. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised)

          • I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html

          With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).



          You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).



          Intrinsically, we have two different concerns here:



          1. We want to be able to handle lots of arbitrary spacing (ie. 1+1 is the same as 1 + 1 is the same as 1 + 1) and ensure that there are no invalid characters (or bad numbers like 123.456.789)

          2. Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it

          In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:



          • Scanner: remove excess spacing and ensure all characters are valid c in '01234567890+-*/.()' and from each valid character produce a "Token" (ex. in the expression 2 * 20 + 2 the tokens are 2, '*', 20, '+', 2)

          • Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)

          Let's tackle the scanner first. It has three main things it needs to do:



          1. Ignore whitespace (although this has it's limits, we probably don't want 123 456 to become 123456--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that 123 and 456)

          2. Turn symbols into single tokens (ex c in '+-/*()')

          3. Parse numbers (may be multiple characters, can be ill formatted as in 123.456.789)

          Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)



          The scanner could look something like this:



          class Scanner(BaseParser):
          """Scanner scans an input string for calculator tokens and yields them.

          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']
          """

          def scan(self):
          """Yields all tokens in the input."""
          while not self._done():
          # Ignore any whitespace that may be next
          self._consume_whitespace()

          # Emit any symbol tokens that may be next
          yield from self._take(lambda char: char in '+-*/()')

          # Emit any number token that may be next
          yield from self._take_number()

          def _consume_whitespace(self):
          """_take()s whitespace characters, but does not yield them."""
          # Note we need the list here to force evaluation of the generator
          list(self._take(lambda char: char.isspace()))

          def _take_number(self):
          """Yields a single number if there is one next in the input."""

          # Gather up the digits/. forming the next number in the input
          number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))

          # If number is empty, we didn't scan digits, so don't try to float it
          if number:
          try:
          yield float(number)
          except ValueError:
          raise BadNumberError(number)


          Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan(). It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.



          In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:



          • The need for self._done() (we'd optimally like one of the functions we use to handle this for us, but unfortunately their StopIterations are consumed when we yield from them)

          • The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a _check_for_invalid_chars() that does something like self._take(lambda c: c not in '01234567890.+-/*()') and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.

          Now, hopefully you were able to figure out what _take() does (it's provided by BaseParser). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):



          class BaseParser:
          """A base class containing utilities useful for a Parser."""

          def __init__(self, items):
          self._items = PeekableIterator(items)

          def _take(self, predicate):
          """
          Yields a contiguous group of items from the items being parsed for
          which the predicate returns True.

          >>> p = BaseParser([2, 4, 3])
          >>> list(p._take(lambda x: x % 2 == 0))
          [2, 4]
          """
          while predicate(self._items.peek()):
          yield next(self._items)

          def _done(self):
          """Returns True if the underlying items have been fully consumed."""
          try:
          self._items.peek()
          return False
          except StopIteration:
          return True


          class PeekableIterator:
          """An iterator that supports 1-lookahead (peek)."""

          def __init__(self, iterable):
          self._iterator = iter(iterable)

          # NOTE: We use None here to denote that we haven't peeked yet. This
          # doesn't work if None can occur in the iterable (so this
          # doesn't generalize!), but for our purposes here it's fine!
          self._next_item = None

          def peek(self):
          """
          Return the next item that will be returned by the iterator without
          advancing the iterator. Raises StopIteration if the iterator is done.

          >>> i = PeekableIterator([1, 2, 3])
          >>> i.peek()
          1
          >>> i.peek()
          1
          >>> next(i)
          1
          >>> next(i)
          2
          >>> i.peek()
          3
          """
          if self._next_item is None:
          self._next_item = next(self._iterator)

          return self._next_item

          def __next__(self):
          if self._next_item is not None:
          next_item = self._next_item
          self._next_item = None
          return next_item

          return next(self._iterator)

          def __iter__(self):
          return self


          I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.



          Now we have a working scanner that we can use like so:



          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']


          See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.



          Now, let's talk about the parser. The reason why we are going to reuse BaseParser is because, in effect, Scanner is a parser that takes an iterable (specifically a string—an iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single float—or it will raise an exception).



          Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:



          Expression = Term ("+" .
          Term = Factor ("*" Factor} .
          Factor = number | "(" Expression ")" .


          Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-") means we need either a + or a - next. The indicate that whatever is inside them can occur zero or more times. So, an Expression could be just a Term, but it could also be a Term "+" Term "-" Term "+" Term.



          If you try some examples like 1 * (2 - 3) + 4, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression and Factor group things so order of operations works (the deepest in the nesting are the * and /, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression by evaluating all its Terms and then adding/subtracting the results as appropriate, evaluate a Term by evaluating all of its Factors and then multiplying/dividing the results as appropriate, and evaluate a Factor by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html



          With these in mind, let's write a Parser capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.



          import operator


          class Parser(BaseParser):
          """Parser for tokenized calculator inputs."""

          def parse(self):
          """Parse calculator input and return the result of evaluating it.

          >>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
          5
          """
          return self._parse_expression()

          def _parse_expression(self):
          """Parse an Expression and return the result of evaluating it.

          >>> Parser([1, '+', 2])._parse_expression()
          3
          """

          # Parse the first (required) Term
          terms = [self._parse_term()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '+-'
          terms += flatten((op, self._parse_term()) for op in self._take(op))

          return evaluate(terms)

          def _parse_term(self):
          """Parse a Term and return the result of evaluating it.

          >>> Parser([1, '*', 2])._parse_term()
          2
          """

          # Parse the first (required) Factor
          factors = [self._parse_factor()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '*/'
          factors += flatten((op, self._parse_factor()) for op in self._take(op))

          return evaluate(factors)

          def _parse_factor(self):
          """Parse a Factor and return the result of evaluating it.

          >>> Parser([1])._parse_factor()
          1

          >>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
          7
          """

          # NOTE: Here's where Python gets a little cumbersome. This isn't really
          # a for, we're just using it to handle the case where it doesn't
          # find a number (gracefully skip). If it finds one, we return the
          # number.
          for n in self._take(lambda t: isinstance(t, float)):
          return n

          # If we failed to parse a number, then try to find a '('
          for _ in self._take(lambda t: t == '('):
          # If we found a '(', parse the subexpression
          value = self._parse_expression()
          # Make sure the subexpression is followed by a ')'
          self._expect(')')

          # Both parsing the number and subexpresion failed
          raise self._unexpected('number', '(')

          def _expect(self, char):
          """Expect a certain character, or raise if it is not next."""
          for _ in self._take(lambda t: t == char):
          return

          raise self._unexpected(char)

          def _unexpected(self, *expected):
          """Create an exception for an unexpected character."""
          try:
          return UnexpectedCharacterError(self._items.peek(), expected)
          except StopIteration:
          return UnexpectedEndError(expected)


          def evaluate(items):
          """
          Evaluate a list of floats separated by operators (at the same level of
          precedence). Returns the result.

          >>> evaluate([3, '*', 4, '/', 2])
          6
          """

          assert items, 'cannot evaluate empty list'
          # x, x + x, x + x + x, etc. all have an odd number of tokens
          assert len(items) % 2 == 1, 'list must be of odd length'

          while len(items) > 1:
          items[-3:] = [_evaluate_binary(*items[-3:])]

          return items[0]


          def _evaluate_binary(lhs, op, rhs):
          """Evalutates a single binary operation op where lhs and rhs are floats."""
          ops = '+': operator.add,
          '-': operator.sub,
          '*': operator.mul,
          '/': operator.truediv

          return ops[op](lhs, rhs)


          There's a lot to unpack here! First, take a look at parse. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression we parse a term , collect any number of ("+"|"-") Term that follow it, and evaluate out that math. We do something similar for Term. For Factor we either try to parse a number (just a float) or a "(" (subexpression) and if we can't find that we raise an error.



          Notice how the actual work of doing the math in done in evaluate and _evaluate_binary. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate intentionally cannot handle [1, '+', 2, '*', 3]. The parser handles this. It would first parse a Term and call evaluate([2, '*', 3]). This would be returned and then it could finish parsing the Expression and call evaluate([1, '+', 6]).



          For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.



          This code needs the following helper:



          def flatten(iterable):
          """Flattens a nested iterable by one nesting layer.

          >>> flatten([[1,2], [3]])
          [1, 2, 3]
          """
          return [x for l in iterable for x in l]


          Now all that's left is to wire the scanner and parser together as we discussed before:



          def calculate(expression):
          """Evaluates a mathematical expression and returns the result.

          >>> calculate('3 * (1 + 6 / 3)')
          9
          """
          return Parser(Scanner(expression).scan()).parse()


          And we're done!



          Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:



          • Support hexadecimal numbers (you only need to modify the scanner!)

          • Support mathematical constants like pi and e (you only need to modify the scanner!)

          • Support leading negative signs (you only need to modify the parser!)

          • Support new operations like % (modulo)

          Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f






          share|improve this answer























          • Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
            – Chuck Bartowski
            Mar 26 at 15:52












          up vote
          3
          down vote










          up vote
          3
          down vote









          Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)



          • Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.

            • Comments need one space after the #

            • There's some required spaces between phrases you're missing

            • No space after print!

            • Wrap lines to 79 columns


          • The comment before is_number should be a """Doc comment."""


          • set_up_list isn't a good name. I had to read the code and see it used in context to understand what it did. read_expression might be better.

          • You have a lot of use of while with some sort of counter (ie. while count < len(expression) - 1:). In python, we almost exclusively spell this as for e in expression:. If you need the index, then for i, e in enumerate(expression).

          • We typically use 'single quoted strings'. "Double quotes" are usually reserved for format strings (ex. "1 + 1 is ".format(1 + 1)).

          • Your use of in is nice. But, did you know instead of that big long list you can just do item not in '01234567890+-*/.()'? Strings are behave like lists in this way.

          • Do the same thing for P = ['(', ')']. P is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But, x in '()' is nearly as short and gets the point across much better.

          • There is no need for your creates the list and adds each individual character to the list (no need for a_list, just use astring and give it a better name!). Strings are sequences so you can do len(astring), astring[23], etc.

          • You have print()s and exit()s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function like calculate(), which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extending Exception for things like UnclosedParenError or UnexpectedCharacterError) in error conditions. This means you can test it like so:

          from unittest import main, TestCase
          from yourcalculator import calculate


          class TestCalculator(TestCase):
          def test_basic_sums(self):
          self.assertEqual(42, calculate('40 + 2'))


          if __name__ == '__main__':
          main()


          • Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a calculate function. You should use a main() function for this and use the standard if __name__ == '__main__': main():

          def main():
          try:
          while True:
          try:
          print(calculate(input('Enter an expression: ')))
          except CalculatorError as e:
          print(e)
          except KeyboardInterrupt:
          break


          if __name__ == '__main__':
          main()


          • You probably want better errors than Your formatting is off somehow

          • In perform_operation, you use float() (presumably on numbers that you've already called is_number on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things like if foo_wont_raise(bar): foo(bar)

          • I see a lot of del. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list)

          • There's really no need for emergency_count. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised)

          • I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html

          With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).



          You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).



          Intrinsically, we have two different concerns here:



          1. We want to be able to handle lots of arbitrary spacing (ie. 1+1 is the same as 1 + 1 is the same as 1 + 1) and ensure that there are no invalid characters (or bad numbers like 123.456.789)

          2. Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it

          In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:



          • Scanner: remove excess spacing and ensure all characters are valid c in '01234567890+-*/.()' and from each valid character produce a "Token" (ex. in the expression 2 * 20 + 2 the tokens are 2, '*', 20, '+', 2)

          • Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)

          Let's tackle the scanner first. It has three main things it needs to do:



          1. Ignore whitespace (although this has it's limits, we probably don't want 123 456 to become 123456--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that 123 and 456)

          2. Turn symbols into single tokens (ex c in '+-/*()')

          3. Parse numbers (may be multiple characters, can be ill formatted as in 123.456.789)

          Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)



          The scanner could look something like this:



          class Scanner(BaseParser):
          """Scanner scans an input string for calculator tokens and yields them.

          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']
          """

          def scan(self):
          """Yields all tokens in the input."""
          while not self._done():
          # Ignore any whitespace that may be next
          self._consume_whitespace()

          # Emit any symbol tokens that may be next
          yield from self._take(lambda char: char in '+-*/()')

          # Emit any number token that may be next
          yield from self._take_number()

          def _consume_whitespace(self):
          """_take()s whitespace characters, but does not yield them."""
          # Note we need the list here to force evaluation of the generator
          list(self._take(lambda char: char.isspace()))

          def _take_number(self):
          """Yields a single number if there is one next in the input."""

          # Gather up the digits/. forming the next number in the input
          number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))

          # If number is empty, we didn't scan digits, so don't try to float it
          if number:
          try:
          yield float(number)
          except ValueError:
          raise BadNumberError(number)


          Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan(). It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.



          In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:



          • The need for self._done() (we'd optimally like one of the functions we use to handle this for us, but unfortunately their StopIterations are consumed when we yield from them)

          • The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a _check_for_invalid_chars() that does something like self._take(lambda c: c not in '01234567890.+-/*()') and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.

          Now, hopefully you were able to figure out what _take() does (it's provided by BaseParser). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):



          class BaseParser:
          """A base class containing utilities useful for a Parser."""

          def __init__(self, items):
          self._items = PeekableIterator(items)

          def _take(self, predicate):
          """
          Yields a contiguous group of items from the items being parsed for
          which the predicate returns True.

          >>> p = BaseParser([2, 4, 3])
          >>> list(p._take(lambda x: x % 2 == 0))
          [2, 4]
          """
          while predicate(self._items.peek()):
          yield next(self._items)

          def _done(self):
          """Returns True if the underlying items have been fully consumed."""
          try:
          self._items.peek()
          return False
          except StopIteration:
          return True


          class PeekableIterator:
          """An iterator that supports 1-lookahead (peek)."""

          def __init__(self, iterable):
          self._iterator = iter(iterable)

          # NOTE: We use None here to denote that we haven't peeked yet. This
          # doesn't work if None can occur in the iterable (so this
          # doesn't generalize!), but for our purposes here it's fine!
          self._next_item = None

          def peek(self):
          """
          Return the next item that will be returned by the iterator without
          advancing the iterator. Raises StopIteration if the iterator is done.

          >>> i = PeekableIterator([1, 2, 3])
          >>> i.peek()
          1
          >>> i.peek()
          1
          >>> next(i)
          1
          >>> next(i)
          2
          >>> i.peek()
          3
          """
          if self._next_item is None:
          self._next_item = next(self._iterator)

          return self._next_item

          def __next__(self):
          if self._next_item is not None:
          next_item = self._next_item
          self._next_item = None
          return next_item

          return next(self._iterator)

          def __iter__(self):
          return self


          I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.



          Now we have a working scanner that we can use like so:



          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']


          See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.



          Now, let's talk about the parser. The reason why we are going to reuse BaseParser is because, in effect, Scanner is a parser that takes an iterable (specifically a string—an iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single float—or it will raise an exception).



          Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:



          Expression = Term ("+" .
          Term = Factor ("*" Factor} .
          Factor = number | "(" Expression ")" .


          Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-") means we need either a + or a - next. The indicate that whatever is inside them can occur zero or more times. So, an Expression could be just a Term, but it could also be a Term "+" Term "-" Term "+" Term.



          If you try some examples like 1 * (2 - 3) + 4, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression and Factor group things so order of operations works (the deepest in the nesting are the * and /, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression by evaluating all its Terms and then adding/subtracting the results as appropriate, evaluate a Term by evaluating all of its Factors and then multiplying/dividing the results as appropriate, and evaluate a Factor by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html



          With these in mind, let's write a Parser capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.



          import operator


          class Parser(BaseParser):
          """Parser for tokenized calculator inputs."""

          def parse(self):
          """Parse calculator input and return the result of evaluating it.

          >>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
          5
          """
          return self._parse_expression()

          def _parse_expression(self):
          """Parse an Expression and return the result of evaluating it.

          >>> Parser([1, '+', 2])._parse_expression()
          3
          """

          # Parse the first (required) Term
          terms = [self._parse_term()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '+-'
          terms += flatten((op, self._parse_term()) for op in self._take(op))

          return evaluate(terms)

          def _parse_term(self):
          """Parse a Term and return the result of evaluating it.

          >>> Parser([1, '*', 2])._parse_term()
          2
          """

          # Parse the first (required) Factor
          factors = [self._parse_factor()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '*/'
          factors += flatten((op, self._parse_factor()) for op in self._take(op))

          return evaluate(factors)

          def _parse_factor(self):
          """Parse a Factor and return the result of evaluating it.

          >>> Parser([1])._parse_factor()
          1

          >>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
          7
          """

          # NOTE: Here's where Python gets a little cumbersome. This isn't really
          # a for, we're just using it to handle the case where it doesn't
          # find a number (gracefully skip). If it finds one, we return the
          # number.
          for n in self._take(lambda t: isinstance(t, float)):
          return n

          # If we failed to parse a number, then try to find a '('
          for _ in self._take(lambda t: t == '('):
          # If we found a '(', parse the subexpression
          value = self._parse_expression()
          # Make sure the subexpression is followed by a ')'
          self._expect(')')

          # Both parsing the number and subexpresion failed
          raise self._unexpected('number', '(')

          def _expect(self, char):
          """Expect a certain character, or raise if it is not next."""
          for _ in self._take(lambda t: t == char):
          return

          raise self._unexpected(char)

          def _unexpected(self, *expected):
          """Create an exception for an unexpected character."""
          try:
          return UnexpectedCharacterError(self._items.peek(), expected)
          except StopIteration:
          return UnexpectedEndError(expected)


          def evaluate(items):
          """
          Evaluate a list of floats separated by operators (at the same level of
          precedence). Returns the result.

          >>> evaluate([3, '*', 4, '/', 2])
          6
          """

          assert items, 'cannot evaluate empty list'
          # x, x + x, x + x + x, etc. all have an odd number of tokens
          assert len(items) % 2 == 1, 'list must be of odd length'

          while len(items) > 1:
          items[-3:] = [_evaluate_binary(*items[-3:])]

          return items[0]


          def _evaluate_binary(lhs, op, rhs):
          """Evalutates a single binary operation op where lhs and rhs are floats."""
          ops = '+': operator.add,
          '-': operator.sub,
          '*': operator.mul,
          '/': operator.truediv

          return ops[op](lhs, rhs)


          There's a lot to unpack here! First, take a look at parse. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression we parse a term , collect any number of ("+"|"-") Term that follow it, and evaluate out that math. We do something similar for Term. For Factor we either try to parse a number (just a float) or a "(" (subexpression) and if we can't find that we raise an error.



          Notice how the actual work of doing the math in done in evaluate and _evaluate_binary. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate intentionally cannot handle [1, '+', 2, '*', 3]. The parser handles this. It would first parse a Term and call evaluate([2, '*', 3]). This would be returned and then it could finish parsing the Expression and call evaluate([1, '+', 6]).



          For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.



          This code needs the following helper:



          def flatten(iterable):
          """Flattens a nested iterable by one nesting layer.

          >>> flatten([[1,2], [3]])
          [1, 2, 3]
          """
          return [x for l in iterable for x in l]


          Now all that's left is to wire the scanner and parser together as we discussed before:



          def calculate(expression):
          """Evaluates a mathematical expression and returns the result.

          >>> calculate('3 * (1 + 6 / 3)')
          9
          """
          return Parser(Scanner(expression).scan()).parse()


          And we're done!



          Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:



          • Support hexadecimal numbers (you only need to modify the scanner!)

          • Support mathematical constants like pi and e (you only need to modify the scanner!)

          • Support leading negative signs (you only need to modify the parser!)

          • Support new operations like % (modulo)

          Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f






          share|improve this answer















          Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)



          • Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.

            • Comments need one space after the #

            • There's some required spaces between phrases you're missing

            • No space after print!

            • Wrap lines to 79 columns


          • The comment before is_number should be a """Doc comment."""


          • set_up_list isn't a good name. I had to read the code and see it used in context to understand what it did. read_expression might be better.

          • You have a lot of use of while with some sort of counter (ie. while count < len(expression) - 1:). In python, we almost exclusively spell this as for e in expression:. If you need the index, then for i, e in enumerate(expression).

          • We typically use 'single quoted strings'. "Double quotes" are usually reserved for format strings (ex. "1 + 1 is ".format(1 + 1)).

          • Your use of in is nice. But, did you know instead of that big long list you can just do item not in '01234567890+-*/.()'? Strings are behave like lists in this way.

          • Do the same thing for P = ['(', ')']. P is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But, x in '()' is nearly as short and gets the point across much better.

          • There is no need for your creates the list and adds each individual character to the list (no need for a_list, just use astring and give it a better name!). Strings are sequences so you can do len(astring), astring[23], etc.

          • You have print()s and exit()s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function like calculate(), which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extending Exception for things like UnclosedParenError or UnexpectedCharacterError) in error conditions. This means you can test it like so:

          from unittest import main, TestCase
          from yourcalculator import calculate


          class TestCalculator(TestCase):
          def test_basic_sums(self):
          self.assertEqual(42, calculate('40 + 2'))


          if __name__ == '__main__':
          main()


          • Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a calculate function. You should use a main() function for this and use the standard if __name__ == '__main__': main():

          def main():
          try:
          while True:
          try:
          print(calculate(input('Enter an expression: ')))
          except CalculatorError as e:
          print(e)
          except KeyboardInterrupt:
          break


          if __name__ == '__main__':
          main()


          • You probably want better errors than Your formatting is off somehow

          • In perform_operation, you use float() (presumably on numbers that you've already called is_number on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things like if foo_wont_raise(bar): foo(bar)

          • I see a lot of del. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list)

          • There's really no need for emergency_count. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised)

          • I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html

          With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).



          You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).



          Intrinsically, we have two different concerns here:



          1. We want to be able to handle lots of arbitrary spacing (ie. 1+1 is the same as 1 + 1 is the same as 1 + 1) and ensure that there are no invalid characters (or bad numbers like 123.456.789)

          2. Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it

          In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:



          • Scanner: remove excess spacing and ensure all characters are valid c in '01234567890+-*/.()' and from each valid character produce a "Token" (ex. in the expression 2 * 20 + 2 the tokens are 2, '*', 20, '+', 2)

          • Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)

          Let's tackle the scanner first. It has three main things it needs to do:



          1. Ignore whitespace (although this has it's limits, we probably don't want 123 456 to become 123456--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that 123 and 456)

          2. Turn symbols into single tokens (ex c in '+-/*()')

          3. Parse numbers (may be multiple characters, can be ill formatted as in 123.456.789)

          Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)



          The scanner could look something like this:



          class Scanner(BaseParser):
          """Scanner scans an input string for calculator tokens and yields them.

          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']
          """

          def scan(self):
          """Yields all tokens in the input."""
          while not self._done():
          # Ignore any whitespace that may be next
          self._consume_whitespace()

          # Emit any symbol tokens that may be next
          yield from self._take(lambda char: char in '+-*/()')

          # Emit any number token that may be next
          yield from self._take_number()

          def _consume_whitespace(self):
          """_take()s whitespace characters, but does not yield them."""
          # Note we need the list here to force evaluation of the generator
          list(self._take(lambda char: char.isspace()))

          def _take_number(self):
          """Yields a single number if there is one next in the input."""

          # Gather up the digits/. forming the next number in the input
          number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))

          # If number is empty, we didn't scan digits, so don't try to float it
          if number:
          try:
          yield float(number)
          except ValueError:
          raise BadNumberError(number)


          Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan(). It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.



          In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:



          • The need for self._done() (we'd optimally like one of the functions we use to handle this for us, but unfortunately their StopIterations are consumed when we yield from them)

          • The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a _check_for_invalid_chars() that does something like self._take(lambda c: c not in '01234567890.+-/*()') and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.

          Now, hopefully you were able to figure out what _take() does (it's provided by BaseParser). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):



          class BaseParser:
          """A base class containing utilities useful for a Parser."""

          def __init__(self, items):
          self._items = PeekableIterator(items)

          def _take(self, predicate):
          """
          Yields a contiguous group of items from the items being parsed for
          which the predicate returns True.

          >>> p = BaseParser([2, 4, 3])
          >>> list(p._take(lambda x: x % 2 == 0))
          [2, 4]
          """
          while predicate(self._items.peek()):
          yield next(self._items)

          def _done(self):
          """Returns True if the underlying items have been fully consumed."""
          try:
          self._items.peek()
          return False
          except StopIteration:
          return True


          class PeekableIterator:
          """An iterator that supports 1-lookahead (peek)."""

          def __init__(self, iterable):
          self._iterator = iter(iterable)

          # NOTE: We use None here to denote that we haven't peeked yet. This
          # doesn't work if None can occur in the iterable (so this
          # doesn't generalize!), but for our purposes here it's fine!
          self._next_item = None

          def peek(self):
          """
          Return the next item that will be returned by the iterator without
          advancing the iterator. Raises StopIteration if the iterator is done.

          >>> i = PeekableIterator([1, 2, 3])
          >>> i.peek()
          1
          >>> i.peek()
          1
          >>> next(i)
          1
          >>> next(i)
          2
          >>> i.peek()
          3
          """
          if self._next_item is None:
          self._next_item = next(self._iterator)

          return self._next_item

          def __next__(self):
          if self._next_item is not None:
          next_item = self._next_item
          self._next_item = None
          return next_item

          return next(self._iterator)

          def __iter__(self):
          return self


          I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.



          Now we have a working scanner that we can use like so:



          >>> list(Scanner('11 * (2 + 3)').scan())
          [11, '(', 2, '+', 3, ')']


          See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.



          Now, let's talk about the parser. The reason why we are going to reuse BaseParser is because, in effect, Scanner is a parser that takes an iterable (specifically a string—an iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single float—or it will raise an exception).



          Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:



          Expression = Term ("+" .
          Term = Factor ("*" Factor} .
          Factor = number | "(" Expression ")" .


          Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-") means we need either a + or a - next. The indicate that whatever is inside them can occur zero or more times. So, an Expression could be just a Term, but it could also be a Term "+" Term "-" Term "+" Term.



          If you try some examples like 1 * (2 - 3) + 4, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression and Factor group things so order of operations works (the deepest in the nesting are the * and /, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression by evaluating all its Terms and then adding/subtracting the results as appropriate, evaluate a Term by evaluating all of its Factors and then multiplying/dividing the results as appropriate, and evaluate a Factor by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html



          With these in mind, let's write a Parser capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.



          import operator


          class Parser(BaseParser):
          """Parser for tokenized calculator inputs."""

          def parse(self):
          """Parse calculator input and return the result of evaluating it.

          >>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
          5
          """
          return self._parse_expression()

          def _parse_expression(self):
          """Parse an Expression and return the result of evaluating it.

          >>> Parser([1, '+', 2])._parse_expression()
          3
          """

          # Parse the first (required) Term
          terms = [self._parse_term()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '+-'
          terms += flatten((op, self._parse_term()) for op in self._take(op))

          return evaluate(terms)

          def _parse_term(self):
          """Parse a Term and return the result of evaluating it.

          >>> Parser([1, '*', 2])._parse_term()
          2
          """

          # Parse the first (required) Factor
          factors = [self._parse_factor()]

          # Parse any following: ("*"|"/") Factor
          op = lambda t: t in '*/'
          factors += flatten((op, self._parse_factor()) for op in self._take(op))

          return evaluate(factors)

          def _parse_factor(self):
          """Parse a Factor and return the result of evaluating it.

          >>> Parser([1])._parse_factor()
          1

          >>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
          7
          """

          # NOTE: Here's where Python gets a little cumbersome. This isn't really
          # a for, we're just using it to handle the case where it doesn't
          # find a number (gracefully skip). If it finds one, we return the
          # number.
          for n in self._take(lambda t: isinstance(t, float)):
          return n

          # If we failed to parse a number, then try to find a '('
          for _ in self._take(lambda t: t == '('):
          # If we found a '(', parse the subexpression
          value = self._parse_expression()
          # Make sure the subexpression is followed by a ')'
          self._expect(')')

          # Both parsing the number and subexpresion failed
          raise self._unexpected('number', '(')

          def _expect(self, char):
          """Expect a certain character, or raise if it is not next."""
          for _ in self._take(lambda t: t == char):
          return

          raise self._unexpected(char)

          def _unexpected(self, *expected):
          """Create an exception for an unexpected character."""
          try:
          return UnexpectedCharacterError(self._items.peek(), expected)
          except StopIteration:
          return UnexpectedEndError(expected)


          def evaluate(items):
          """
          Evaluate a list of floats separated by operators (at the same level of
          precedence). Returns the result.

          >>> evaluate([3, '*', 4, '/', 2])
          6
          """

          assert items, 'cannot evaluate empty list'
          # x, x + x, x + x + x, etc. all have an odd number of tokens
          assert len(items) % 2 == 1, 'list must be of odd length'

          while len(items) > 1:
          items[-3:] = [_evaluate_binary(*items[-3:])]

          return items[0]


          def _evaluate_binary(lhs, op, rhs):
          """Evalutates a single binary operation op where lhs and rhs are floats."""
          ops = '+': operator.add,
          '-': operator.sub,
          '*': operator.mul,
          '/': operator.truediv

          return ops[op](lhs, rhs)


          There's a lot to unpack here! First, take a look at parse. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression we parse a term , collect any number of ("+"|"-") Term that follow it, and evaluate out that math. We do something similar for Term. For Factor we either try to parse a number (just a float) or a "(" (subexpression) and if we can't find that we raise an error.



          Notice how the actual work of doing the math in done in evaluate and _evaluate_binary. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate intentionally cannot handle [1, '+', 2, '*', 3]. The parser handles this. It would first parse a Term and call evaluate([2, '*', 3]). This would be returned and then it could finish parsing the Expression and call evaluate([1, '+', 6]).



          For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.



          This code needs the following helper:



          def flatten(iterable):
          """Flattens a nested iterable by one nesting layer.

          >>> flatten([[1,2], [3]])
          [1, 2, 3]
          """
          return [x for l in iterable for x in l]


          Now all that's left is to wire the scanner and parser together as we discussed before:



          def calculate(expression):
          """Evaluates a mathematical expression and returns the result.

          >>> calculate('3 * (1 + 6 / 3)')
          9
          """
          return Parser(Scanner(expression).scan()).parse()


          And we're done!



          Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:



          • Support hexadecimal numbers (you only need to modify the scanner!)

          • Support mathematical constants like pi and e (you only need to modify the scanner!)

          • Support leading negative signs (you only need to modify the parser!)

          • Support new operations like % (modulo)

          Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f







          share|improve this answer















          share|improve this answer



          share|improve this answer








          edited Mar 24 at 15:01


























          answered Mar 24 at 14:41









          Bailey Parker

          1,161710




          1,161710











          • Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
            – Chuck Bartowski
            Mar 26 at 15:52
















          • Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
            – Chuck Bartowski
            Mar 26 at 15:52















          Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
          – Chuck Bartowski
          Mar 26 at 15:52




          Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
          – Chuck Bartowski
          Mar 26 at 15:52












          up vote
          2
          down vote













          Heres some quick suggestions...




          edit: as pointed out, isdigit() only works on integers


          Your is_number(item) function could be the built-in isdigit():



          if a_list[count].isdigit() and ...


          You could chain together method calls, doesn't make it less readable imo:



          astring = raw_input("Calculation: ").replace(" ", "")


          The first two loops you do could be condensed into one:



          # Next it will add only supported characters to the list
          a_list =
          for item in astring:
          if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
          print ("Unsupported Character: " + item)
          exit()
          a_list.append(item)



          Also, try to use better variable names than astring and a_list.






          share|improve this answer



















          • 1




            Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
            – Chuck Bartowski
            Mar 22 at 16:55







          • 1




            Nice call! Overlooked that one.
            – Ullauri
            Mar 22 at 17:37














          up vote
          2
          down vote













          Heres some quick suggestions...




          edit: as pointed out, isdigit() only works on integers


          Your is_number(item) function could be the built-in isdigit():



          if a_list[count].isdigit() and ...


          You could chain together method calls, doesn't make it less readable imo:



          astring = raw_input("Calculation: ").replace(" ", "")


          The first two loops you do could be condensed into one:



          # Next it will add only supported characters to the list
          a_list =
          for item in astring:
          if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
          print ("Unsupported Character: " + item)
          exit()
          a_list.append(item)



          Also, try to use better variable names than astring and a_list.






          share|improve this answer



















          • 1




            Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
            – Chuck Bartowski
            Mar 22 at 16:55







          • 1




            Nice call! Overlooked that one.
            – Ullauri
            Mar 22 at 17:37












          up vote
          2
          down vote










          up vote
          2
          down vote









          Heres some quick suggestions...




          edit: as pointed out, isdigit() only works on integers


          Your is_number(item) function could be the built-in isdigit():



          if a_list[count].isdigit() and ...


          You could chain together method calls, doesn't make it less readable imo:



          astring = raw_input("Calculation: ").replace(" ", "")


          The first two loops you do could be condensed into one:



          # Next it will add only supported characters to the list
          a_list =
          for item in astring:
          if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
          print ("Unsupported Character: " + item)
          exit()
          a_list.append(item)



          Also, try to use better variable names than astring and a_list.






          share|improve this answer















          Heres some quick suggestions...




          edit: as pointed out, isdigit() only works on integers


          Your is_number(item) function could be the built-in isdigit():



          if a_list[count].isdigit() and ...


          You could chain together method calls, doesn't make it less readable imo:



          astring = raw_input("Calculation: ").replace(" ", "")


          The first two loops you do could be condensed into one:



          # Next it will add only supported characters to the list
          a_list =
          for item in astring:
          if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
          print ("Unsupported Character: " + item)
          exit()
          a_list.append(item)



          Also, try to use better variable names than astring and a_list.







          share|improve this answer















          share|improve this answer



          share|improve this answer








          edited Mar 22 at 17:41


























          answered Mar 22 at 1:24









          Ullauri

          463




          463







          • 1




            Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
            – Chuck Bartowski
            Mar 22 at 16:55







          • 1




            Nice call! Overlooked that one.
            – Ullauri
            Mar 22 at 17:37












          • 1




            Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
            – Chuck Bartowski
            Mar 22 at 16:55







          • 1




            Nice call! Overlooked that one.
            – Ullauri
            Mar 22 at 17:37







          1




          1




          Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
          – Chuck Bartowski
          Mar 22 at 16:55





          Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
          – Chuck Bartowski
          Mar 22 at 16:55





          1




          1




          Nice call! Overlooked that one.
          – Ullauri
          Mar 22 at 17:37




          Nice call! Overlooked that one.
          – Ullauri
          Mar 22 at 17:37












           

          draft saved


          draft discarded


























           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f190101%2fexpression-calculator-in-python%23new-answer', 'question_page');

          );

          Post as a guest













































































          Popular posts from this blog

          Chat program with C++ and SFML

          Function to Return a JSON Like Objects Using VBA Collections and Arrays

          Will my employers contract hold up in court?