Expression calculator in Python
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
4
down vote
favorite
This calculator accepts four operators: +, -, *, /, and works with parentheses.
Feedback on how I could make it easier to read and or more effective would be much appreciated.
This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.
from sys import exit
#Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
def is_number(item):
try:
float(item)
return True
except ValueError:
return False
def set_up_list():
#First part gets string and deletes whitespace
astring = raw_input("Calculation: ")
astring = astring.replace(" ", "")
#Next it will check if there are any unsupported characters in the string
for item in astring:
if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
print ("Unsupported Character: " + item)
exit()
#Then it creates the list and adds each individual character to the list
a_list =
for item in astring:
a_list.append(item)
count = 0
#Finally it combines individual numbers into actual numbers based on user input
while count < len(a_list) - 1:
if is_number(a_list[count]) and a_list[count + 1] == "(":
print ("Program does not accept parentheses directly after number, must have operator in between.")
exit()
if is_number(a_list[count]) and is_number(a_list[count + 1]):
a_list[count] += a_list[count + 1]
del a_list[count + 1]
elif is_number(a_list[count]) and a_list[count + 1] == ".":
try:
x = a_list[count+2]
except IndexError:
print ("Your formatting is off somehow.")
exit()
if is_number(a_list[count + 2]):
a_list[count] += a_list[count + 1] + a_list[count + 2]
del a_list[count + 2]
del a_list[count + 1]
else:
count += 1
return a_list
def perform_operation(n1, operand, n2):
if operand == "+":
return str(float(n1) + float(n2))
elif operand == "-":
return str(float(n1) - float(n2))
elif operand == "*":
return str(float(n1) * float(n2))
elif operand == "/":
try:
n = str(float(n1) / float(n2))
return n
except ZeroDivisionError:
print ("You tried to divide by 0.")
print ("Just for that I am going to terminate myself")
exit()
expression = set_up_list()
emergency_count = 0
#Makes code shorter, short for parentheses
P = ["(", ")"]
#If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
while len(expression) != 1:
#If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
#Will check to see if the first parentheses exists first so that it does not throw an index error
count = 0
while count < len(expression) - 1:
if expression[count] == "(":
if expression[count + 2] == ")":
del expression[count + 2]
del expression[count]
count += 1
#After that is done, it will multiply and divide what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#Then it will add and subtact what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
emergency_count += 1
if emergency_count >= 1000:
print ("Operation was too long or was bugged")
exit()
print (float(expression[0]))
python beginner python-3.x math-expression-eval
add a comment |Â
up vote
4
down vote
favorite
This calculator accepts four operators: +, -, *, /, and works with parentheses.
Feedback on how I could make it easier to read and or more effective would be much appreciated.
This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.
from sys import exit
#Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
def is_number(item):
try:
float(item)
return True
except ValueError:
return False
def set_up_list():
#First part gets string and deletes whitespace
astring = raw_input("Calculation: ")
astring = astring.replace(" ", "")
#Next it will check if there are any unsupported characters in the string
for item in astring:
if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
print ("Unsupported Character: " + item)
exit()
#Then it creates the list and adds each individual character to the list
a_list =
for item in astring:
a_list.append(item)
count = 0
#Finally it combines individual numbers into actual numbers based on user input
while count < len(a_list) - 1:
if is_number(a_list[count]) and a_list[count + 1] == "(":
print ("Program does not accept parentheses directly after number, must have operator in between.")
exit()
if is_number(a_list[count]) and is_number(a_list[count + 1]):
a_list[count] += a_list[count + 1]
del a_list[count + 1]
elif is_number(a_list[count]) and a_list[count + 1] == ".":
try:
x = a_list[count+2]
except IndexError:
print ("Your formatting is off somehow.")
exit()
if is_number(a_list[count + 2]):
a_list[count] += a_list[count + 1] + a_list[count + 2]
del a_list[count + 2]
del a_list[count + 1]
else:
count += 1
return a_list
def perform_operation(n1, operand, n2):
if operand == "+":
return str(float(n1) + float(n2))
elif operand == "-":
return str(float(n1) - float(n2))
elif operand == "*":
return str(float(n1) * float(n2))
elif operand == "/":
try:
n = str(float(n1) / float(n2))
return n
except ZeroDivisionError:
print ("You tried to divide by 0.")
print ("Just for that I am going to terminate myself")
exit()
expression = set_up_list()
emergency_count = 0
#Makes code shorter, short for parentheses
P = ["(", ")"]
#If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
while len(expression) != 1:
#If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
#Will check to see if the first parentheses exists first so that it does not throw an index error
count = 0
while count < len(expression) - 1:
if expression[count] == "(":
if expression[count + 2] == ")":
del expression[count + 2]
del expression[count]
count += 1
#After that is done, it will multiply and divide what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#Then it will add and subtact what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
emergency_count += 1
if emergency_count >= 1000:
print ("Operation was too long or was bugged")
exit()
print (float(expression[0]))
python beginner python-3.x math-expression-eval
add a comment |Â
up vote
4
down vote
favorite
up vote
4
down vote
favorite
This calculator accepts four operators: +, -, *, /, and works with parentheses.
Feedback on how I could make it easier to read and or more effective would be much appreciated.
This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.
from sys import exit
#Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
def is_number(item):
try:
float(item)
return True
except ValueError:
return False
def set_up_list():
#First part gets string and deletes whitespace
astring = raw_input("Calculation: ")
astring = astring.replace(" ", "")
#Next it will check if there are any unsupported characters in the string
for item in astring:
if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
print ("Unsupported Character: " + item)
exit()
#Then it creates the list and adds each individual character to the list
a_list =
for item in astring:
a_list.append(item)
count = 0
#Finally it combines individual numbers into actual numbers based on user input
while count < len(a_list) - 1:
if is_number(a_list[count]) and a_list[count + 1] == "(":
print ("Program does not accept parentheses directly after number, must have operator in between.")
exit()
if is_number(a_list[count]) and is_number(a_list[count + 1]):
a_list[count] += a_list[count + 1]
del a_list[count + 1]
elif is_number(a_list[count]) and a_list[count + 1] == ".":
try:
x = a_list[count+2]
except IndexError:
print ("Your formatting is off somehow.")
exit()
if is_number(a_list[count + 2]):
a_list[count] += a_list[count + 1] + a_list[count + 2]
del a_list[count + 2]
del a_list[count + 1]
else:
count += 1
return a_list
def perform_operation(n1, operand, n2):
if operand == "+":
return str(float(n1) + float(n2))
elif operand == "-":
return str(float(n1) - float(n2))
elif operand == "*":
return str(float(n1) * float(n2))
elif operand == "/":
try:
n = str(float(n1) / float(n2))
return n
except ZeroDivisionError:
print ("You tried to divide by 0.")
print ("Just for that I am going to terminate myself")
exit()
expression = set_up_list()
emergency_count = 0
#Makes code shorter, short for parentheses
P = ["(", ")"]
#If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
while len(expression) != 1:
#If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
#Will check to see if the first parentheses exists first so that it does not throw an index error
count = 0
while count < len(expression) - 1:
if expression[count] == "(":
if expression[count + 2] == ")":
del expression[count + 2]
del expression[count]
count += 1
#After that is done, it will multiply and divide what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#Then it will add and subtact what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
emergency_count += 1
if emergency_count >= 1000:
print ("Operation was too long or was bugged")
exit()
print (float(expression[0]))
python beginner python-3.x math-expression-eval
This calculator accepts four operators: +, -, *, /, and works with parentheses.
Feedback on how I could make it easier to read and or more effective would be much appreciated.
This is the first time I "built my own thing", and the other Python calculators on this website seem kinda different from mine, and I don't know if that is a good or bad thing.
from sys import exit
#Whenever is_number(x) exists, it is checking to see if x is a number, obviously.
def is_number(item):
try:
float(item)
return True
except ValueError:
return False
def set_up_list():
#First part gets string and deletes whitespace
astring = raw_input("Calculation: ")
astring = astring.replace(" ", "")
#Next it will check if there are any unsupported characters in the string
for item in astring:
if item not in ["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]:
print ("Unsupported Character: " + item)
exit()
#Then it creates the list and adds each individual character to the list
a_list =
for item in astring:
a_list.append(item)
count = 0
#Finally it combines individual numbers into actual numbers based on user input
while count < len(a_list) - 1:
if is_number(a_list[count]) and a_list[count + 1] == "(":
print ("Program does not accept parentheses directly after number, must have operator in between.")
exit()
if is_number(a_list[count]) and is_number(a_list[count + 1]):
a_list[count] += a_list[count + 1]
del a_list[count + 1]
elif is_number(a_list[count]) and a_list[count + 1] == ".":
try:
x = a_list[count+2]
except IndexError:
print ("Your formatting is off somehow.")
exit()
if is_number(a_list[count + 2]):
a_list[count] += a_list[count + 1] + a_list[count + 2]
del a_list[count + 2]
del a_list[count + 1]
else:
count += 1
return a_list
def perform_operation(n1, operand, n2):
if operand == "+":
return str(float(n1) + float(n2))
elif operand == "-":
return str(float(n1) - float(n2))
elif operand == "*":
return str(float(n1) * float(n2))
elif operand == "/":
try:
n = str(float(n1) / float(n2))
return n
except ZeroDivisionError:
print ("You tried to divide by 0.")
print ("Just for that I am going to terminate myself")
exit()
expression = set_up_list()
emergency_count = 0
#Makes code shorter, short for parentheses
P = ["(", ")"]
#If the length of the list is 1, there is only 1 number, meaning an answer has been reached.
while len(expression) != 1:
#If there are parentheses around a single list item, the list item is obviously just a number, eliminate parentheses
#Will check to see if the first parentheses exists first so that it does not throw an index error
count = 0
while count < len(expression) - 1:
if expression[count] == "(":
if expression[count + 2] == ")":
del expression[count + 2]
del expression[count]
count += 1
#After that is done, it will multiply and divide what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["*", "/"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#Then it will add and subtact what it can
count = 0
while count < len(expression) - 1:
if expression[count] in ["+", "-"] and not (expression[count+1] in P or expression[count-1] in P):
expression[count - 1] = perform_operation(expression[count - 1], expression[count], expression[count + 1])
del expression[count + 1]
del expression[count]
count += 1
#The steps will repeat until only one character is left. Operations that fail will be stopped by emergency count.
emergency_count += 1
if emergency_count >= 1000:
print ("Operation was too long or was bugged")
exit()
print (float(expression[0]))
python beginner python-3.x math-expression-eval
edited Mar 22 at 2:03
Jamalâ¦
30.1k11114225
30.1k11114225
asked Mar 21 at 6:52
Chuck Bartowski
213
213
add a comment |Â
add a comment |Â
2 Answers
2
active
oldest
votes
up vote
3
down vote
Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)
- Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.
- Comments need one space after the
#
- There's some required spaces between phrases you're missing
- No space after
print
! - Wrap lines to 79 columns
- Comments need one space after the
- The comment before
is_number
should be a"""Doc comment."""
set_up_list
isn't a good name. I had to read the code and see it used in context to understand what it did.read_expression
might be better.- You have a lot of use of
while
with some sort of counter (ie.while count < len(expression) - 1:
). In python, we almost exclusively spell this asfor e in expression:
. If you need the index, thenfor i, e in enumerate(expression)
. - We typically use
'single quoted strings'
."Double quotes"
are usually reserved for format strings (ex."1 + 1 is ".format(1 + 1)
). - Your use of
in
is nice. But, did you know instead of that big long list you can just doitem not in '01234567890+-*/.()'
? Strings are behave like lists in this way. - Do the same thing for
P = ['(', ')']
.P
is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But,x in '()'
is nearly as short and gets the point across much better. - There is no need for your
creates the list and adds each individual character to the list
(no need fora_list
, just useastring
and give it a better name!). Strings are sequences so you can dolen(astring)
,astring[23]
, etc. - You have
print()
s andexit()
s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function likecalculate()
, which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extendingException
for things likeUnclosedParenError
orUnexpectedCharacterError
) in error conditions. This means you can test it like so:
from unittest import main, TestCase
from yourcalculator import calculate
class TestCalculator(TestCase):
def test_basic_sums(self):
self.assertEqual(42, calculate('40 + 2'))
if __name__ == '__main__':
main()
- Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a
calculate
function. You should use amain()
function for this and use the standardif __name__ == '__main__': main()
:
def main():
try:
while True:
try:
print(calculate(input('Enter an expression: ')))
except CalculatorError as e:
print(e)
except KeyboardInterrupt:
break
if __name__ == '__main__':
main()
- You probably want better errors than
Your formatting is off somehow
- In
perform_operation
, you usefloat()
(presumably on numbers that you've already calledis_number
on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things likeif foo_wont_raise(bar): foo(bar)
- I see a lot of
del
. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list) - There's really no need for
emergency_count
. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised) - I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html
With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).
You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).
Intrinsically, we have two different concerns here:
- We want to be able to handle lots of arbitrary spacing (ie.
1+1
is the same as1 + 1
is the same as1 + 1
) and ensure that there are no invalid characters (or bad numbers like123.456.789
) - Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it
In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:
- Scanner: remove excess spacing and ensure all characters are valid
c in '01234567890+-*/.()'
and from each valid character produce a "Token" (ex. in the expression2 * 20 + 2
the tokens are2
,'*'
,20
,'+'
,2
) - Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)
Let's tackle the scanner first. It has three main things it needs to do:
- Ignore whitespace (although this has it's limits, we probably don't want
123 456
to become123456
--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that123
and456
) - Turn symbols into single tokens (ex
c in '+-/*()'
) - Parse numbers (may be multiple characters, can be ill formatted as in
123.456.789
)
Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)
The scanner could look something like this:
class Scanner(BaseParser):
"""Scanner scans an input string for calculator tokens and yields them.
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
"""
def scan(self):
"""Yields all tokens in the input."""
while not self._done():
# Ignore any whitespace that may be next
self._consume_whitespace()
# Emit any symbol tokens that may be next
yield from self._take(lambda char: char in '+-*/()')
# Emit any number token that may be next
yield from self._take_number()
def _consume_whitespace(self):
"""_take()s whitespace characters, but does not yield them."""
# Note we need the list here to force evaluation of the generator
list(self._take(lambda char: char.isspace()))
def _take_number(self):
"""Yields a single number if there is one next in the input."""
# Gather up the digits/. forming the next number in the input
number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))
# If number is empty, we didn't scan digits, so don't try to float it
if number:
try:
yield float(number)
except ValueError:
raise BadNumberError(number)
Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan()
. It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.
In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:
- The need for
self._done()
(we'd optimally like one of the functions we use to handle this for us, but unfortunately theirStopIteration
s are consumed when weyield from
them) - The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a
_check_for_invalid_chars()
that does something likeself._take(lambda c: c not in '01234567890.+-/*()')
and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.
Now, hopefully you were able to figure out what _take()
does (it's provided by BaseParser
). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):
class BaseParser:
"""A base class containing utilities useful for a Parser."""
def __init__(self, items):
self._items = PeekableIterator(items)
def _take(self, predicate):
"""
Yields a contiguous group of items from the items being parsed for
which the predicate returns True.
>>> p = BaseParser([2, 4, 3])
>>> list(p._take(lambda x: x % 2 == 0))
[2, 4]
"""
while predicate(self._items.peek()):
yield next(self._items)
def _done(self):
"""Returns True if the underlying items have been fully consumed."""
try:
self._items.peek()
return False
except StopIteration:
return True
class PeekableIterator:
"""An iterator that supports 1-lookahead (peek)."""
def __init__(self, iterable):
self._iterator = iter(iterable)
# NOTE: We use None here to denote that we haven't peeked yet. This
# doesn't work if None can occur in the iterable (so this
# doesn't generalize!), but for our purposes here it's fine!
self._next_item = None
def peek(self):
"""
Return the next item that will be returned by the iterator without
advancing the iterator. Raises StopIteration if the iterator is done.
>>> i = PeekableIterator([1, 2, 3])
>>> i.peek()
1
>>> i.peek()
1
>>> next(i)
1
>>> next(i)
2
>>> i.peek()
3
"""
if self._next_item is None:
self._next_item = next(self._iterator)
return self._next_item
def __next__(self):
if self._next_item is not None:
next_item = self._next_item
self._next_item = None
return next_item
return next(self._iterator)
def __iter__(self):
return self
I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.
Now we have a working scanner that we can use like so:
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float
will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.
Now, let's talk about the parser. The reason why we are going to reuse BaseParser
is because, in effect, Scanner
is a parser that takes an iterable (specifically a stringâÂÂan iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single floatâÂÂor it will raise an exception).
Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:
Expression = Term ("+" .
Term = Factor ("*" Factor} .
Factor = number | "(" Expression ")" .
Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-")
means we need either a +
or a -
next. The
indicate that whatever is inside them can occur zero or more times. So, an
Expression
could be just a Term
, but it could also be a Term "+" Term "-" Term "+" Term
.
If you try some examples like 1 * (2 - 3) + 4
, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression
and Factor
group things so order of operations works (the deepest in the nesting are the *
and /
, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression
by evaluating all its Term
s and then adding/subtracting the results as appropriate, evaluate a Term
by evaluating all of its Factor
s and then multiplying/dividing the results as appropriate, and evaluate a Factor
by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html
With these in mind, let's write a Parser
capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.
import operator
class Parser(BaseParser):
"""Parser for tokenized calculator inputs."""
def parse(self):
"""Parse calculator input and return the result of evaluating it.
>>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
5
"""
return self._parse_expression()
def _parse_expression(self):
"""Parse an Expression and return the result of evaluating it.
>>> Parser([1, '+', 2])._parse_expression()
3
"""
# Parse the first (required) Term
terms = [self._parse_term()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '+-'
terms += flatten((op, self._parse_term()) for op in self._take(op))
return evaluate(terms)
def _parse_term(self):
"""Parse a Term and return the result of evaluating it.
>>> Parser([1, '*', 2])._parse_term()
2
"""
# Parse the first (required) Factor
factors = [self._parse_factor()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '*/'
factors += flatten((op, self._parse_factor()) for op in self._take(op))
return evaluate(factors)
def _parse_factor(self):
"""Parse a Factor and return the result of evaluating it.
>>> Parser([1])._parse_factor()
1
>>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
7
"""
# NOTE: Here's where Python gets a little cumbersome. This isn't really
# a for, we're just using it to handle the case where it doesn't
# find a number (gracefully skip). If it finds one, we return the
# number.
for n in self._take(lambda t: isinstance(t, float)):
return n
# If we failed to parse a number, then try to find a '('
for _ in self._take(lambda t: t == '('):
# If we found a '(', parse the subexpression
value = self._parse_expression()
# Make sure the subexpression is followed by a ')'
self._expect(')')
# Both parsing the number and subexpresion failed
raise self._unexpected('number', '(')
def _expect(self, char):
"""Expect a certain character, or raise if it is not next."""
for _ in self._take(lambda t: t == char):
return
raise self._unexpected(char)
def _unexpected(self, *expected):
"""Create an exception for an unexpected character."""
try:
return UnexpectedCharacterError(self._items.peek(), expected)
except StopIteration:
return UnexpectedEndError(expected)
def evaluate(items):
"""
Evaluate a list of floats separated by operators (at the same level of
precedence). Returns the result.
>>> evaluate([3, '*', 4, '/', 2])
6
"""
assert items, 'cannot evaluate empty list'
# x, x + x, x + x + x, etc. all have an odd number of tokens
assert len(items) % 2 == 1, 'list must be of odd length'
while len(items) > 1:
items[-3:] = [_evaluate_binary(*items[-3:])]
return items[0]
def _evaluate_binary(lhs, op, rhs):
"""Evalutates a single binary operation op where lhs and rhs are floats."""
ops = '+': operator.add,
'-': operator.sub,
'*': operator.mul,
'/': operator.truediv
return ops[op](lhs, rhs)
There's a lot to unpack here! First, take a look at parse
. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression
we parse a term , collect any number of ("+"|"-") Term
that follow it, and evaluate out that math. We do something similar for Term
. For Factor
we either try to parse a number (just a float
) or a "("
(subexpression) and if we can't find that we raise an error.
Notice how the actual work of doing the math in done in evaluate
and _evaluate_binary
. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate
intentionally cannot handle [1, '+', 2, '*', 3]
. The parser handles this. It would first parse a Term
and call evaluate([2, '*', 3])
. This would be returned and then it could finish parsing the Expression
and call evaluate([1, '+', 6])
.
For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.
This code needs the following helper:
def flatten(iterable):
"""Flattens a nested iterable by one nesting layer.
>>> flatten([[1,2], [3]])
[1, 2, 3]
"""
return [x for l in iterable for x in l]
Now all that's left is to wire the scanner and parser together as we discussed before:
def calculate(expression):
"""Evaluates a mathematical expression and returns the result.
>>> calculate('3 * (1 + 6 / 3)')
9
"""
return Parser(Scanner(expression).scan()).parse()
And we're done!
Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:
- Support hexadecimal numbers (you only need to modify the scanner!)
- Support mathematical constants like
pi
ande
(you only need to modify the scanner!) - Support leading negative signs (you only need to modify the parser!)
- Support new operations like
%
(modulo)
Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
add a comment |Â
up vote
2
down vote
Heres some quick suggestions...
edit: as pointed out, isdigit() only works on integers
Your is_number(item) function could be the built-in isdigit():
if a_list[count].isdigit() and ...
You could chain together method calls, doesn't make it less readable imo:
astring = raw_input("Calculation: ").replace(" ", "")
The first two loops you do could be condensed into one:
# Next it will add only supported characters to the list
a_list =
for item in astring:
if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
print ("Unsupported Character: " + item)
exit()
a_list.append(item)
Also, try to use better variable names than astring and a_list.
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
add a comment |Â
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
3
down vote
Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)
- Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.
- Comments need one space after the
#
- There's some required spaces between phrases you're missing
- No space after
print
! - Wrap lines to 79 columns
- Comments need one space after the
- The comment before
is_number
should be a"""Doc comment."""
set_up_list
isn't a good name. I had to read the code and see it used in context to understand what it did.read_expression
might be better.- You have a lot of use of
while
with some sort of counter (ie.while count < len(expression) - 1:
). In python, we almost exclusively spell this asfor e in expression:
. If you need the index, thenfor i, e in enumerate(expression)
. - We typically use
'single quoted strings'
."Double quotes"
are usually reserved for format strings (ex."1 + 1 is ".format(1 + 1)
). - Your use of
in
is nice. But, did you know instead of that big long list you can just doitem not in '01234567890+-*/.()'
? Strings are behave like lists in this way. - Do the same thing for
P = ['(', ')']
.P
is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But,x in '()'
is nearly as short and gets the point across much better. - There is no need for your
creates the list and adds each individual character to the list
(no need fora_list
, just useastring
and give it a better name!). Strings are sequences so you can dolen(astring)
,astring[23]
, etc. - You have
print()
s andexit()
s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function likecalculate()
, which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extendingException
for things likeUnclosedParenError
orUnexpectedCharacterError
) in error conditions. This means you can test it like so:
from unittest import main, TestCase
from yourcalculator import calculate
class TestCalculator(TestCase):
def test_basic_sums(self):
self.assertEqual(42, calculate('40 + 2'))
if __name__ == '__main__':
main()
- Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a
calculate
function. You should use amain()
function for this and use the standardif __name__ == '__main__': main()
:
def main():
try:
while True:
try:
print(calculate(input('Enter an expression: ')))
except CalculatorError as e:
print(e)
except KeyboardInterrupt:
break
if __name__ == '__main__':
main()
- You probably want better errors than
Your formatting is off somehow
- In
perform_operation
, you usefloat()
(presumably on numbers that you've already calledis_number
on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things likeif foo_wont_raise(bar): foo(bar)
- I see a lot of
del
. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list) - There's really no need for
emergency_count
. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised) - I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html
With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).
You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).
Intrinsically, we have two different concerns here:
- We want to be able to handle lots of arbitrary spacing (ie.
1+1
is the same as1 + 1
is the same as1 + 1
) and ensure that there are no invalid characters (or bad numbers like123.456.789
) - Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it
In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:
- Scanner: remove excess spacing and ensure all characters are valid
c in '01234567890+-*/.()'
and from each valid character produce a "Token" (ex. in the expression2 * 20 + 2
the tokens are2
,'*'
,20
,'+'
,2
) - Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)
Let's tackle the scanner first. It has three main things it needs to do:
- Ignore whitespace (although this has it's limits, we probably don't want
123 456
to become123456
--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that123
and456
) - Turn symbols into single tokens (ex
c in '+-/*()'
) - Parse numbers (may be multiple characters, can be ill formatted as in
123.456.789
)
Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)
The scanner could look something like this:
class Scanner(BaseParser):
"""Scanner scans an input string for calculator tokens and yields them.
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
"""
def scan(self):
"""Yields all tokens in the input."""
while not self._done():
# Ignore any whitespace that may be next
self._consume_whitespace()
# Emit any symbol tokens that may be next
yield from self._take(lambda char: char in '+-*/()')
# Emit any number token that may be next
yield from self._take_number()
def _consume_whitespace(self):
"""_take()s whitespace characters, but does not yield them."""
# Note we need the list here to force evaluation of the generator
list(self._take(lambda char: char.isspace()))
def _take_number(self):
"""Yields a single number if there is one next in the input."""
# Gather up the digits/. forming the next number in the input
number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))
# If number is empty, we didn't scan digits, so don't try to float it
if number:
try:
yield float(number)
except ValueError:
raise BadNumberError(number)
Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan()
. It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.
In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:
- The need for
self._done()
(we'd optimally like one of the functions we use to handle this for us, but unfortunately theirStopIteration
s are consumed when weyield from
them) - The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a
_check_for_invalid_chars()
that does something likeself._take(lambda c: c not in '01234567890.+-/*()')
and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.
Now, hopefully you were able to figure out what _take()
does (it's provided by BaseParser
). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):
class BaseParser:
"""A base class containing utilities useful for a Parser."""
def __init__(self, items):
self._items = PeekableIterator(items)
def _take(self, predicate):
"""
Yields a contiguous group of items from the items being parsed for
which the predicate returns True.
>>> p = BaseParser([2, 4, 3])
>>> list(p._take(lambda x: x % 2 == 0))
[2, 4]
"""
while predicate(self._items.peek()):
yield next(self._items)
def _done(self):
"""Returns True if the underlying items have been fully consumed."""
try:
self._items.peek()
return False
except StopIteration:
return True
class PeekableIterator:
"""An iterator that supports 1-lookahead (peek)."""
def __init__(self, iterable):
self._iterator = iter(iterable)
# NOTE: We use None here to denote that we haven't peeked yet. This
# doesn't work if None can occur in the iterable (so this
# doesn't generalize!), but for our purposes here it's fine!
self._next_item = None
def peek(self):
"""
Return the next item that will be returned by the iterator without
advancing the iterator. Raises StopIteration if the iterator is done.
>>> i = PeekableIterator([1, 2, 3])
>>> i.peek()
1
>>> i.peek()
1
>>> next(i)
1
>>> next(i)
2
>>> i.peek()
3
"""
if self._next_item is None:
self._next_item = next(self._iterator)
return self._next_item
def __next__(self):
if self._next_item is not None:
next_item = self._next_item
self._next_item = None
return next_item
return next(self._iterator)
def __iter__(self):
return self
I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.
Now we have a working scanner that we can use like so:
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float
will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.
Now, let's talk about the parser. The reason why we are going to reuse BaseParser
is because, in effect, Scanner
is a parser that takes an iterable (specifically a stringâÂÂan iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single floatâÂÂor it will raise an exception).
Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:
Expression = Term ("+" .
Term = Factor ("*" Factor} .
Factor = number | "(" Expression ")" .
Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-")
means we need either a +
or a -
next. The
indicate that whatever is inside them can occur zero or more times. So, an
Expression
could be just a Term
, but it could also be a Term "+" Term "-" Term "+" Term
.
If you try some examples like 1 * (2 - 3) + 4
, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression
and Factor
group things so order of operations works (the deepest in the nesting are the *
and /
, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression
by evaluating all its Term
s and then adding/subtracting the results as appropriate, evaluate a Term
by evaluating all of its Factor
s and then multiplying/dividing the results as appropriate, and evaluate a Factor
by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html
With these in mind, let's write a Parser
capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.
import operator
class Parser(BaseParser):
"""Parser for tokenized calculator inputs."""
def parse(self):
"""Parse calculator input and return the result of evaluating it.
>>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
5
"""
return self._parse_expression()
def _parse_expression(self):
"""Parse an Expression and return the result of evaluating it.
>>> Parser([1, '+', 2])._parse_expression()
3
"""
# Parse the first (required) Term
terms = [self._parse_term()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '+-'
terms += flatten((op, self._parse_term()) for op in self._take(op))
return evaluate(terms)
def _parse_term(self):
"""Parse a Term and return the result of evaluating it.
>>> Parser([1, '*', 2])._parse_term()
2
"""
# Parse the first (required) Factor
factors = [self._parse_factor()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '*/'
factors += flatten((op, self._parse_factor()) for op in self._take(op))
return evaluate(factors)
def _parse_factor(self):
"""Parse a Factor and return the result of evaluating it.
>>> Parser([1])._parse_factor()
1
>>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
7
"""
# NOTE: Here's where Python gets a little cumbersome. This isn't really
# a for, we're just using it to handle the case where it doesn't
# find a number (gracefully skip). If it finds one, we return the
# number.
for n in self._take(lambda t: isinstance(t, float)):
return n
# If we failed to parse a number, then try to find a '('
for _ in self._take(lambda t: t == '('):
# If we found a '(', parse the subexpression
value = self._parse_expression()
# Make sure the subexpression is followed by a ')'
self._expect(')')
# Both parsing the number and subexpresion failed
raise self._unexpected('number', '(')
def _expect(self, char):
"""Expect a certain character, or raise if it is not next."""
for _ in self._take(lambda t: t == char):
return
raise self._unexpected(char)
def _unexpected(self, *expected):
"""Create an exception for an unexpected character."""
try:
return UnexpectedCharacterError(self._items.peek(), expected)
except StopIteration:
return UnexpectedEndError(expected)
def evaluate(items):
"""
Evaluate a list of floats separated by operators (at the same level of
precedence). Returns the result.
>>> evaluate([3, '*', 4, '/', 2])
6
"""
assert items, 'cannot evaluate empty list'
# x, x + x, x + x + x, etc. all have an odd number of tokens
assert len(items) % 2 == 1, 'list must be of odd length'
while len(items) > 1:
items[-3:] = [_evaluate_binary(*items[-3:])]
return items[0]
def _evaluate_binary(lhs, op, rhs):
"""Evalutates a single binary operation op where lhs and rhs are floats."""
ops = '+': operator.add,
'-': operator.sub,
'*': operator.mul,
'/': operator.truediv
return ops[op](lhs, rhs)
There's a lot to unpack here! First, take a look at parse
. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression
we parse a term , collect any number of ("+"|"-") Term
that follow it, and evaluate out that math. We do something similar for Term
. For Factor
we either try to parse a number (just a float
) or a "("
(subexpression) and if we can't find that we raise an error.
Notice how the actual work of doing the math in done in evaluate
and _evaluate_binary
. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate
intentionally cannot handle [1, '+', 2, '*', 3]
. The parser handles this. It would first parse a Term
and call evaluate([2, '*', 3])
. This would be returned and then it could finish parsing the Expression
and call evaluate([1, '+', 6])
.
For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.
This code needs the following helper:
def flatten(iterable):
"""Flattens a nested iterable by one nesting layer.
>>> flatten([[1,2], [3]])
[1, 2, 3]
"""
return [x for l in iterable for x in l]
Now all that's left is to wire the scanner and parser together as we discussed before:
def calculate(expression):
"""Evaluates a mathematical expression and returns the result.
>>> calculate('3 * (1 + 6 / 3)')
9
"""
return Parser(Scanner(expression).scan()).parse()
And we're done!
Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:
- Support hexadecimal numbers (you only need to modify the scanner!)
- Support mathematical constants like
pi
ande
(you only need to modify the scanner!) - Support leading negative signs (you only need to modify the parser!)
- Support new operations like
%
(modulo)
Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
add a comment |Â
up vote
3
down vote
Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)
- Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.
- Comments need one space after the
#
- There's some required spaces between phrases you're missing
- No space after
print
! - Wrap lines to 79 columns
- Comments need one space after the
- The comment before
is_number
should be a"""Doc comment."""
set_up_list
isn't a good name. I had to read the code and see it used in context to understand what it did.read_expression
might be better.- You have a lot of use of
while
with some sort of counter (ie.while count < len(expression) - 1:
). In python, we almost exclusively spell this asfor e in expression:
. If you need the index, thenfor i, e in enumerate(expression)
. - We typically use
'single quoted strings'
."Double quotes"
are usually reserved for format strings (ex."1 + 1 is ".format(1 + 1)
). - Your use of
in
is nice. But, did you know instead of that big long list you can just doitem not in '01234567890+-*/.()'
? Strings are behave like lists in this way. - Do the same thing for
P = ['(', ')']
.P
is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But,x in '()'
is nearly as short and gets the point across much better. - There is no need for your
creates the list and adds each individual character to the list
(no need fora_list
, just useastring
and give it a better name!). Strings are sequences so you can dolen(astring)
,astring[23]
, etc. - You have
print()
s andexit()
s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function likecalculate()
, which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extendingException
for things likeUnclosedParenError
orUnexpectedCharacterError
) in error conditions. This means you can test it like so:
from unittest import main, TestCase
from yourcalculator import calculate
class TestCalculator(TestCase):
def test_basic_sums(self):
self.assertEqual(42, calculate('40 + 2'))
if __name__ == '__main__':
main()
- Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a
calculate
function. You should use amain()
function for this and use the standardif __name__ == '__main__': main()
:
def main():
try:
while True:
try:
print(calculate(input('Enter an expression: ')))
except CalculatorError as e:
print(e)
except KeyboardInterrupt:
break
if __name__ == '__main__':
main()
- You probably want better errors than
Your formatting is off somehow
- In
perform_operation
, you usefloat()
(presumably on numbers that you've already calledis_number
on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things likeif foo_wont_raise(bar): foo(bar)
- I see a lot of
del
. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list) - There's really no need for
emergency_count
. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised) - I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html
With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).
You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).
Intrinsically, we have two different concerns here:
- We want to be able to handle lots of arbitrary spacing (ie.
1+1
is the same as1 + 1
is the same as1 + 1
) and ensure that there are no invalid characters (or bad numbers like123.456.789
) - Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it
In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:
- Scanner: remove excess spacing and ensure all characters are valid
c in '01234567890+-*/.()'
and from each valid character produce a "Token" (ex. in the expression2 * 20 + 2
the tokens are2
,'*'
,20
,'+'
,2
) - Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)
Let's tackle the scanner first. It has three main things it needs to do:
- Ignore whitespace (although this has it's limits, we probably don't want
123 456
to become123456
--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that123
and456
) - Turn symbols into single tokens (ex
c in '+-/*()'
) - Parse numbers (may be multiple characters, can be ill formatted as in
123.456.789
)
Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)
The scanner could look something like this:
class Scanner(BaseParser):
"""Scanner scans an input string for calculator tokens and yields them.
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
"""
def scan(self):
"""Yields all tokens in the input."""
while not self._done():
# Ignore any whitespace that may be next
self._consume_whitespace()
# Emit any symbol tokens that may be next
yield from self._take(lambda char: char in '+-*/()')
# Emit any number token that may be next
yield from self._take_number()
def _consume_whitespace(self):
"""_take()s whitespace characters, but does not yield them."""
# Note we need the list here to force evaluation of the generator
list(self._take(lambda char: char.isspace()))
def _take_number(self):
"""Yields a single number if there is one next in the input."""
# Gather up the digits/. forming the next number in the input
number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))
# If number is empty, we didn't scan digits, so don't try to float it
if number:
try:
yield float(number)
except ValueError:
raise BadNumberError(number)
Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan()
. It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.
In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:
- The need for
self._done()
(we'd optimally like one of the functions we use to handle this for us, but unfortunately theirStopIteration
s are consumed when weyield from
them) - The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a
_check_for_invalid_chars()
that does something likeself._take(lambda c: c not in '01234567890.+-/*()')
and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.
Now, hopefully you were able to figure out what _take()
does (it's provided by BaseParser
). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):
class BaseParser:
"""A base class containing utilities useful for a Parser."""
def __init__(self, items):
self._items = PeekableIterator(items)
def _take(self, predicate):
"""
Yields a contiguous group of items from the items being parsed for
which the predicate returns True.
>>> p = BaseParser([2, 4, 3])
>>> list(p._take(lambda x: x % 2 == 0))
[2, 4]
"""
while predicate(self._items.peek()):
yield next(self._items)
def _done(self):
"""Returns True if the underlying items have been fully consumed."""
try:
self._items.peek()
return False
except StopIteration:
return True
class PeekableIterator:
"""An iterator that supports 1-lookahead (peek)."""
def __init__(self, iterable):
self._iterator = iter(iterable)
# NOTE: We use None here to denote that we haven't peeked yet. This
# doesn't work if None can occur in the iterable (so this
# doesn't generalize!), but for our purposes here it's fine!
self._next_item = None
def peek(self):
"""
Return the next item that will be returned by the iterator without
advancing the iterator. Raises StopIteration if the iterator is done.
>>> i = PeekableIterator([1, 2, 3])
>>> i.peek()
1
>>> i.peek()
1
>>> next(i)
1
>>> next(i)
2
>>> i.peek()
3
"""
if self._next_item is None:
self._next_item = next(self._iterator)
return self._next_item
def __next__(self):
if self._next_item is not None:
next_item = self._next_item
self._next_item = None
return next_item
return next(self._iterator)
def __iter__(self):
return self
I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.
Now we have a working scanner that we can use like so:
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float
will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.
Now, let's talk about the parser. The reason why we are going to reuse BaseParser
is because, in effect, Scanner
is a parser that takes an iterable (specifically a stringâÂÂan iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single floatâÂÂor it will raise an exception).
Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:
Expression = Term ("+" .
Term = Factor ("*" Factor} .
Factor = number | "(" Expression ")" .
Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-")
means we need either a +
or a -
next. The
indicate that whatever is inside them can occur zero or more times. So, an
Expression
could be just a Term
, but it could also be a Term "+" Term "-" Term "+" Term
.
If you try some examples like 1 * (2 - 3) + 4
, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression
and Factor
group things so order of operations works (the deepest in the nesting are the *
and /
, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression
by evaluating all its Term
s and then adding/subtracting the results as appropriate, evaluate a Term
by evaluating all of its Factor
s and then multiplying/dividing the results as appropriate, and evaluate a Factor
by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html
With these in mind, let's write a Parser
capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.
import operator
class Parser(BaseParser):
"""Parser for tokenized calculator inputs."""
def parse(self):
"""Parse calculator input and return the result of evaluating it.
>>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
5
"""
return self._parse_expression()
def _parse_expression(self):
"""Parse an Expression and return the result of evaluating it.
>>> Parser([1, '+', 2])._parse_expression()
3
"""
# Parse the first (required) Term
terms = [self._parse_term()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '+-'
terms += flatten((op, self._parse_term()) for op in self._take(op))
return evaluate(terms)
def _parse_term(self):
"""Parse a Term and return the result of evaluating it.
>>> Parser([1, '*', 2])._parse_term()
2
"""
# Parse the first (required) Factor
factors = [self._parse_factor()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '*/'
factors += flatten((op, self._parse_factor()) for op in self._take(op))
return evaluate(factors)
def _parse_factor(self):
"""Parse a Factor and return the result of evaluating it.
>>> Parser([1])._parse_factor()
1
>>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
7
"""
# NOTE: Here's where Python gets a little cumbersome. This isn't really
# a for, we're just using it to handle the case where it doesn't
# find a number (gracefully skip). If it finds one, we return the
# number.
for n in self._take(lambda t: isinstance(t, float)):
return n
# If we failed to parse a number, then try to find a '('
for _ in self._take(lambda t: t == '('):
# If we found a '(', parse the subexpression
value = self._parse_expression()
# Make sure the subexpression is followed by a ')'
self._expect(')')
# Both parsing the number and subexpresion failed
raise self._unexpected('number', '(')
def _expect(self, char):
"""Expect a certain character, or raise if it is not next."""
for _ in self._take(lambda t: t == char):
return
raise self._unexpected(char)
def _unexpected(self, *expected):
"""Create an exception for an unexpected character."""
try:
return UnexpectedCharacterError(self._items.peek(), expected)
except StopIteration:
return UnexpectedEndError(expected)
def evaluate(items):
"""
Evaluate a list of floats separated by operators (at the same level of
precedence). Returns the result.
>>> evaluate([3, '*', 4, '/', 2])
6
"""
assert items, 'cannot evaluate empty list'
# x, x + x, x + x + x, etc. all have an odd number of tokens
assert len(items) % 2 == 1, 'list must be of odd length'
while len(items) > 1:
items[-3:] = [_evaluate_binary(*items[-3:])]
return items[0]
def _evaluate_binary(lhs, op, rhs):
"""Evalutates a single binary operation op where lhs and rhs are floats."""
ops = '+': operator.add,
'-': operator.sub,
'*': operator.mul,
'/': operator.truediv
return ops[op](lhs, rhs)
There's a lot to unpack here! First, take a look at parse
. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression
we parse a term , collect any number of ("+"|"-") Term
that follow it, and evaluate out that math. We do something similar for Term
. For Factor
we either try to parse a number (just a float
) or a "("
(subexpression) and if we can't find that we raise an error.
Notice how the actual work of doing the math in done in evaluate
and _evaluate_binary
. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate
intentionally cannot handle [1, '+', 2, '*', 3]
. The parser handles this. It would first parse a Term
and call evaluate([2, '*', 3])
. This would be returned and then it could finish parsing the Expression
and call evaluate([1, '+', 6])
.
For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.
This code needs the following helper:
def flatten(iterable):
"""Flattens a nested iterable by one nesting layer.
>>> flatten([[1,2], [3]])
[1, 2, 3]
"""
return [x for l in iterable for x in l]
Now all that's left is to wire the scanner and parser together as we discussed before:
def calculate(expression):
"""Evaluates a mathematical expression and returns the result.
>>> calculate('3 * (1 + 6 / 3)')
9
"""
return Parser(Scanner(expression).scan()).parse()
And we're done!
Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:
- Support hexadecimal numbers (you only need to modify the scanner!)
- Support mathematical constants like
pi
ande
(you only need to modify the scanner!) - Support leading negative signs (you only need to modify the parser!)
- Support new operations like
%
(modulo)
Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
add a comment |Â
up vote
3
down vote
up vote
3
down vote
Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)
- Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.
- Comments need one space after the
#
- There's some required spaces between phrases you're missing
- No space after
print
! - Wrap lines to 79 columns
- Comments need one space after the
- The comment before
is_number
should be a"""Doc comment."""
set_up_list
isn't a good name. I had to read the code and see it used in context to understand what it did.read_expression
might be better.- You have a lot of use of
while
with some sort of counter (ie.while count < len(expression) - 1:
). In python, we almost exclusively spell this asfor e in expression:
. If you need the index, thenfor i, e in enumerate(expression)
. - We typically use
'single quoted strings'
."Double quotes"
are usually reserved for format strings (ex."1 + 1 is ".format(1 + 1)
). - Your use of
in
is nice. But, did you know instead of that big long list you can just doitem not in '01234567890+-*/.()'
? Strings are behave like lists in this way. - Do the same thing for
P = ['(', ')']
.P
is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But,x in '()'
is nearly as short and gets the point across much better. - There is no need for your
creates the list and adds each individual character to the list
(no need fora_list
, just useastring
and give it a better name!). Strings are sequences so you can dolen(astring)
,astring[23]
, etc. - You have
print()
s andexit()
s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function likecalculate()
, which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extendingException
for things likeUnclosedParenError
orUnexpectedCharacterError
) in error conditions. This means you can test it like so:
from unittest import main, TestCase
from yourcalculator import calculate
class TestCalculator(TestCase):
def test_basic_sums(self):
self.assertEqual(42, calculate('40 + 2'))
if __name__ == '__main__':
main()
- Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a
calculate
function. You should use amain()
function for this and use the standardif __name__ == '__main__': main()
:
def main():
try:
while True:
try:
print(calculate(input('Enter an expression: ')))
except CalculatorError as e:
print(e)
except KeyboardInterrupt:
break
if __name__ == '__main__':
main()
- You probably want better errors than
Your formatting is off somehow
- In
perform_operation
, you usefloat()
(presumably on numbers that you've already calledis_number
on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things likeif foo_wont_raise(bar): foo(bar)
- I see a lot of
del
. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list) - There's really no need for
emergency_count
. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised) - I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html
With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).
You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).
Intrinsically, we have two different concerns here:
- We want to be able to handle lots of arbitrary spacing (ie.
1+1
is the same as1 + 1
is the same as1 + 1
) and ensure that there are no invalid characters (or bad numbers like123.456.789
) - Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it
In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:
- Scanner: remove excess spacing and ensure all characters are valid
c in '01234567890+-*/.()'
and from each valid character produce a "Token" (ex. in the expression2 * 20 + 2
the tokens are2
,'*'
,20
,'+'
,2
) - Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)
Let's tackle the scanner first. It has three main things it needs to do:
- Ignore whitespace (although this has it's limits, we probably don't want
123 456
to become123456
--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that123
and456
) - Turn symbols into single tokens (ex
c in '+-/*()'
) - Parse numbers (may be multiple characters, can be ill formatted as in
123.456.789
)
Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)
The scanner could look something like this:
class Scanner(BaseParser):
"""Scanner scans an input string for calculator tokens and yields them.
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
"""
def scan(self):
"""Yields all tokens in the input."""
while not self._done():
# Ignore any whitespace that may be next
self._consume_whitespace()
# Emit any symbol tokens that may be next
yield from self._take(lambda char: char in '+-*/()')
# Emit any number token that may be next
yield from self._take_number()
def _consume_whitespace(self):
"""_take()s whitespace characters, but does not yield them."""
# Note we need the list here to force evaluation of the generator
list(self._take(lambda char: char.isspace()))
def _take_number(self):
"""Yields a single number if there is one next in the input."""
# Gather up the digits/. forming the next number in the input
number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))
# If number is empty, we didn't scan digits, so don't try to float it
if number:
try:
yield float(number)
except ValueError:
raise BadNumberError(number)
Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan()
. It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.
In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:
- The need for
self._done()
(we'd optimally like one of the functions we use to handle this for us, but unfortunately theirStopIteration
s are consumed when weyield from
them) - The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a
_check_for_invalid_chars()
that does something likeself._take(lambda c: c not in '01234567890.+-/*()')
and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.
Now, hopefully you were able to figure out what _take()
does (it's provided by BaseParser
). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):
class BaseParser:
"""A base class containing utilities useful for a Parser."""
def __init__(self, items):
self._items = PeekableIterator(items)
def _take(self, predicate):
"""
Yields a contiguous group of items from the items being parsed for
which the predicate returns True.
>>> p = BaseParser([2, 4, 3])
>>> list(p._take(lambda x: x % 2 == 0))
[2, 4]
"""
while predicate(self._items.peek()):
yield next(self._items)
def _done(self):
"""Returns True if the underlying items have been fully consumed."""
try:
self._items.peek()
return False
except StopIteration:
return True
class PeekableIterator:
"""An iterator that supports 1-lookahead (peek)."""
def __init__(self, iterable):
self._iterator = iter(iterable)
# NOTE: We use None here to denote that we haven't peeked yet. This
# doesn't work if None can occur in the iterable (so this
# doesn't generalize!), but for our purposes here it's fine!
self._next_item = None
def peek(self):
"""
Return the next item that will be returned by the iterator without
advancing the iterator. Raises StopIteration if the iterator is done.
>>> i = PeekableIterator([1, 2, 3])
>>> i.peek()
1
>>> i.peek()
1
>>> next(i)
1
>>> next(i)
2
>>> i.peek()
3
"""
if self._next_item is None:
self._next_item = next(self._iterator)
return self._next_item
def __next__(self):
if self._next_item is not None:
next_item = self._next_item
self._next_item = None
return next_item
return next(self._iterator)
def __iter__(self):
return self
I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.
Now we have a working scanner that we can use like so:
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float
will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.
Now, let's talk about the parser. The reason why we are going to reuse BaseParser
is because, in effect, Scanner
is a parser that takes an iterable (specifically a stringâÂÂan iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single floatâÂÂor it will raise an exception).
Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:
Expression = Term ("+" .
Term = Factor ("*" Factor} .
Factor = number | "(" Expression ")" .
Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-")
means we need either a +
or a -
next. The
indicate that whatever is inside them can occur zero or more times. So, an
Expression
could be just a Term
, but it could also be a Term "+" Term "-" Term "+" Term
.
If you try some examples like 1 * (2 - 3) + 4
, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression
and Factor
group things so order of operations works (the deepest in the nesting are the *
and /
, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression
by evaluating all its Term
s and then adding/subtracting the results as appropriate, evaluate a Term
by evaluating all of its Factor
s and then multiplying/dividing the results as appropriate, and evaluate a Factor
by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html
With these in mind, let's write a Parser
capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.
import operator
class Parser(BaseParser):
"""Parser for tokenized calculator inputs."""
def parse(self):
"""Parse calculator input and return the result of evaluating it.
>>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
5
"""
return self._parse_expression()
def _parse_expression(self):
"""Parse an Expression and return the result of evaluating it.
>>> Parser([1, '+', 2])._parse_expression()
3
"""
# Parse the first (required) Term
terms = [self._parse_term()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '+-'
terms += flatten((op, self._parse_term()) for op in self._take(op))
return evaluate(terms)
def _parse_term(self):
"""Parse a Term and return the result of evaluating it.
>>> Parser([1, '*', 2])._parse_term()
2
"""
# Parse the first (required) Factor
factors = [self._parse_factor()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '*/'
factors += flatten((op, self._parse_factor()) for op in self._take(op))
return evaluate(factors)
def _parse_factor(self):
"""Parse a Factor and return the result of evaluating it.
>>> Parser([1])._parse_factor()
1
>>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
7
"""
# NOTE: Here's where Python gets a little cumbersome. This isn't really
# a for, we're just using it to handle the case where it doesn't
# find a number (gracefully skip). If it finds one, we return the
# number.
for n in self._take(lambda t: isinstance(t, float)):
return n
# If we failed to parse a number, then try to find a '('
for _ in self._take(lambda t: t == '('):
# If we found a '(', parse the subexpression
value = self._parse_expression()
# Make sure the subexpression is followed by a ')'
self._expect(')')
# Both parsing the number and subexpresion failed
raise self._unexpected('number', '(')
def _expect(self, char):
"""Expect a certain character, or raise if it is not next."""
for _ in self._take(lambda t: t == char):
return
raise self._unexpected(char)
def _unexpected(self, *expected):
"""Create an exception for an unexpected character."""
try:
return UnexpectedCharacterError(self._items.peek(), expected)
except StopIteration:
return UnexpectedEndError(expected)
def evaluate(items):
"""
Evaluate a list of floats separated by operators (at the same level of
precedence). Returns the result.
>>> evaluate([3, '*', 4, '/', 2])
6
"""
assert items, 'cannot evaluate empty list'
# x, x + x, x + x + x, etc. all have an odd number of tokens
assert len(items) % 2 == 1, 'list must be of odd length'
while len(items) > 1:
items[-3:] = [_evaluate_binary(*items[-3:])]
return items[0]
def _evaluate_binary(lhs, op, rhs):
"""Evalutates a single binary operation op where lhs and rhs are floats."""
ops = '+': operator.add,
'-': operator.sub,
'*': operator.mul,
'/': operator.truediv
return ops[op](lhs, rhs)
There's a lot to unpack here! First, take a look at parse
. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression
we parse a term , collect any number of ("+"|"-") Term
that follow it, and evaluate out that math. We do something similar for Term
. For Factor
we either try to parse a number (just a float
) or a "("
(subexpression) and if we can't find that we raise an error.
Notice how the actual work of doing the math in done in evaluate
and _evaluate_binary
. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate
intentionally cannot handle [1, '+', 2, '*', 3]
. The parser handles this. It would first parse a Term
and call evaluate([2, '*', 3])
. This would be returned and then it could finish parsing the Expression
and call evaluate([1, '+', 6])
.
For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.
This code needs the following helper:
def flatten(iterable):
"""Flattens a nested iterable by one nesting layer.
>>> flatten([[1,2], [3]])
[1, 2, 3]
"""
return [x for l in iterable for x in l]
Now all that's left is to wire the scanner and parser together as we discussed before:
def calculate(expression):
"""Evaluates a mathematical expression and returns the result.
>>> calculate('3 * (1 + 6 / 3)')
9
"""
return Parser(Scanner(expression).scan()).parse()
And we're done!
Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:
- Support hexadecimal numbers (you only need to modify the scanner!)
- Support mathematical constants like
pi
ande
(you only need to modify the scanner!) - Support leading negative signs (you only need to modify the parser!)
- Support new operations like
%
(modulo)
Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f
Nice job with your calculator. Overall it's pretty clean, and you have decent use of functions to break behaviors up. But, I'm very nit picky and am going to recommend some really aggressive refactoring, so don't take anything too personally :)
- Look into PEP8. You're actually pretty close to it, but it's the standard formatting that most pythoners use. There are great tools like flake8, which will enforce the style for you.
- Comments need one space after the
#
- There's some required spaces between phrases you're missing
- No space after
print
! - Wrap lines to 79 columns
- Comments need one space after the
- The comment before
is_number
should be a"""Doc comment."""
set_up_list
isn't a good name. I had to read the code and see it used in context to understand what it did.read_expression
might be better.- You have a lot of use of
while
with some sort of counter (ie.while count < len(expression) - 1:
). In python, we almost exclusively spell this asfor e in expression:
. If you need the index, thenfor i, e in enumerate(expression)
. - We typically use
'single quoted strings'
."Double quotes"
are usually reserved for format strings (ex."1 + 1 is ".format(1 + 1)
). - Your use of
in
is nice. But, did you know instead of that big long list you can just doitem not in '01234567890+-*/.()'
? Strings are behave like lists in this way. - Do the same thing for
P = ['(', ')']
.P
is also an unclear name. Don't name something that just to make lines shorter. If your lines are long, you're probably trying to do too much on each line. But,x in '()'
is nearly as short and gets the point across much better. - There is no need for your
creates the list and adds each individual character to the list
(no need fora_list
, just useastring
and give it a better name!). Strings are sequences so you can dolen(astring)
,astring[23]
, etc. - You have
print()
s andexit()
s in this code. While this is fine for now, I'd recommend pulling them out. Right now, it's hard to test your code, because it has side effects (printing and exiting). For a calculator, you definitely want to make sure that it works correctly! An easy way to make your calculator testable is to refactor it into a function likecalculate()
, which takes a string and returns a float (the result). It can raise exceptions (and perhaps you want to create your own by extendingException
for things likeUnclosedParenError
orUnexpectedCharacterError
) in error conditions. This means you can test it like so:
from unittest import main, TestCase
from yourcalculator import calculate
class TestCalculator(TestCase):
def test_basic_sums(self):
self.assertEqual(42, calculate('40 + 2'))
if __name__ == '__main__':
main()
- Piggy backing off that last point, you still want some sort of REPL interface if you refactor things out into a
calculate
function. You should use amain()
function for this and use the standardif __name__ == '__main__': main()
:
def main():
try:
while True:
try:
print(calculate(input('Enter an expression: ')))
except CalculatorError as e:
print(e)
except KeyboardInterrupt:
break
if __name__ == '__main__':
main()
- You probably want better errors than
Your formatting is off somehow
- In
perform_operation
, you usefloat()
(presumably on numbers that you've already calledis_number
on). In Python, we prefer the "do first, apologize later" approach (ie. call float and if it raises an error then we will be forced to stop executing and handle it). We typically don't do things likeif foo_wont_raise(bar): foo(bar)
- I see a lot of
del
. For this application, I'd recommend avoiding that (and mutating that list in general). Mutating the list makes it that much harder for someone trying to follow your code because then in their mind they have to keep track of what you've done to the list (and they can't just jump to a point in your code to see what's going on, they have to trace all the way up to that point to see if any mutations have been performed on the list) - There's really no need for
emergency_count
. Your code isn't recursive so you aren't going to hit the stack limit (and if you do an exception is raised) - I've recently become of the opinion that you should be typing anything non-trivial in Python. Python is of course famous for being untyped, but mypy has really progressed a lot and is sufficient for a lot of use cases. You'll see why below I think you want typing (it'll make ensuring your calculator is correct a lot easier). Read up on it: https://mypy.readthedocs.io/en/latest/getting_started.html
With specific comments about your code done. Let's think generally about calculators and talk about useful patterns for building them (and compilers for programming languages, to some extent). The end product here will likely be a lot longer than the code you've written, but it will be a lot easier to reason about and extend! As a result of it being long and me becoming hungry, I'll omit many uninteresting parts of it (such as exceptions, which I'll just assume exist).
You likely didn't realize, but your code is actually already divided into the right parts! You just named them a little differently (and they don't behave exactly the way the patterns proscribe).
Intrinsically, we have two different concerns here:
- We want to be able to handle lots of arbitrary spacing (ie.
1+1
is the same as1 + 1
is the same as1 + 1
) and ensure that there are no invalid characters (or bad numbers like123.456.789
) - Once we have valid characters and extraneous spaces removed, we want to make sure that the expression is well formed (parens are balanced and every operator has something that evaluates to a number before an after it) and then evaluate it
In compiler design these two are called the scanner and the parser, respectively (I'm using parser loosely here, since in a real compiler a parser produces an AST, but in the case of a calculator it's more ergonomic just to do the math in the parser). Their jobs are:
- Scanner: remove excess spacing and ensure all characters are valid
c in '01234567890+-*/.()'
and from each valid character produce a "Token" (ex. in the expression2 * 20 + 2
the tokens are2
,'*'
,20
,'+'
,2
) - Parser: ensure a list of tokens is well formed (parens are balanced and every operator is surrounded by things that evaluate to numbers) and then turn it into something else (in our case, evaluate the math)
Let's tackle the scanner first. It has three main things it needs to do:
- Ignore whitespace (although this has it's limits, we probably don't want
123 456
to become123456
--instead we probably want to error that we can't have two sequential numbers, we'll leave this to the parser so for now let's say the scanner will just emit two tokens for that123
and456
) - Turn symbols into single tokens (ex
c in '+-/*()'
) - Parse numbers (may be multiple characters, can be ill formatted as in
123.456.789
)
Before we actually implement it, I'll warn you that Python isn't the best language for expressing concepts like this (at least out of the box, there are libraries that can help make this a lot nicer). In the end, some of my suggestions above may need to be bent or broken. Nevermind, I love Python :P That said I'd still argue it isn't the best language out of the box for this. (If you're interested in the "best way" to do things, at least in my opinion, wait a few years (I don't want to confuse you too much!)--depending on your skill level--and then read about parser combinators and monadic parsing)
The scanner could look something like this:
class Scanner(BaseParser):
"""Scanner scans an input string for calculator tokens and yields them.
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
"""
def scan(self):
"""Yields all tokens in the input."""
while not self._done():
# Ignore any whitespace that may be next
self._consume_whitespace()
# Emit any symbol tokens that may be next
yield from self._take(lambda char: char in '+-*/()')
# Emit any number token that may be next
yield from self._take_number()
def _consume_whitespace(self):
"""_take()s whitespace characters, but does not yield them."""
# Note we need the list here to force evaluation of the generator
list(self._take(lambda char: char.isspace()))
def _take_number(self):
"""Yields a single number if there is one next in the input."""
# Gather up the digits/. forming the next number in the input
number = ''.join(self._take(lambda c: c.isdigit() or c == '.'))
# If number is empty, we didn't scan digits, so don't try to float it
if number:
try:
yield float(number)
except ValueError:
raise BadNumberError(number)
Without getting too caught up in some of the more advanced python patterns I've used, the core idea is in (the only public method) scan()
. It is a generator. While we still have things in the input string to parse, it does exactly what we discussed: (1) ignores whitespace (2) returns (yield in generator parlance, which loosely means it can return many) tokens for math operators and then (3) yields a number if it encounters one.
In case you're interested, earlier when I said Python is not well suited out of the box for this, those concerns manifests themselves in the above code:
- The need for
self._done()
(we'd optimally like one of the functions we use to handle this for us, but unfortunately theirStopIteration
s are consumed when weyield from
them) - The fact that this infinite loops on unexpected characters. To handle this somewhere in that while loop you'd need to introduce a
_check_for_invalid_chars()
that does something likeself._take(lambda c: c not in '01234567890.+-/*()')
and then if that's not empty it raises an exception about an unexpected character. I'll leave that as an exercise for you.
Now, hopefully you were able to figure out what _take()
does (it's provided by BaseParser
). If not, the high level summary is that it yields characters from the input as long as they satisfy the predicate (that's the lambda that's passed in). For completeness's sake, here are the utility classes needed to make that work (I've separated them out because our parser will use them too!):
class BaseParser:
"""A base class containing utilities useful for a Parser."""
def __init__(self, items):
self._items = PeekableIterator(items)
def _take(self, predicate):
"""
Yields a contiguous group of items from the items being parsed for
which the predicate returns True.
>>> p = BaseParser([2, 4, 3])
>>> list(p._take(lambda x: x % 2 == 0))
[2, 4]
"""
while predicate(self._items.peek()):
yield next(self._items)
def _done(self):
"""Returns True if the underlying items have been fully consumed."""
try:
self._items.peek()
return False
except StopIteration:
return True
class PeekableIterator:
"""An iterator that supports 1-lookahead (peek)."""
def __init__(self, iterable):
self._iterator = iter(iterable)
# NOTE: We use None here to denote that we haven't peeked yet. This
# doesn't work if None can occur in the iterable (so this
# doesn't generalize!), but for our purposes here it's fine!
self._next_item = None
def peek(self):
"""
Return the next item that will be returned by the iterator without
advancing the iterator. Raises StopIteration if the iterator is done.
>>> i = PeekableIterator([1, 2, 3])
>>> i.peek()
1
>>> i.peek()
1
>>> next(i)
1
>>> next(i)
2
>>> i.peek()
3
"""
if self._next_item is None:
self._next_item = next(self._iterator)
return self._next_item
def __next__(self):
if self._next_item is not None:
next_item = self._next_item
self._next_item = None
return next_item
return next(self._iterator)
def __iter__(self):
return self
I won't dig into the details of these (the doc comments should make their behavior obvious), but the need for them is a good demonstration of why Python isn't quite fit for this task out of the box.
Now we have a working scanner that we can use like so:
>>> list(Scanner('11 * (2 + 3)').scan())
[11, '(', 2, '+', 3, ')']
See how it gives us a list of tokens we need? Now, we can move onto the parser (without having to worry about things like bad whitespace, or numbers for which float
will raise errors). It's at this point that typing would make things a lot safer (notice how our list is of floats and strings--which we trust are operators), but I've chosen to omit it to not overwhelm.
Now, let's talk about the parser. The reason why we are going to reuse BaseParser
is because, in effect, Scanner
is a parser that takes an iterable (specifically a stringâÂÂan iterable of characters) and produces something (tokens). Our parser will also take an iterable (the tokens produced by the Scanner) and produce output (for the calculator this is a single floatâÂÂor it will raise an exception).
Now before we do this, we need to understand a little bit about EBNFs. It's a good way of representing regular languages (the input to our calculator is). For your calculator, it would look something like this:
Expression = Term ("+" .
Term = Factor ("*" Factor} .
Factor = number | "(" Expression ")" .
Now what does all that mean? The things on the right side of the equals are called productions. By default, the first one is the "main one." The ("+"|"-")
means we need either a +
or a -
next. The
indicate that whatever is inside them can occur zero or more times. So, an
Expression
could be just a Term
, but it could also be a Term "+" Term "-" Term "+" Term
.
If you try some examples like 1 * (2 - 3) + 4
, you'll see how it breaks down into the productions in the EBNF. Notice how the Expression
and Factor
group things so order of operations works (the deepest in the nesting are the *
and /
, which should happen first). Hopefully, you can see how if we were able to turn out stream of tokens into this nested structure, we could evaluate it (ex. evaluate an Expression
by evaluating all its Term
s and then adding/subtracting the results as appropriate, evaluate a Term
by evaluating all of its Factor
s and then multiplying/dividing the results as appropriate, and evaluate a Factor
by either returning the number or evaluating the expression and returning that). It takes a little to wrap your head around, but spend some time with it, and it'll become clearer: http://www.cs.utsa.edu/~wagner/CS3723/grammar/examples2.html
With these in mind, let's write a Parser
capable of parsing these productions. As we discussed, parsing a production should return the value it evaluates to.
import operator
class Parser(BaseParser):
"""Parser for tokenized calculator inputs."""
def parse(self):
"""Parse calculator input and return the result of evaluating it.
>>> Parser([1, '*', '(', 2, '+', 3, ')']).parse()
5
"""
return self._parse_expression()
def _parse_expression(self):
"""Parse an Expression and return the result of evaluating it.
>>> Parser([1, '+', 2])._parse_expression()
3
"""
# Parse the first (required) Term
terms = [self._parse_term()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '+-'
terms += flatten((op, self._parse_term()) for op in self._take(op))
return evaluate(terms)
def _parse_term(self):
"""Parse a Term and return the result of evaluating it.
>>> Parser([1, '*', 2])._parse_term()
2
"""
# Parse the first (required) Factor
factors = [self._parse_factor()]
# Parse any following: ("*"|"/") Factor
op = lambda t: t in '*/'
factors += flatten((op, self._parse_factor()) for op in self._take(op))
return evaluate(factors)
def _parse_factor(self):
"""Parse a Factor and return the result of evaluating it.
>>> Parser([1])._parse_factor()
1
>>> Parser(['(', 1, '+', 2, '*', 3, ')'])._parse_factor()
7
"""
# NOTE: Here's where Python gets a little cumbersome. This isn't really
# a for, we're just using it to handle the case where it doesn't
# find a number (gracefully skip). If it finds one, we return the
# number.
for n in self._take(lambda t: isinstance(t, float)):
return n
# If we failed to parse a number, then try to find a '('
for _ in self._take(lambda t: t == '('):
# If we found a '(', parse the subexpression
value = self._parse_expression()
# Make sure the subexpression is followed by a ')'
self._expect(')')
# Both parsing the number and subexpresion failed
raise self._unexpected('number', '(')
def _expect(self, char):
"""Expect a certain character, or raise if it is not next."""
for _ in self._take(lambda t: t == char):
return
raise self._unexpected(char)
def _unexpected(self, *expected):
"""Create an exception for an unexpected character."""
try:
return UnexpectedCharacterError(self._items.peek(), expected)
except StopIteration:
return UnexpectedEndError(expected)
def evaluate(items):
"""
Evaluate a list of floats separated by operators (at the same level of
precedence). Returns the result.
>>> evaluate([3, '*', 4, '/', 2])
6
"""
assert items, 'cannot evaluate empty list'
# x, x + x, x + x + x, etc. all have an odd number of tokens
assert len(items) % 2 == 1, 'list must be of odd length'
while len(items) > 1:
items[-3:] = [_evaluate_binary(*items[-3:])]
return items[0]
def _evaluate_binary(lhs, op, rhs):
"""Evalutates a single binary operation op where lhs and rhs are floats."""
ops = '+': operator.add,
'-': operator.sub,
'*': operator.mul,
'/': operator.truediv
return ops[op](lhs, rhs)
There's a lot to unpack here! First, take a look at parse
. It just parses an expression and returns the result of evaluating it (remember how the first production in the EBNF is the "main" one?). To parse an Expression
we parse a term , collect any number of ("+"|"-") Term
that follow it, and evaluate out that math. We do something similar for Term
. For Factor
we either try to parse a number (just a float
) or a "("
(subexpression) and if we can't find that we raise an error.
Notice how the actual work of doing the math in done in evaluate
and _evaluate_binary
. But, critically, note that the former should only be given lists of floats (must already be evaluated, the parser handles this via recursion) separated by operators of the same precedence. This means evaluate
intentionally cannot handle [1, '+', 2, '*', 3]
. The parser handles this. It would first parse a Term
and call evaluate([2, '*', 3])
. This would be returned and then it could finish parsing the Expression
and call evaluate([1, '+', 6])
.
For a simple calculator example, this code is decent. But you can see how things quickly get out of hand for more complicated EBNFs. This is what I was alluding to earlier.
This code needs the following helper:
def flatten(iterable):
"""Flattens a nested iterable by one nesting layer.
>>> flatten([[1,2], [3]])
[1, 2, 3]
"""
return [x for l in iterable for x in l]
Now all that's left is to wire the scanner and parser together as we discussed before:
def calculate(expression):
"""Evaluates a mathematical expression and returns the result.
>>> calculate('3 * (1 + 6 / 3)')
9
"""
return Parser(Scanner(expression).scan()).parse()
And we're done!
Hopefully that's given you an overview of how parsers are designed and how you can use parser patterns to make your calculator a lot easier to reason about. Because of this design you should be able to do the following as an exercise fairly easily:
- Support hexadecimal numbers (you only need to modify the scanner!)
- Support mathematical constants like
pi
ande
(you only need to modify the scanner!) - Support leading negative signs (you only need to modify the parser!)
- Support new operations like
%
(modulo)
Note that this code is only loosely tested. You should write tests for each component. As I've needed to loosely test, I've fully implemented everything and will put it in a gist in case you get stuck connecting the dots (but really the only thing I left out was exceptions): https://gist.github.com/baileyparker/309436dddf2f34f06cfc363aa5a6c86f
edited Mar 24 at 15:01
answered Mar 24 at 14:41
Bailey Parker
1,161710
1,161710
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
add a comment |Â
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
Thank you for your input. There are a lot of concepts you are using that I have not even really heard of before. I am going to read and re-read and understand what I can. The concepts parsers and scanners are entirely new to me, and I have not seen or used yields or assertions yet. Just another reminder that I have barely scratched the surface :).
â Chuck Bartowski
Mar 26 at 15:52
add a comment |Â
up vote
2
down vote
Heres some quick suggestions...
edit: as pointed out, isdigit() only works on integers
Your is_number(item) function could be the built-in isdigit():
if a_list[count].isdigit() and ...
You could chain together method calls, doesn't make it less readable imo:
astring = raw_input("Calculation: ").replace(" ", "")
The first two loops you do could be condensed into one:
# Next it will add only supported characters to the list
a_list =
for item in astring:
if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
print ("Unsupported Character: " + item)
exit()
a_list.append(item)
Also, try to use better variable names than astring and a_list.
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
add a comment |Â
up vote
2
down vote
Heres some quick suggestions...
edit: as pointed out, isdigit() only works on integers
Your is_number(item) function could be the built-in isdigit():
if a_list[count].isdigit() and ...
You could chain together method calls, doesn't make it less readable imo:
astring = raw_input("Calculation: ").replace(" ", "")
The first two loops you do could be condensed into one:
# Next it will add only supported characters to the list
a_list =
for item in astring:
if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
print ("Unsupported Character: " + item)
exit()
a_list.append(item)
Also, try to use better variable names than astring and a_list.
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
add a comment |Â
up vote
2
down vote
up vote
2
down vote
Heres some quick suggestions...
edit: as pointed out, isdigit() only works on integers
Your is_number(item) function could be the built-in isdigit():
if a_list[count].isdigit() and ...
You could chain together method calls, doesn't make it less readable imo:
astring = raw_input("Calculation: ").replace(" ", "")
The first two loops you do could be condensed into one:
# Next it will add only supported characters to the list
a_list =
for item in astring:
if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
print ("Unsupported Character: " + item)
exit()
a_list.append(item)
Also, try to use better variable names than astring and a_list.
Heres some quick suggestions...
edit: as pointed out, isdigit() only works on integers
Your is_number(item) function could be the built-in isdigit():
if a_list[count].isdigit() and ...
You could chain together method calls, doesn't make it less readable imo:
astring = raw_input("Calculation: ").replace(" ", "")
The first two loops you do could be condensed into one:
# Next it will add only supported characters to the list
a_list =
for item in astring:
if item not in set(["0", "1", "2", "3" , "4", "5", "6", "7", "8", "9", "+", "-", "*", "/", ".", "(", ")"]):
print ("Unsupported Character: " + item)
exit()
a_list.append(item)
Also, try to use better variable names than astring and a_list.
edited Mar 22 at 17:41
answered Mar 22 at 1:24
Ullauri
463
463
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
add a comment |Â
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
1
1
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
Thank you for your suggestions. Unfortunately .isdigit() only works with integers, so when working with decimal numbers in the list it fails. I will change the variable names and condense method calls, as well as place that if statement within the for loop.
â Chuck Bartowski
Mar 22 at 16:55
1
1
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
Nice call! Overlooked that one.
â Ullauri
Mar 22 at 17:37
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f190101%2fexpression-calculator-in-python%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password