Generative testing for CSV using Hypothesis - project, API, implementation

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
2
down vote

favorite












I've just finished the first iteration of my new project. It's the first time I publish Python code and I would like to know what you think about the structure of the project, the API, testing, the documentation and obviously the implementation.



The goal of this library is to offer a flexible tool to perform generative-testing/property-based testing for software that accepts CSV files as an input.



It does so by leveraging hypothesis default strategies. The idea is to generate data tuples that are then converted to CSV files in different dialects. At the same time I follow the same approach of hypothesis giving complex customization options to generate specific subsets of CSV files.



You can find the whole project here: https://github.com/chobeat/hypothesis-csv



Feel free to comment on any of these aspects.



It's the first time I post here and I'm not sure if this is the right approach (big reviews are usually bad reviews) so I will also submit some chunks of code for a more direct and focused review.



These are the two main files:



_data_rows.py



import functools
import string

from hypothesis.errors import InvalidArgument
from hypothesis.strategies import composite, integers, sampled_from, floats, text
from multimethod import overload

from hypothesis_csv.type_utils import *

valid_column_types = [integers, floats,
functools.partial(text, min_size=1, max_size=10,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits)]


@overload
def get_columns(draw, columns):
raise InvalidArgument("Columns parameter must either be an integer or a list of strategies")


@overload
def get_columns(draw, columns: isa(collections.Iterable)):
return columns


@overload
def get_columns(draw, columns: isa(int)):
columns = [draw(sampled_from(valid_column_types))() for _ in range(columns)]
return columns


@overload
def get_columns(draw, columns: is_none):
return get_columns(draw, draw(integers(min_value=1, max_value=10)))


@overload
def get_lines_num(draw, lines_param):
raise InvalidArgument("Lines param must be an integer or None")


@overload
def get_lines_num(draw, lines_param: is_none):
return draw(integers(min_value=1, max_value=100))


@overload
def get_lines_num(draw, lines_param: isa(int)):
return lines_param


@composite
def data_rows(draw, lines=None, columns=None):
"""
Strategy to produce a list of data tuples
:param draw:
:param lines: number of data tuples to be produced. If not specified, it will be drawn randomly
in a range between 1 and 100
:param columns: int or List. If an int is provided, it will define the size of the data tuple.
Its data types will be drawn randomly.
If a List of strategies is provided, the elements for each tuple will be drawn from these strategies. They will
be returned in the same order of the list here provided.
:return: a list of data tuples
"""
lines_num = get_lines_num(draw, lines)
columns = get_columns(draw, columns)
rows = [tuple(draw(column) for column in columns) for _ in range(lines_num)]
return rows


_csv.py



from hypothesis.strategies import lists
from meza.convert import records2csv

from hypothesis_csv._data_rows import *
from hypothesis_csv.type_utils import *


def draw_header(draw, header_len):
return draw(lists(text(min_size=1,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits),
min_size=header_len, max_size=header_len, unique=True))


@overload
def _get_header_and_column_types(draw, header, columns):
raise InvalidArgument("Header or column are of invalid type")


@overload
def _get_header_and_column_types(draw, header: is_seq, columns: is_seq):
if len(header) == len(columns):
return header, columns
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_seq,
columns: is_none):
return header, len(header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_none):
columns = draw(integers(min_value=1, max_value=10))
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: isa(int)):
if header == columns:

header_fields = draw_header(draw, header)

return header_fields, len(header_fields)
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_none, columns: isa(int)):
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: is_none):
return _get_header_and_column_types(draw, header, header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_seq):
return None, columns


@composite
def csv(draw, header=None, columns=None, lines=None):
"""
Strategy to produce a CSV string. Uses `data_rows` strategy to generate the values. Refer to the `data_rows`
strategy for more details about the `columns` and `lines` parameter.

:param draw:
:param header: if a list of strings, these will be used as the header for each column, according to their position.
If an int, this parameter will define the number of columns to be used. If None, the produced CSV will have no
header
:param columns: If a list of strategies, these will be used to draw the values for each column.
If an int, this parameter will define the number of columns to be used. If not provided the number of columns will
be drawn randomly or the `header` param will be used.
:param lines: number of rows in the CSV.
:return: a string in CSV format
"""
header_param, columns = _get_header_and_column_types(draw, header, columns)
rows = list(draw(data_rows(lines=lines, columns=columns)))
header = header_param or ["col_".format(i) for i in range(len(rows[0]))] # placeholder header for meza records

data = [dict(zip(header, d)) for d in rows]

return records2csv(data, skip_header=header_param is None).getvalue()


My main concern is on the way I've handled parameter overloading: I wanted to accept both int and list for two optional parameters and instead of going through the usual jungle of if else that you see in many projects (sklearn, pandas and so on), I've tried to decompose the problem using multimethod library. I've never seen an example of this approach so I'm not sure I'm handling it the right way but I'm satisfied by the end result since it's very readable. A bit verbose if you ask me but still better than handling it with if-else.







share|improve this question





















  • Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
    – C. Harley
    Aug 2 at 3:21










  • I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
    – Chobeat
    Aug 2 at 7:01










  • Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
    – C. Harley
    Aug 2 at 14:29










  • yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
    – Chobeat
    Aug 2 at 16:40










  • Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
    – C. Harley
    Aug 2 at 22:46
















up vote
2
down vote

favorite












I've just finished the first iteration of my new project. It's the first time I publish Python code and I would like to know what you think about the structure of the project, the API, testing, the documentation and obviously the implementation.



The goal of this library is to offer a flexible tool to perform generative-testing/property-based testing for software that accepts CSV files as an input.



It does so by leveraging hypothesis default strategies. The idea is to generate data tuples that are then converted to CSV files in different dialects. At the same time I follow the same approach of hypothesis giving complex customization options to generate specific subsets of CSV files.



You can find the whole project here: https://github.com/chobeat/hypothesis-csv



Feel free to comment on any of these aspects.



It's the first time I post here and I'm not sure if this is the right approach (big reviews are usually bad reviews) so I will also submit some chunks of code for a more direct and focused review.



These are the two main files:



_data_rows.py



import functools
import string

from hypothesis.errors import InvalidArgument
from hypothesis.strategies import composite, integers, sampled_from, floats, text
from multimethod import overload

from hypothesis_csv.type_utils import *

valid_column_types = [integers, floats,
functools.partial(text, min_size=1, max_size=10,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits)]


@overload
def get_columns(draw, columns):
raise InvalidArgument("Columns parameter must either be an integer or a list of strategies")


@overload
def get_columns(draw, columns: isa(collections.Iterable)):
return columns


@overload
def get_columns(draw, columns: isa(int)):
columns = [draw(sampled_from(valid_column_types))() for _ in range(columns)]
return columns


@overload
def get_columns(draw, columns: is_none):
return get_columns(draw, draw(integers(min_value=1, max_value=10)))


@overload
def get_lines_num(draw, lines_param):
raise InvalidArgument("Lines param must be an integer or None")


@overload
def get_lines_num(draw, lines_param: is_none):
return draw(integers(min_value=1, max_value=100))


@overload
def get_lines_num(draw, lines_param: isa(int)):
return lines_param


@composite
def data_rows(draw, lines=None, columns=None):
"""
Strategy to produce a list of data tuples
:param draw:
:param lines: number of data tuples to be produced. If not specified, it will be drawn randomly
in a range between 1 and 100
:param columns: int or List. If an int is provided, it will define the size of the data tuple.
Its data types will be drawn randomly.
If a List of strategies is provided, the elements for each tuple will be drawn from these strategies. They will
be returned in the same order of the list here provided.
:return: a list of data tuples
"""
lines_num = get_lines_num(draw, lines)
columns = get_columns(draw, columns)
rows = [tuple(draw(column) for column in columns) for _ in range(lines_num)]
return rows


_csv.py



from hypothesis.strategies import lists
from meza.convert import records2csv

from hypothesis_csv._data_rows import *
from hypothesis_csv.type_utils import *


def draw_header(draw, header_len):
return draw(lists(text(min_size=1,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits),
min_size=header_len, max_size=header_len, unique=True))


@overload
def _get_header_and_column_types(draw, header, columns):
raise InvalidArgument("Header or column are of invalid type")


@overload
def _get_header_and_column_types(draw, header: is_seq, columns: is_seq):
if len(header) == len(columns):
return header, columns
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_seq,
columns: is_none):
return header, len(header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_none):
columns = draw(integers(min_value=1, max_value=10))
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: isa(int)):
if header == columns:

header_fields = draw_header(draw, header)

return header_fields, len(header_fields)
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_none, columns: isa(int)):
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: is_none):
return _get_header_and_column_types(draw, header, header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_seq):
return None, columns


@composite
def csv(draw, header=None, columns=None, lines=None):
"""
Strategy to produce a CSV string. Uses `data_rows` strategy to generate the values. Refer to the `data_rows`
strategy for more details about the `columns` and `lines` parameter.

:param draw:
:param header: if a list of strings, these will be used as the header for each column, according to their position.
If an int, this parameter will define the number of columns to be used. If None, the produced CSV will have no
header
:param columns: If a list of strategies, these will be used to draw the values for each column.
If an int, this parameter will define the number of columns to be used. If not provided the number of columns will
be drawn randomly or the `header` param will be used.
:param lines: number of rows in the CSV.
:return: a string in CSV format
"""
header_param, columns = _get_header_and_column_types(draw, header, columns)
rows = list(draw(data_rows(lines=lines, columns=columns)))
header = header_param or ["col_".format(i) for i in range(len(rows[0]))] # placeholder header for meza records

data = [dict(zip(header, d)) for d in rows]

return records2csv(data, skip_header=header_param is None).getvalue()


My main concern is on the way I've handled parameter overloading: I wanted to accept both int and list for two optional parameters and instead of going through the usual jungle of if else that you see in many projects (sklearn, pandas and so on), I've tried to decompose the problem using multimethod library. I've never seen an example of this approach so I'm not sure I'm handling it the right way but I'm satisfied by the end result since it's very readable. A bit verbose if you ask me but still better than handling it with if-else.







share|improve this question





















  • Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
    – C. Harley
    Aug 2 at 3:21










  • I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
    – Chobeat
    Aug 2 at 7:01










  • Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
    – C. Harley
    Aug 2 at 14:29










  • yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
    – Chobeat
    Aug 2 at 16:40










  • Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
    – C. Harley
    Aug 2 at 22:46












up vote
2
down vote

favorite









up vote
2
down vote

favorite











I've just finished the first iteration of my new project. It's the first time I publish Python code and I would like to know what you think about the structure of the project, the API, testing, the documentation and obviously the implementation.



The goal of this library is to offer a flexible tool to perform generative-testing/property-based testing for software that accepts CSV files as an input.



It does so by leveraging hypothesis default strategies. The idea is to generate data tuples that are then converted to CSV files in different dialects. At the same time I follow the same approach of hypothesis giving complex customization options to generate specific subsets of CSV files.



You can find the whole project here: https://github.com/chobeat/hypothesis-csv



Feel free to comment on any of these aspects.



It's the first time I post here and I'm not sure if this is the right approach (big reviews are usually bad reviews) so I will also submit some chunks of code for a more direct and focused review.



These are the two main files:



_data_rows.py



import functools
import string

from hypothesis.errors import InvalidArgument
from hypothesis.strategies import composite, integers, sampled_from, floats, text
from multimethod import overload

from hypothesis_csv.type_utils import *

valid_column_types = [integers, floats,
functools.partial(text, min_size=1, max_size=10,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits)]


@overload
def get_columns(draw, columns):
raise InvalidArgument("Columns parameter must either be an integer or a list of strategies")


@overload
def get_columns(draw, columns: isa(collections.Iterable)):
return columns


@overload
def get_columns(draw, columns: isa(int)):
columns = [draw(sampled_from(valid_column_types))() for _ in range(columns)]
return columns


@overload
def get_columns(draw, columns: is_none):
return get_columns(draw, draw(integers(min_value=1, max_value=10)))


@overload
def get_lines_num(draw, lines_param):
raise InvalidArgument("Lines param must be an integer or None")


@overload
def get_lines_num(draw, lines_param: is_none):
return draw(integers(min_value=1, max_value=100))


@overload
def get_lines_num(draw, lines_param: isa(int)):
return lines_param


@composite
def data_rows(draw, lines=None, columns=None):
"""
Strategy to produce a list of data tuples
:param draw:
:param lines: number of data tuples to be produced. If not specified, it will be drawn randomly
in a range between 1 and 100
:param columns: int or List. If an int is provided, it will define the size of the data tuple.
Its data types will be drawn randomly.
If a List of strategies is provided, the elements for each tuple will be drawn from these strategies. They will
be returned in the same order of the list here provided.
:return: a list of data tuples
"""
lines_num = get_lines_num(draw, lines)
columns = get_columns(draw, columns)
rows = [tuple(draw(column) for column in columns) for _ in range(lines_num)]
return rows


_csv.py



from hypothesis.strategies import lists
from meza.convert import records2csv

from hypothesis_csv._data_rows import *
from hypothesis_csv.type_utils import *


def draw_header(draw, header_len):
return draw(lists(text(min_size=1,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits),
min_size=header_len, max_size=header_len, unique=True))


@overload
def _get_header_and_column_types(draw, header, columns):
raise InvalidArgument("Header or column are of invalid type")


@overload
def _get_header_and_column_types(draw, header: is_seq, columns: is_seq):
if len(header) == len(columns):
return header, columns
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_seq,
columns: is_none):
return header, len(header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_none):
columns = draw(integers(min_value=1, max_value=10))
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: isa(int)):
if header == columns:

header_fields = draw_header(draw, header)

return header_fields, len(header_fields)
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_none, columns: isa(int)):
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: is_none):
return _get_header_and_column_types(draw, header, header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_seq):
return None, columns


@composite
def csv(draw, header=None, columns=None, lines=None):
"""
Strategy to produce a CSV string. Uses `data_rows` strategy to generate the values. Refer to the `data_rows`
strategy for more details about the `columns` and `lines` parameter.

:param draw:
:param header: if a list of strings, these will be used as the header for each column, according to their position.
If an int, this parameter will define the number of columns to be used. If None, the produced CSV will have no
header
:param columns: If a list of strategies, these will be used to draw the values for each column.
If an int, this parameter will define the number of columns to be used. If not provided the number of columns will
be drawn randomly or the `header` param will be used.
:param lines: number of rows in the CSV.
:return: a string in CSV format
"""
header_param, columns = _get_header_and_column_types(draw, header, columns)
rows = list(draw(data_rows(lines=lines, columns=columns)))
header = header_param or ["col_".format(i) for i in range(len(rows[0]))] # placeholder header for meza records

data = [dict(zip(header, d)) for d in rows]

return records2csv(data, skip_header=header_param is None).getvalue()


My main concern is on the way I've handled parameter overloading: I wanted to accept both int and list for two optional parameters and instead of going through the usual jungle of if else that you see in many projects (sklearn, pandas and so on), I've tried to decompose the problem using multimethod library. I've never seen an example of this approach so I'm not sure I'm handling it the right way but I'm satisfied by the end result since it's very readable. A bit verbose if you ask me but still better than handling it with if-else.







share|improve this question













I've just finished the first iteration of my new project. It's the first time I publish Python code and I would like to know what you think about the structure of the project, the API, testing, the documentation and obviously the implementation.



The goal of this library is to offer a flexible tool to perform generative-testing/property-based testing for software that accepts CSV files as an input.



It does so by leveraging hypothesis default strategies. The idea is to generate data tuples that are then converted to CSV files in different dialects. At the same time I follow the same approach of hypothesis giving complex customization options to generate specific subsets of CSV files.



You can find the whole project here: https://github.com/chobeat/hypothesis-csv



Feel free to comment on any of these aspects.



It's the first time I post here and I'm not sure if this is the right approach (big reviews are usually bad reviews) so I will also submit some chunks of code for a more direct and focused review.



These are the two main files:



_data_rows.py



import functools
import string

from hypothesis.errors import InvalidArgument
from hypothesis.strategies import composite, integers, sampled_from, floats, text
from multimethod import overload

from hypothesis_csv.type_utils import *

valid_column_types = [integers, floats,
functools.partial(text, min_size=1, max_size=10,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits)]


@overload
def get_columns(draw, columns):
raise InvalidArgument("Columns parameter must either be an integer or a list of strategies")


@overload
def get_columns(draw, columns: isa(collections.Iterable)):
return columns


@overload
def get_columns(draw, columns: isa(int)):
columns = [draw(sampled_from(valid_column_types))() for _ in range(columns)]
return columns


@overload
def get_columns(draw, columns: is_none):
return get_columns(draw, draw(integers(min_value=1, max_value=10)))


@overload
def get_lines_num(draw, lines_param):
raise InvalidArgument("Lines param must be an integer or None")


@overload
def get_lines_num(draw, lines_param: is_none):
return draw(integers(min_value=1, max_value=100))


@overload
def get_lines_num(draw, lines_param: isa(int)):
return lines_param


@composite
def data_rows(draw, lines=None, columns=None):
"""
Strategy to produce a list of data tuples
:param draw:
:param lines: number of data tuples to be produced. If not specified, it will be drawn randomly
in a range between 1 and 100
:param columns: int or List. If an int is provided, it will define the size of the data tuple.
Its data types will be drawn randomly.
If a List of strategies is provided, the elements for each tuple will be drawn from these strategies. They will
be returned in the same order of the list here provided.
:return: a list of data tuples
"""
lines_num = get_lines_num(draw, lines)
columns = get_columns(draw, columns)
rows = [tuple(draw(column) for column in columns) for _ in range(lines_num)]
return rows


_csv.py



from hypothesis.strategies import lists
from meza.convert import records2csv

from hypothesis_csv._data_rows import *
from hypothesis_csv.type_utils import *


def draw_header(draw, header_len):
return draw(lists(text(min_size=1,
alphabet=string.ascii_lowercase + string.ascii_uppercase + string.digits),
min_size=header_len, max_size=header_len, unique=True))


@overload
def _get_header_and_column_types(draw, header, columns):
raise InvalidArgument("Header or column are of invalid type")


@overload
def _get_header_and_column_types(draw, header: is_seq, columns: is_seq):
if len(header) == len(columns):
return header, columns
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_seq,
columns: is_none):
return header, len(header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_none):
columns = draw(integers(min_value=1, max_value=10))
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: isa(int)):
if header == columns:

header_fields = draw_header(draw, header)

return header_fields, len(header_fields)
else:
raise InvalidArgument("Header and columns must be of the same size")


@overload
def _get_header_and_column_types(draw, header: is_none, columns: isa(int)):
return None, columns


@overload
def _get_header_and_column_types(draw, header: isa(int), columns: is_none):
return _get_header_and_column_types(draw, header, header)


@overload
def _get_header_and_column_types(draw, header: is_none, columns: is_seq):
return None, columns


@composite
def csv(draw, header=None, columns=None, lines=None):
"""
Strategy to produce a CSV string. Uses `data_rows` strategy to generate the values. Refer to the `data_rows`
strategy for more details about the `columns` and `lines` parameter.

:param draw:
:param header: if a list of strings, these will be used as the header for each column, according to their position.
If an int, this parameter will define the number of columns to be used. If None, the produced CSV will have no
header
:param columns: If a list of strategies, these will be used to draw the values for each column.
If an int, this parameter will define the number of columns to be used. If not provided the number of columns will
be drawn randomly or the `header` param will be used.
:param lines: number of rows in the CSV.
:return: a string in CSV format
"""
header_param, columns = _get_header_and_column_types(draw, header, columns)
rows = list(draw(data_rows(lines=lines, columns=columns)))
header = header_param or ["col_".format(i) for i in range(len(rows[0]))] # placeholder header for meza records

data = [dict(zip(header, d)) for d in rows]

return records2csv(data, skip_header=header_param is None).getvalue()


My main concern is on the way I've handled parameter overloading: I wanted to accept both int and list for two optional parameters and instead of going through the usual jungle of if else that you see in many projects (sklearn, pandas and so on), I've tried to decompose the problem using multimethod library. I've never seen an example of this approach so I'm not sure I'm handling it the right way but I'm satisfied by the end result since it's very readable. A bit verbose if you ask me but still better than handling it with if-else.









share|improve this question












share|improve this question




share|improve this question








edited Aug 1 at 10:47
























asked Aug 1 at 8:02









Chobeat

1113




1113











  • Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
    – C. Harley
    Aug 2 at 3:21










  • I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
    – Chobeat
    Aug 2 at 7:01










  • Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
    – C. Harley
    Aug 2 at 14:29










  • yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
    – Chobeat
    Aug 2 at 16:40










  • Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
    – C. Harley
    Aug 2 at 22:46
















  • Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
    – C. Harley
    Aug 2 at 3:21










  • I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
    – Chobeat
    Aug 2 at 7:01










  • Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
    – C. Harley
    Aug 2 at 14:29










  • yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
    – Chobeat
    Aug 2 at 16:40










  • Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
    – C. Harley
    Aug 2 at 22:46















Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
– C. Harley
Aug 2 at 3:21




Your code looks extremely rigid. If you had to change your code to also accept a dictionary, how many lines would you have to change? Hint: adding more overloaded signatures is not a solution.
– C. Harley
Aug 2 at 3:21












I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
– Chobeat
Aug 2 at 7:01




I know, I'm well aware of it but that is the external API that should change slowly. I'm already accepting 2 parameters of 3 types (int, list, None) and it's already a lot. Adding a dict would create too many combinations and I would avoid it in the first place, because it would be bad API design. What's the alternative though? If I do it with if-else, I have the same number of cases I have with overloaded signatures.
– Chobeat
Aug 2 at 7:01












Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
– C. Harley
Aug 2 at 14:29




Do you know about interfaces? You could make an interface as the initial receiver, and then fan out the data to the particular class depending on what type the data is. Then you can add extra data classes to your heart's content, without changing the signature of the receiver on the API. The "Adapter" pattern.
– C. Harley
Aug 2 at 14:29












yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
– Chobeat
Aug 2 at 16:40




yes but the "fan out to the particular class depending on what type the data is" in python means a bunch of if else statement where we explicitely check the type of the passed arguments. So the problem is still there and possibly is even more verbose than the current solution. The tests are the same but there's a proliferation of classes like ColumnsIntHeaderIntAdapter, ColumnsIntHeaderListAdapter and so on.
– Chobeat
Aug 2 at 16:40












Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
– C. Harley
Aug 2 at 22:46




Yes, sometimes developers have to throw away code when they realise they've gone down a long, dark, winding road that leads them into the depths of code hell. If your project was perfect you wouldn't be posting on codereview for assistance, would that be a fair comment to make? After all, if a bunch of if statements is going to wreck your code, there's not much anyone can do. You can either rewrite, or continue with even more rigid and inflexible code. Don't get me wrong - I've reviewed your project on github and see the potential, but as I said, if a client wants a dictionary and more...?
– C. Harley
Aug 2 at 22:46















active

oldest

votes











Your Answer




StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200722%2fgenerative-testing-for-csv-using-hypothesis-project-api-implementation%23new-answer', 'question_page');

);

Post as a guest



































active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes










 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200722%2fgenerative-testing-for-csv-using-hypothesis-project-api-implementation%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Greedy Best First Search implementation in Rust

Function to Return a JSON Like Objects Using VBA Collections and Arrays

C++11 CLH Lock Implementation