Python program to transform an Excel document, with some parsing

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
5
down vote

favorite

I wrote this to parse through an Excel doc, extract desired columns, parse certain cells and output another formatted Excel doc.

This is the original Excel doc I have condensed most of the columns for space

excelsnip

Here are the the results of the program that I am mostly happy with, it parses the name column and writes a unit identifier(i.e 83) along with a tower identifier(i.e 5) on a new line along with a completed date if the completed column is TRUE. I think the only thing I would change would be getting rid of the blank line under the row that has H 7

resultsnip

And here is the code

import xlrd
import xlwt
import re

# open and create workbook
workbookread = xlrd.open_workbook('seedtestexcelbytask.xls')
workbookwrite = xlwt.Workbook('output.xlsx')

# open and create sheets
worksheetread = workbookread.sheet_by_index(0)
worksheetwrite = workbookwrite.add_sheet('Sheet1')

timelist = 
towerlist = 

i = 0
l = 0
t = 0

# iterate through rows in worksheet, get columns completed, completed_at and name.
# add to timelist and towerlist if completed == true
for rows in range(worksheetread.nrows):
 cell0 = worksheetread.cell(rows, 2).value
 cell1 = worksheetread.cell(rows, 3).value
 cell2 = worksheetread.cell(rows, 15).value

 if cell0 == 1:
 parsedvalue = re.split(', |/', cell2)
 timelist.append(cell1)
 towerlist.append(parsedvalue)

b = 2
g = 1
for rows in range(len(timelist)):
 row = worksheetwrite.row(i)

# get current index from timelist and towerlist and assign to currenttime and currentvalue

 currenttime = timelist[t]
 currentname = towerlist[t]

# get the length of the current index this will help us decide how to parse
 length = len(towerlist[t])

# if length <= 2 the index is not formatted properly so we only need index 1 which is the cafe and towers together
 if length <= 2:
 row.write(0, currenttime)
 row.write(1, currentname[1])
 i += 1
 l += 1

# if length = 4 index is formatted properly and we just grab the columns we need
 elif length == 4:
 a = 2

 for values in range(0, 2):
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentname[1])
 row.write(2, currentname[a])
 a += 1
 i += 1
 l += 1

# if length > 4 we need to iterate through each index and write them
 else:

 # define the length of the current cell of the excel sheet we have
 lengthd = (len(currentname)+1)/2
 length = int(lengthd)

 for values in range(length):
 # set variables that we will need to hold on to
 currentcafe = currentname[g]
 currenttower = currentname[b]

 # if the value of the current tower is 2,4,5,6,7 we need to write the current cafe and current tower
 # then move on to the next value and set that as the current cafe, we also need to jump to the
 # value after the current cafe and set that as the current tower
 if currenttower == "2" or currenttower == "4" or currenttower == "5" or currenttower == "6" or currenttower == "7":
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)

 b += 1
 i += 1
 g = b
 b = g + 1
 # otherwise we can just write the value fo the current cafe and current tower, advance to the next value
 # which is another tower and set current tower to that
 else:
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)
 b += 1
 i += 1

 l += 1
 i += 1
 t += 1
workbookwrite.save('output.xls')

This is the first time I have written anything with Python and I am fairly new to programming, any comments would be greatly appreciated.

edited Jun 8 at 14:26

asked Jun 7 at 20:13

azryel6500

263

2

Could you expand the column width to fully show the "notes" column? Also, could you summarize what this transformation is for?
â€“Â 200_success
Jun 7 at 20:55

I updated the the pic to better show relevant columns
â€“Â azryel6500
Jun 8 at 14:27

check out pandas
â€“Â Maarten FabrÃ©
Jun 8 at 14:30

add a commentÂ |Â

up vote
5
down vote

favorite

I wrote this to parse through an Excel doc, extract desired columns, parse certain cells and output another formatted Excel doc.

This is the original Excel doc I have condensed most of the columns for space

excelsnip

resultsnip

And here is the code

import xlrd
import xlwt
import re

# open and create workbook
workbookread = xlrd.open_workbook('seedtestexcelbytask.xls')
workbookwrite = xlwt.Workbook('output.xlsx')

# open and create sheets
worksheetread = workbookread.sheet_by_index(0)
worksheetwrite = workbookwrite.add_sheet('Sheet1')

timelist = 
towerlist = 

i = 0
l = 0
t = 0

# iterate through rows in worksheet, get columns completed, completed_at and name.
# add to timelist and towerlist if completed == true
for rows in range(worksheetread.nrows):
 cell0 = worksheetread.cell(rows, 2).value
 cell1 = worksheetread.cell(rows, 3).value
 cell2 = worksheetread.cell(rows, 15).value

 if cell0 == 1:
 parsedvalue = re.split(', |/', cell2)
 timelist.append(cell1)
 towerlist.append(parsedvalue)

b = 2
g = 1
for rows in range(len(timelist)):
 row = worksheetwrite.row(i)

# get current index from timelist and towerlist and assign to currenttime and currentvalue

 currenttime = timelist[t]
 currentname = towerlist[t]

# get the length of the current index this will help us decide how to parse
 length = len(towerlist[t])

# if length <= 2 the index is not formatted properly so we only need index 1 which is the cafe and towers together
 if length <= 2:
 row.write(0, currenttime)
 row.write(1, currentname[1])
 i += 1
 l += 1

# if length = 4 index is formatted properly and we just grab the columns we need
 elif length == 4:
 a = 2

 for values in range(0, 2):
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentname[1])
 row.write(2, currentname[a])
 a += 1
 i += 1
 l += 1

# if length > 4 we need to iterate through each index and write them
 else:

 # define the length of the current cell of the excel sheet we have
 lengthd = (len(currentname)+1)/2
 length = int(lengthd)

 for values in range(length):
 # set variables that we will need to hold on to
 currentcafe = currentname[g]
 currenttower = currentname[b]

 # if the value of the current tower is 2,4,5,6,7 we need to write the current cafe and current tower
 # then move on to the next value and set that as the current cafe, we also need to jump to the
 # value after the current cafe and set that as the current tower
 if currenttower == "2" or currenttower == "4" or currenttower == "5" or currenttower == "6" or currenttower == "7":
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)

 b += 1
 i += 1
 g = b
 b = g + 1
 # otherwise we can just write the value fo the current cafe and current tower, advance to the next value
 # which is another tower and set current tower to that
 else:
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)
 b += 1
 i += 1

 l += 1
 i += 1
 t += 1
workbookwrite.save('output.xls')

This is the first time I have written anything with Python and I am fairly new to programming, any comments would be greatly appreciated.

edited Jun 8 at 14:26

asked Jun 7 at 20:13

azryel6500

263

2

Could you expand the column width to fully show the "notes" column? Also, could you summarize what this transformation is for?
â€“Â 200_success
Jun 7 at 20:55

I updated the the pic to better show relevant columns
â€“Â azryel6500
Jun 8 at 14:27

check out pandas
â€“Â Maarten FabrÃ©
Jun 8 at 14:30

add a commentÂ |Â

up vote
5
down vote

favorite

I wrote this to parse through an Excel doc, extract desired columns, parse certain cells and output another formatted Excel doc.

This is the original Excel doc I have condensed most of the columns for space

excelsnip

resultsnip

And here is the code

import xlrd
import xlwt
import re

# open and create workbook
workbookread = xlrd.open_workbook('seedtestexcelbytask.xls')
workbookwrite = xlwt.Workbook('output.xlsx')

# open and create sheets
worksheetread = workbookread.sheet_by_index(0)
worksheetwrite = workbookwrite.add_sheet('Sheet1')

timelist = 
towerlist = 

i = 0
l = 0
t = 0

# iterate through rows in worksheet, get columns completed, completed_at and name.
# add to timelist and towerlist if completed == true
for rows in range(worksheetread.nrows):
 cell0 = worksheetread.cell(rows, 2).value
 cell1 = worksheetread.cell(rows, 3).value
 cell2 = worksheetread.cell(rows, 15).value

 if cell0 == 1:
 parsedvalue = re.split(', |/', cell2)
 timelist.append(cell1)
 towerlist.append(parsedvalue)

b = 2
g = 1
for rows in range(len(timelist)):
 row = worksheetwrite.row(i)

# get current index from timelist and towerlist and assign to currenttime and currentvalue

 currenttime = timelist[t]
 currentname = towerlist[t]

# get the length of the current index this will help us decide how to parse
 length = len(towerlist[t])

# if length <= 2 the index is not formatted properly so we only need index 1 which is the cafe and towers together
 if length <= 2:
 row.write(0, currenttime)
 row.write(1, currentname[1])
 i += 1
 l += 1

# if length = 4 index is formatted properly and we just grab the columns we need
 elif length == 4:
 a = 2

 for values in range(0, 2):
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentname[1])
 row.write(2, currentname[a])
 a += 1
 i += 1
 l += 1

# if length > 4 we need to iterate through each index and write them
 else:

 # define the length of the current cell of the excel sheet we have
 lengthd = (len(currentname)+1)/2
 length = int(lengthd)

 for values in range(length):
 # set variables that we will need to hold on to
 currentcafe = currentname[g]
 currenttower = currentname[b]

 # if the value of the current tower is 2,4,5,6,7 we need to write the current cafe and current tower
 # then move on to the next value and set that as the current cafe, we also need to jump to the
 # value after the current cafe and set that as the current tower
 if currenttower == "2" or currenttower == "4" or currenttower == "5" or currenttower == "6" or currenttower == "7":
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)

 b += 1
 i += 1
 g = b
 b = g + 1
 # otherwise we can just write the value fo the current cafe and current tower, advance to the next value
 # which is another tower and set current tower to that
 else:
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)
 b += 1
 i += 1

 l += 1
 i += 1
 t += 1
workbookwrite.save('output.xls')

This is the first time I have written anything with Python and I am fairly new to programming, any comments would be greatly appreciated.

edited Jun 8 at 14:26

asked Jun 7 at 20:13

azryel6500

263

I wrote this to parse through an Excel doc, extract desired columns, parse certain cells and output another formatted Excel doc.

This is the original Excel doc I have condensed most of the columns for space

excelsnip

resultsnip

And here is the code

import xlrd
import xlwt
import re

# open and create workbook
workbookread = xlrd.open_workbook('seedtestexcelbytask.xls')
workbookwrite = xlwt.Workbook('output.xlsx')

# open and create sheets
worksheetread = workbookread.sheet_by_index(0)
worksheetwrite = workbookwrite.add_sheet('Sheet1')

timelist = 
towerlist = 

i = 0
l = 0
t = 0

# iterate through rows in worksheet, get columns completed, completed_at and name.
# add to timelist and towerlist if completed == true
for rows in range(worksheetread.nrows):
 cell0 = worksheetread.cell(rows, 2).value
 cell1 = worksheetread.cell(rows, 3).value
 cell2 = worksheetread.cell(rows, 15).value

 if cell0 == 1:
 parsedvalue = re.split(', |/', cell2)
 timelist.append(cell1)
 towerlist.append(parsedvalue)

b = 2
g = 1
for rows in range(len(timelist)):
 row = worksheetwrite.row(i)

# get current index from timelist and towerlist and assign to currenttime and currentvalue

 currenttime = timelist[t]
 currentname = towerlist[t]

# get the length of the current index this will help us decide how to parse
 length = len(towerlist[t])

# if length <= 2 the index is not formatted properly so we only need index 1 which is the cafe and towers together
 if length <= 2:
 row.write(0, currenttime)
 row.write(1, currentname[1])
 i += 1
 l += 1

# if length = 4 index is formatted properly and we just grab the columns we need
 elif length == 4:
 a = 2

 for values in range(0, 2):
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentname[1])
 row.write(2, currentname[a])
 a += 1
 i += 1
 l += 1

# if length > 4 we need to iterate through each index and write them
 else:

 # define the length of the current cell of the excel sheet we have
 lengthd = (len(currentname)+1)/2
 length = int(lengthd)

 for values in range(length):
 # set variables that we will need to hold on to
 currentcafe = currentname[g]
 currenttower = currentname[b]

 # if the value of the current tower is 2,4,5,6,7 we need to write the current cafe and current tower
 # then move on to the next value and set that as the current cafe, we also need to jump to the
 # value after the current cafe and set that as the current tower
 if currenttower == "2" or currenttower == "4" or currenttower == "5" or currenttower == "6" or currenttower == "7":
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)

 b += 1
 i += 1
 g = b
 b = g + 1
 # otherwise we can just write the value fo the current cafe and current tower, advance to the next value
 # which is another tower and set current tower to that
 else:
 row = worksheetwrite.row(l)
 row.write(0, currenttime)
 row.write(1, currentcafe)
 row.write(2, currenttower)
 b += 1
 i += 1

 l += 1
 i += 1
 t += 1
workbookwrite.save('output.xls')

This is the first time I have written anything with Python and I am fairly new to programming, any comments would be greatly appreciated.

edited Jun 8 at 14:26

asked Jun 7 at 20:13

azryel6500

263

edited Jun 8 at 14:26

asked Jun 7 at 20:13

azryel6500

263

asked Jun 7 at 20:13

azryel6500

263

asked Jun 7 at 20:13

azryel6500

263

2

Could you expand the column width to fully show the "notes" column? Also, could you summarize what this transformation is for?
â€“Â 200_success
Jun 7 at 20:55

I updated the the pic to better show relevant columns
â€“Â azryel6500
Jun 8 at 14:27

add a commentÂ |Â

2

Could you expand the column width to fully show the "notes" column? Also, could you summarize what this transformation is for?
â€“Â 200_success
Jun 7 at 20:55

I updated the the pic to better show relevant columns
â€“Â azryel6500
Jun 8 at 14:27

Could you expand the column width to fully show the "notes" column? Also, could you summarize what this transformation is for?
â€“Â 200_success
Jun 7 at 20:55

I updated the the pic to better show relevant columns
â€“Â azryel6500
Jun 8 at 14:27

add a commentÂ |Â

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f196066%2fpython-program-to-transform-an-excel-document-with-some-parsing%23new-answer', 'question_page');

);

Post as a guest

Name

active

oldest

votes

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr