Manipulating DataFrames on pandas

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty { margin-bottom:0; }







up vote
0
down vote

favorite












I recently failed to finish a coding task for a job interview. One of the problems was that I decided to use Pandas (it made sense) but I was unfamiliar with it (although I know Python, SciPy and NumPy), so it took a lot of time to figure everything out. It's the first time I have written this kind of code manipulating Pandas data frames, so I was wondering if you could give me advice on how to do things better.



The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulate the data in order to find certain properties. I did not finish it, but do you think I could have done something to make it run faster? It took 43 minutes to do just what it does now. The .csv file is 1.2 GB.



Moreover, any observations about style are more than welcome.



import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

chunk_size = 10**5  # rows per chunk; safe on memory
auction_division = 40000.0  # empirical split (in seconds) between the two auction slots
path = 'scandi.csv'

col_names = ['id', 'empty0', 'bid_price', 'ask_price', 'trade_price',
             'bid_volume', 'ask_volume', 'trade_volume', 'update',
             'empty1', 'date', 'seconds', 'opening', 'empty2', 'con_codes']

col_names_load = ['id', 'bid_price', 'ask_price', 'trade_price',
                  'trade_volume', 'update', 'date', 'seconds', 'con_codes']

# Condition codes that are kept when selecting trade updates.
trade_codes = ['@1', 'XT', 'XT|C', 'XT|O']


def _touches_auction(prev_seconds, seconds, ab):
    """Tell whether the interval prev_seconds -> seconds touches an auction.

    `ab` is the tuple (slot1_min, slot1_max, slot2_min, slot2_max).  The
    result is true when either end point lies strictly inside a slot, or
    when the interval jumps over a whole slot.  Only comparisons and
    `&` / `|` are used, so this works element-wise on Series as well as
    on scalars.
    """
    in_slot1 = (seconds > ab[0]) & (seconds < ab[1])
    in_slot2 = (seconds > ab[2]) & (seconds < ab[3])
    prev_in_slot1 = (prev_seconds > ab[0]) & (prev_seconds < ab[1])
    prev_in_slot2 = (prev_seconds > ab[2]) & (prev_seconds < ab[3])
    spans_slot1 = (prev_seconds < ab[0]) & (seconds > ab[1])
    spans_slot2 = (prev_seconds < ab[2]) & (seconds > ab[3])
    return (in_slot1 | in_slot2 | prev_in_slot1 | prev_in_slot2
            | spans_slot1 | spans_slot2)


start = time.time()
total = 0
stocks = set()
days = set()
sub_chunks = []

# First pass: collect every stock id and day, plus the (date, seconds) of
# all rows where the bid is above the ask (used below as auction rows).
first_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                         usecols=['id', 'bid_price', 'ask_price', 'date', 'seconds'],
                         engine='python', chunksize=chunk_size)
for chunk in first_pass:
    stocks |= set(chunk.id.unique().tolist())
    days |= set(chunk.date.unique().tolist())

    auction_data = chunk[chunk['bid_price'] > chunk['ask_price']]
    sub_chunks.append(auction_data[['date', 'seconds']])

days_list = list(days)
auction = pd.concat(sub_chunks)
stocky = list(stocks)

# Per day: (min, max) of the auction seconds before and after the division.
au_bound = []
for day in days_list:
    g = auction[auction['date'] == day]
    slot1 = g[g['seconds'] < auction_division].seconds
    slot2 = g[g['seconds'] > auction_division].seconds
    au_bound.append((slot1.min(), slot1.max(), slot2.min(), slot2.max()))

# Explicit day -> bounds mapping, so the pairing never depends on the
# iteration order of the `days` set.
bounds_by_day = dict(zip(days_list, au_bound))

the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
# rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag

# Second pass: time between consecutive trades per (stock, day).
second_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                          usecols=col_names_load, engine='python',
                          chunksize=chunk_size)
for i, chunk in enumerate(second_pass):
    total += 1
    print('%d th chunk' % (i + 1))
    # I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
    # Tick: https://www.investopedia.com/terms/t/tick.asp
    # Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
    # The comparison must be parenthesised: `&` binds tighter than `==`.
    is_trade = (chunk['update'] == 1) & chunk['con_codes'].isin(trade_codes)
    chunk_clean = chunk[is_trade]

    # Visit only the (stock, day) pairs present in this chunk, in order of
    # first appearance, instead of filtering once per stock x day pair.
    for (stock, day), group in chunk_clean.groupby(['id', 'date'], sort=False):
        ab = bounds_by_day[day]
        stock_chunk = group.copy()  # own copy, so the writes below are safe

        # time between trades subtraction
        prev_seconds = stock_chunk['seconds'].shift(1)
        stock_chunk['t_b_trades'] = stock_chunk['seconds'] - prev_seconds

        # eliminate trades that cross auctions
        crossing = _touches_auction(prev_seconds, stock_chunk['seconds'], ab)
        stock_chunk.loc[crossing, 't_b_trades'] = np.nan

        # The first row is always wrong: its predecessor, if any, is in the
        # previous chunk.  As in the original, every row sharing the first
        # row's timestamp is overwritten.
        first_seconds = stock_chunk['seconds'].iloc[0]
        first_rows = stock_chunk['seconds'] == first_seconds
        gap = np.nan
        if (the_last_element.loc[6, stock] == 1
                and the_last_element.loc[4, stock] == day):
            last_seconds = the_last_element.loc[5, stock]
            # eliminate trades that cross auctions
            if not _touches_auction(last_seconds, first_seconds, ab):
                gap = first_seconds - last_seconds
        stock_chunk.loc[first_rows, 't_b_trades'] = gap

        # fill the last row for the next chunk (whole-column assignment
        # instead of chained indexing)
        the_last_element[stock] = [
            stock_chunk.bid_price.iloc[-1],
            stock_chunk.ask_price.iloc[-1],
            stock_chunk.trade_price.iloc[-1],
            stock_chunk.trade_volume.iloc[-1],
            stock_chunk.date.iloc[-1],
            stock_chunk.seconds.iloc[-1],
            1,
        ]
        # NOTE(review): stock_chunk (with t_b_trades) is not stored or
        # aggregated anywhere yet; the analysis is unfinished.


end = time.time()
tot_time = (end - start) / 60.0
print('%s minutes for data! for %s chunks of size %s' % (tot_time, total, chunk_size))






share|improve this question





















  • Does it work as intended? What does the input and output data look like? Can you post examples of both?
    – Mast
    Jul 16 at 6:31
















up vote
0
down vote

favorite












I recently failed to finish a coding task for a job interview. One of the problems was that I decided to use Pandas (it made sense) but I was unfamiliar with it (although I know Python, SciPy and NumPy), so it took a lot of time to figure everything out. It's the first time I have written this kind of code manipulating Pandas data frames, so I was wondering if you could give me advice on how to do things better.



The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulate the data in order to find certain properties. I did not finish it, but do you think I could have done something to make it run faster? It took 43 minutes to do just what it does now. The .csv file is 1.2 GB.



Moreover, any observations about style are more than welcome.



import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

chunk_size = 10**5  # rows per chunk; safe on memory
auction_division = 40000.0  # empirical split (in seconds) between the two auction slots
path = 'scandi.csv'

col_names = ['id', 'empty0', 'bid_price', 'ask_price', 'trade_price',
             'bid_volume', 'ask_volume', 'trade_volume', 'update',
             'empty1', 'date', 'seconds', 'opening', 'empty2', 'con_codes']

col_names_load = ['id', 'bid_price', 'ask_price', 'trade_price',
                  'trade_volume', 'update', 'date', 'seconds', 'con_codes']

# Condition codes that are kept when selecting trade updates.
trade_codes = ['@1', 'XT', 'XT|C', 'XT|O']


def _touches_auction(prev_seconds, seconds, ab):
    """Tell whether the interval prev_seconds -> seconds touches an auction.

    `ab` is the tuple (slot1_min, slot1_max, slot2_min, slot2_max).  The
    result is true when either end point lies strictly inside a slot, or
    when the interval jumps over a whole slot.  Only comparisons and
    `&` / `|` are used, so this works element-wise on Series as well as
    on scalars.
    """
    in_slot1 = (seconds > ab[0]) & (seconds < ab[1])
    in_slot2 = (seconds > ab[2]) & (seconds < ab[3])
    prev_in_slot1 = (prev_seconds > ab[0]) & (prev_seconds < ab[1])
    prev_in_slot2 = (prev_seconds > ab[2]) & (prev_seconds < ab[3])
    spans_slot1 = (prev_seconds < ab[0]) & (seconds > ab[1])
    spans_slot2 = (prev_seconds < ab[2]) & (seconds > ab[3])
    return (in_slot1 | in_slot2 | prev_in_slot1 | prev_in_slot2
            | spans_slot1 | spans_slot2)


start = time.time()
total = 0
stocks = set()
days = set()
sub_chunks = []

# First pass: collect every stock id and day, plus the (date, seconds) of
# all rows where the bid is above the ask (used below as auction rows).
first_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                         usecols=['id', 'bid_price', 'ask_price', 'date', 'seconds'],
                         engine='python', chunksize=chunk_size)
for chunk in first_pass:
    stocks |= set(chunk.id.unique().tolist())
    days |= set(chunk.date.unique().tolist())

    auction_data = chunk[chunk['bid_price'] > chunk['ask_price']]
    sub_chunks.append(auction_data[['date', 'seconds']])

days_list = list(days)
auction = pd.concat(sub_chunks)
stocky = list(stocks)

# Per day: (min, max) of the auction seconds before and after the division.
au_bound = []
for day in days_list:
    g = auction[auction['date'] == day]
    slot1 = g[g['seconds'] < auction_division].seconds
    slot2 = g[g['seconds'] > auction_division].seconds
    au_bound.append((slot1.min(), slot1.max(), slot2.min(), slot2.max()))

# Explicit day -> bounds mapping, so the pairing never depends on the
# iteration order of the `days` set.
bounds_by_day = dict(zip(days_list, au_bound))

the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
# rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag

# Second pass: time between consecutive trades per (stock, day).
second_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                          usecols=col_names_load, engine='python',
                          chunksize=chunk_size)
for i, chunk in enumerate(second_pass):
    total += 1
    print('%d th chunk' % (i + 1))
    # I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
    # Tick: https://www.investopedia.com/terms/t/tick.asp
    # Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
    # The comparison must be parenthesised: `&` binds tighter than `==`.
    is_trade = (chunk['update'] == 1) & chunk['con_codes'].isin(trade_codes)
    chunk_clean = chunk[is_trade]

    # Visit only the (stock, day) pairs present in this chunk, in order of
    # first appearance, instead of filtering once per stock x day pair.
    for (stock, day), group in chunk_clean.groupby(['id', 'date'], sort=False):
        ab = bounds_by_day[day]
        stock_chunk = group.copy()  # own copy, so the writes below are safe

        # time between trades subtraction
        prev_seconds = stock_chunk['seconds'].shift(1)
        stock_chunk['t_b_trades'] = stock_chunk['seconds'] - prev_seconds

        # eliminate trades that cross auctions
        crossing = _touches_auction(prev_seconds, stock_chunk['seconds'], ab)
        stock_chunk.loc[crossing, 't_b_trades'] = np.nan

        # The first row is always wrong: its predecessor, if any, is in the
        # previous chunk.  As in the original, every row sharing the first
        # row's timestamp is overwritten.
        first_seconds = stock_chunk['seconds'].iloc[0]
        first_rows = stock_chunk['seconds'] == first_seconds
        gap = np.nan
        if (the_last_element.loc[6, stock] == 1
                and the_last_element.loc[4, stock] == day):
            last_seconds = the_last_element.loc[5, stock]
            # eliminate trades that cross auctions
            if not _touches_auction(last_seconds, first_seconds, ab):
                gap = first_seconds - last_seconds
        stock_chunk.loc[first_rows, 't_b_trades'] = gap

        # fill the last row for the next chunk (whole-column assignment
        # instead of chained indexing)
        the_last_element[stock] = [
            stock_chunk.bid_price.iloc[-1],
            stock_chunk.ask_price.iloc[-1],
            stock_chunk.trade_price.iloc[-1],
            stock_chunk.trade_volume.iloc[-1],
            stock_chunk.date.iloc[-1],
            stock_chunk.seconds.iloc[-1],
            1,
        ]
        # NOTE(review): stock_chunk (with t_b_trades) is not stored or
        # aggregated anywhere yet; the analysis is unfinished.


end = time.time()
tot_time = (end - start) / 60.0
print('%s minutes for data! for %s chunks of size %s' % (tot_time, total, chunk_size))






share|improve this question





















  • Does it work as intended? What does the input and output data look like? Can you post examples of both?
    – Mast
    Jul 16 at 6:31












up vote
0
down vote

favorite









up vote
0
down vote

favorite











I recently failed to finish a coding task for a job interview. One of the problems was that I decided to use Pandas (it made sense) but I was unfamiliar with it (although I know Python, SciPy and NumPy), so it took a lot of time to figure everything out. It's the first time I have written this kind of code manipulating Pandas data frames, so I was wondering if you could give me advice on how to do things better.



The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulate the data in order to find certain properties. I did not finish it, but do you think I could have done something to make it run faster? It took 43 minutes to do just what it does now. The .csv file is 1.2 GB.



Moreover, any observations about style are more than welcome.



import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

chunk_size = 10**5  # rows per chunk; safe on memory
auction_division = 40000.0  # empirical split (in seconds) between the two auction slots
path = 'scandi.csv'

col_names = ['id', 'empty0', 'bid_price', 'ask_price', 'trade_price',
             'bid_volume', 'ask_volume', 'trade_volume', 'update',
             'empty1', 'date', 'seconds', 'opening', 'empty2', 'con_codes']

col_names_load = ['id', 'bid_price', 'ask_price', 'trade_price',
                  'trade_volume', 'update', 'date', 'seconds', 'con_codes']

# Condition codes that are kept when selecting trade updates.
trade_codes = ['@1', 'XT', 'XT|C', 'XT|O']


def _touches_auction(prev_seconds, seconds, ab):
    """Tell whether the interval prev_seconds -> seconds touches an auction.

    `ab` is the tuple (slot1_min, slot1_max, slot2_min, slot2_max).  The
    result is true when either end point lies strictly inside a slot, or
    when the interval jumps over a whole slot.  Only comparisons and
    `&` / `|` are used, so this works element-wise on Series as well as
    on scalars.
    """
    in_slot1 = (seconds > ab[0]) & (seconds < ab[1])
    in_slot2 = (seconds > ab[2]) & (seconds < ab[3])
    prev_in_slot1 = (prev_seconds > ab[0]) & (prev_seconds < ab[1])
    prev_in_slot2 = (prev_seconds > ab[2]) & (prev_seconds < ab[3])
    spans_slot1 = (prev_seconds < ab[0]) & (seconds > ab[1])
    spans_slot2 = (prev_seconds < ab[2]) & (seconds > ab[3])
    return (in_slot1 | in_slot2 | prev_in_slot1 | prev_in_slot2
            | spans_slot1 | spans_slot2)


start = time.time()
total = 0
stocks = set()
days = set()
sub_chunks = []

# First pass: collect every stock id and day, plus the (date, seconds) of
# all rows where the bid is above the ask (used below as auction rows).
first_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                         usecols=['id', 'bid_price', 'ask_price', 'date', 'seconds'],
                         engine='python', chunksize=chunk_size)
for chunk in first_pass:
    stocks |= set(chunk.id.unique().tolist())
    days |= set(chunk.date.unique().tolist())

    auction_data = chunk[chunk['bid_price'] > chunk['ask_price']]
    sub_chunks.append(auction_data[['date', 'seconds']])

days_list = list(days)
auction = pd.concat(sub_chunks)
stocky = list(stocks)

# Per day: (min, max) of the auction seconds before and after the division.
au_bound = []
for day in days_list:
    g = auction[auction['date'] == day]
    slot1 = g[g['seconds'] < auction_division].seconds
    slot2 = g[g['seconds'] > auction_division].seconds
    au_bound.append((slot1.min(), slot1.max(), slot2.min(), slot2.max()))

# Explicit day -> bounds mapping, so the pairing never depends on the
# iteration order of the `days` set.
bounds_by_day = dict(zip(days_list, au_bound))

the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
# rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag

# Second pass: time between consecutive trades per (stock, day).
second_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                          usecols=col_names_load, engine='python',
                          chunksize=chunk_size)
for i, chunk in enumerate(second_pass):
    total += 1
    print('%d th chunk' % (i + 1))
    # I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
    # Tick: https://www.investopedia.com/terms/t/tick.asp
    # Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
    # The comparison must be parenthesised: `&` binds tighter than `==`.
    is_trade = (chunk['update'] == 1) & chunk['con_codes'].isin(trade_codes)
    chunk_clean = chunk[is_trade]

    # Visit only the (stock, day) pairs present in this chunk, in order of
    # first appearance, instead of filtering once per stock x day pair.
    for (stock, day), group in chunk_clean.groupby(['id', 'date'], sort=False):
        ab = bounds_by_day[day]
        stock_chunk = group.copy()  # own copy, so the writes below are safe

        # time between trades subtraction
        prev_seconds = stock_chunk['seconds'].shift(1)
        stock_chunk['t_b_trades'] = stock_chunk['seconds'] - prev_seconds

        # eliminate trades that cross auctions
        crossing = _touches_auction(prev_seconds, stock_chunk['seconds'], ab)
        stock_chunk.loc[crossing, 't_b_trades'] = np.nan

        # The first row is always wrong: its predecessor, if any, is in the
        # previous chunk.  As in the original, every row sharing the first
        # row's timestamp is overwritten.
        first_seconds = stock_chunk['seconds'].iloc[0]
        first_rows = stock_chunk['seconds'] == first_seconds
        gap = np.nan
        if (the_last_element.loc[6, stock] == 1
                and the_last_element.loc[4, stock] == day):
            last_seconds = the_last_element.loc[5, stock]
            # eliminate trades that cross auctions
            if not _touches_auction(last_seconds, first_seconds, ab):
                gap = first_seconds - last_seconds
        stock_chunk.loc[first_rows, 't_b_trades'] = gap

        # fill the last row for the next chunk (whole-column assignment
        # instead of chained indexing)
        the_last_element[stock] = [
            stock_chunk.bid_price.iloc[-1],
            stock_chunk.ask_price.iloc[-1],
            stock_chunk.trade_price.iloc[-1],
            stock_chunk.trade_volume.iloc[-1],
            stock_chunk.date.iloc[-1],
            stock_chunk.seconds.iloc[-1],
            1,
        ]
        # NOTE(review): stock_chunk (with t_b_trades) is not stored or
        # aggregated anywhere yet; the analysis is unfinished.


end = time.time()
tot_time = (end - start) / 60.0
print('%s minutes for data! for %s chunks of size %s' % (tot_time, total, chunk_size))






share|improve this question













I recently failed to finish a coding task for a job interview. One of the problems was that I decided to use Pandas (it made sense) but I was unfamiliar with it (although I know Python, SciPy and NumPy), so it took a lot of time to figure everything out. It's the first time I have written this kind of code manipulating Pandas data frames, so I was wondering if you could give me advice on how to do things better.



The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulate the data in order to find certain properties. I did not finish it, but do you think I could have done something to make it run faster? It took 43 minutes to do just what it does now. The .csv file is 1.2 GB.



Moreover, any observations about style are more than welcome.



import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

chunk_size = 10**5  # rows per chunk; safe on memory
auction_division = 40000.0  # empirical split (in seconds) between the two auction slots
path = 'scandi.csv'

col_names = ['id', 'empty0', 'bid_price', 'ask_price', 'trade_price',
             'bid_volume', 'ask_volume', 'trade_volume', 'update',
             'empty1', 'date', 'seconds', 'opening', 'empty2', 'con_codes']

col_names_load = ['id', 'bid_price', 'ask_price', 'trade_price',
                  'trade_volume', 'update', 'date', 'seconds', 'con_codes']

# Condition codes that are kept when selecting trade updates.
trade_codes = ['@1', 'XT', 'XT|C', 'XT|O']


def _touches_auction(prev_seconds, seconds, ab):
    """Tell whether the interval prev_seconds -> seconds touches an auction.

    `ab` is the tuple (slot1_min, slot1_max, slot2_min, slot2_max).  The
    result is true when either end point lies strictly inside a slot, or
    when the interval jumps over a whole slot.  Only comparisons and
    `&` / `|` are used, so this works element-wise on Series as well as
    on scalars.
    """
    in_slot1 = (seconds > ab[0]) & (seconds < ab[1])
    in_slot2 = (seconds > ab[2]) & (seconds < ab[3])
    prev_in_slot1 = (prev_seconds > ab[0]) & (prev_seconds < ab[1])
    prev_in_slot2 = (prev_seconds > ab[2]) & (prev_seconds < ab[3])
    spans_slot1 = (prev_seconds < ab[0]) & (seconds > ab[1])
    spans_slot2 = (prev_seconds < ab[2]) & (seconds > ab[3])
    return (in_slot1 | in_slot2 | prev_in_slot1 | prev_in_slot2
            | spans_slot1 | spans_slot2)


start = time.time()
total = 0
stocks = set()
days = set()
sub_chunks = []

# First pass: collect every stock id and day, plus the (date, seconds) of
# all rows where the bid is above the ask (used below as auction rows).
first_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                         usecols=['id', 'bid_price', 'ask_price', 'date', 'seconds'],
                         engine='python', chunksize=chunk_size)
for chunk in first_pass:
    stocks |= set(chunk.id.unique().tolist())
    days |= set(chunk.date.unique().tolist())

    auction_data = chunk[chunk['bid_price'] > chunk['ask_price']]
    sub_chunks.append(auction_data[['date', 'seconds']])

days_list = list(days)
auction = pd.concat(sub_chunks)
stocky = list(stocks)

# Per day: (min, max) of the auction seconds before and after the division.
au_bound = []
for day in days_list:
    g = auction[auction['date'] == day]
    slot1 = g[g['seconds'] < auction_division].seconds
    slot2 = g[g['seconds'] > auction_division].seconds
    au_bound.append((slot1.min(), slot1.max(), slot2.min(), slot2.max()))

# Explicit day -> bounds mapping, so the pairing never depends on the
# iteration order of the `days` set.
bounds_by_day = dict(zip(days_list, au_bound))

the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
# rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag

# Second pass: time between consecutive trades per (stock, day).
second_pass = pd.read_csv(path, sep=',,|,', names=col_names,
                          usecols=col_names_load, engine='python',
                          chunksize=chunk_size)
for i, chunk in enumerate(second_pass):
    total += 1
    print('%d th chunk' % (i + 1))
    # I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
    # Tick: https://www.investopedia.com/terms/t/tick.asp
    # Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
    # The comparison must be parenthesised: `&` binds tighter than `==`.
    is_trade = (chunk['update'] == 1) & chunk['con_codes'].isin(trade_codes)
    chunk_clean = chunk[is_trade]

    # Visit only the (stock, day) pairs present in this chunk, in order of
    # first appearance, instead of filtering once per stock x day pair.
    for (stock, day), group in chunk_clean.groupby(['id', 'date'], sort=False):
        ab = bounds_by_day[day]
        stock_chunk = group.copy()  # own copy, so the writes below are safe

        # time between trades subtraction
        prev_seconds = stock_chunk['seconds'].shift(1)
        stock_chunk['t_b_trades'] = stock_chunk['seconds'] - prev_seconds

        # eliminate trades that cross auctions
        crossing = _touches_auction(prev_seconds, stock_chunk['seconds'], ab)
        stock_chunk.loc[crossing, 't_b_trades'] = np.nan

        # The first row is always wrong: its predecessor, if any, is in the
        # previous chunk.  As in the original, every row sharing the first
        # row's timestamp is overwritten.
        first_seconds = stock_chunk['seconds'].iloc[0]
        first_rows = stock_chunk['seconds'] == first_seconds
        gap = np.nan
        if (the_last_element.loc[6, stock] == 1
                and the_last_element.loc[4, stock] == day):
            last_seconds = the_last_element.loc[5, stock]
            # eliminate trades that cross auctions
            if not _touches_auction(last_seconds, first_seconds, ab):
                gap = first_seconds - last_seconds
        stock_chunk.loc[first_rows, 't_b_trades'] = gap

        # fill the last row for the next chunk (whole-column assignment
        # instead of chained indexing)
        the_last_element[stock] = [
            stock_chunk.bid_price.iloc[-1],
            stock_chunk.ask_price.iloc[-1],
            stock_chunk.trade_price.iloc[-1],
            stock_chunk.trade_volume.iloc[-1],
            stock_chunk.date.iloc[-1],
            stock_chunk.seconds.iloc[-1],
            1,
        ]
        # NOTE(review): stock_chunk (with t_b_trades) is not stored or
        # aggregated anywhere yet; the analysis is unfinished.


end = time.time()
tot_time = (end - start) / 60.0
print('%s minutes for data! for %s chunks of size %s' % (tot_time, total, chunk_size))








share|improve this question












share|improve this question




share|improve this question








edited Jul 23 at 0:28









Jamal♦

30.1k11114225




30.1k11114225









asked Jul 16 at 6:29









spec3

334




334











  • Does it work as intended? What does the input and output data look like? Can you post examples of both?
    – Mast
    Jul 16 at 6:31
















  • Does it work as intended? What does the input and output data look like? Can you post examples of both?
    – Mast
    Jul 16 at 6:31















Does it work as intended? What does the input and output data look like? Can you post examples of both?
– Mast
Jul 16 at 6:31




Does it work as intended? What does the input and output data look like? Can you post examples of both?
– Mast
Jul 16 at 6:31















active

oldest

votes











Your Answer




// Registered under the "mathjax-editing" key: adds a creation callback on the
// Markdown editor that calls prepareWmdForMathJax for each new editor.
StackExchange.ifUsing("editor", function () {
    return StackExchange.using("mathjaxEditing", function () {
        StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
        });
    });
}, "mathjax-editing");

// Registered under the "code-snippets" key: loads "externalEditor", then
// "snippets", and calls StackExchange.snippets.init().
StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
        StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
        });
    });
}, "code-snippets");

// Ready handler: sets up the tag renderer, then creates the answer editor
// (after the snippets module when snippets are enabled).
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "196"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Calls StackExchange.prepareEditor with the answer-editor options.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            convertImagesToLinks: false,
            noModals: false,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});








 

draft saved


draft discarded


















// Ready handler: calls initPostLogin for the '.new-post-login' selector with
// the URL-encoded address of this question's new-answer anchor.
StackExchange.ready(
    function () {
        StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f199569%2fmanipulating-dataframes-on-pandas%23new-answer', 'question_page');
    }
);

Post as a guest



































active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes










 

draft saved


draft discarded


























 


draft saved


draft discarded














// Ready handler: calls initPostLogin for the '.new-post-login' selector with
// the URL-encoded address of this question's new-answer anchor.
StackExchange.ready(
    function () {
        StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f199569%2fmanipulating-dataframes-on-pandas%23new-answer', 'question_page');
    }
);

Post as a guest













































































Popular posts from this blog

Chat program with C++ and SFML

Function to Return a JSON Like Objects Using VBA Collections and Arrays

Will my employers contract hold up in court?