Manipulating DataFrames on pandas
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
0
down vote
favorite
I recently failed to finish a code for a job interview. One of the problems is that I decided to use Pandas (it made sense) but I was unfamiliar with it (however I know Python Scipy and Numpy), so it took a lot of time to figure out everything. It's the first time I wrote such kind of code manipulating Pandas data frames, thus I was wondering if you could give me advice to do things better.
The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulating the data in order to find certain properties. I did not finish it, but do you think I could have done something that make it run faster? it took 43 minutes to do just what it does now. The .csv file is a 1.2GB file.
Moreover, if you have any observation about style it is more than welcome.
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt
chunk_size = 10**5 #safe on memory
auction_division = 40000.0 #empirical
path = 'scandi.csv'
col_names = ['id','empty0','bid_price','ask_price','trade_price',
'bid_volume','ask_volume','trade_volume','update',
'empty1','date','seconds','opening','empty2','con_codes']
col_names_load = ['id','bid_price','ask_price','trade_price',
'trade_volume','update','date','seconds','con_codes']
start = time.time()
total=0
stocks=set()
days=set()
sub_chunks =
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,',names=col_names, usecols=
['id','bid_price','ask_price','date','seconds'],
engine='python', chunksize=chunk_size)):
ids_unique = set(chunk.id.unique().tolist())
day_unique = set(chunk.date.unique().tolist())
stocks|=ids_unique
days|=day_unique
auction_data = chunk[chunk['bid_price']>chunk['ask_price']]
new_el = auction_data[['date','seconds']]
sub_chunks.append(new_el)
days_list = list(days)
auction = pd.concat(sub_chunks)
au_bound =
stocky=list(stocks)
for day in days_list:
g=auction[auction['date']==day]
slot1=g[g['seconds']<auction_division].seconds
slot2=g[g['seconds']>auction_division].seconds
au_bound.append((slot1.min(),slot1.max(),slot2.min(),slot2.max()))
the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
#rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,', names=col_names,
usecols = col_names_load, engine='python', chunksize=chunk_size)):
print i+1, 'th chunk'
#I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
#Tick: https://www.investopedia.com/terms/t/tick.asp
#Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
chunk_clean = chunk[ chunk['update']==1 & ((chunk.con_codes=='@1')
| (chunk.con_codes=='XT') | (chunk.con_codes=='XT|C') | (chunk.con_codes=='XT|O') ) ]
stocky_b = set(chunk.id.unique().tolist())
for stock in stocky_b:
for day, ab in zip( days, au_bound ):
stock_chunk=chunk_clean[(chunk_clean['id']==stock) & (chunk_clean['date']==day)]
#time between trades subtraction
stock_chunk.loc[:,'t_b_trades']=stock_chunk['seconds']-stock_chunk['seconds'].shift(1)
#eliminate trades that cross auctions
stock_chunk.loc[stock_chunk[
( (stock_chunk['seconds'] > ab[0]) & (stock_chunk['seconds'] < ab[1] ) )
| ( (stock_chunk['seconds'] > ab[2]) & (stock_chunk['seconds'] < ab[3] ) )
| ( (stock_chunk['seconds'].shift(1) > ab[0]) & (stock_chunk['seconds'].shift(1) < ab[1]) )
| ( (stock_chunk['seconds'].shift(1) > ab[2]) & (stock_chunk['seconds'].shift(1) < ab[3]) )
| ( (stock_chunk['seconds'].shift(1) < ab[0]) & (stock_chunk['seconds'] > ab[1] ) )
| ( (stock_chunk['seconds'].shift(1) < ab[2]) & (stock_chunk['seconds'] > ab[3] ) )
].index.values,'t_b_trades'] = np.nan
#the first row is always wrong
if (the_last_element[stock][6]==1 and the_last_element[stock][4]==day):
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades']
= stock_chunk.seconds.iloc[0]-the_last_element[stock][5]
#eliminate trades that cross auctions
if ( ( (stock_chunk.seconds.iloc[0] >ab[0] ) & (stock_chunk.seconds.iloc[0]< ab[1]))
| ( (stock_chunk.seconds.iloc[0] >ab[2] ) & (stock_chunk.seconds.iloc[0]< ab[3]))
| ( (the_last_element[stock][5] > ab[0] ) & (the_last_element[stock][5] < ab[1]))
| ( (the_last_element[stock][5] > ab[2] ) & (the_last_element[stock][5] < ab[3]))
| ( (the_last_element[stock][5] < ab[0] ) & (stock_chunk.seconds.iloc[0]> ab[1]))
| ( (the_last_element[stock][5] < ab[2] ) & (stock_chunk.seconds.iloc[0]> ab[3]))):
stock_chunk.loc[stock_chunk[stock_chunk['seconds'] == stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
else:
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
#fill the last row for the next chunk
if stock_chunk.empty==False:
the_last_element[stock][0]=stock_chunk.bid_price.iloc[-1]
the_last_element[stock][1]=stock_chunk.ask_price.iloc[-1]
the_last_element[stock][2]=stock_chunk.trade_price.iloc[-1]
the_last_element[stock][3]=stock_chunk.trade_volume.iloc[-1]
the_last_element[stock][4]=stock_chunk.date.iloc[-1]
the_last_element[stock][5]=stock_chunk.seconds.iloc[-1]
the_last_element[stock][6]=1
end = time.time()
tot_time = (end-start)/60.0
print tot_time, 'minutes for data! for', total, 'chunks of size', chunk_size
python pandas
add a comment |Â
up vote
0
down vote
favorite
I recently failed to finish a code for a job interview. One of the problems is that I decided to use Pandas (it made sense) but I was unfamiliar with it (however I know Python Scipy and Numpy), so it took a lot of time to figure out everything. It's the first time I wrote such kind of code manipulating Pandas data frames, thus I was wondering if you could give me advice to do things better.
The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulating the data in order to find certain properties. I did not finish it, but do you think I could have done something that make it run faster? it took 43 minutes to do just what it does now. The .csv file is a 1.2GB file.
Moreover, if you have any observation about style it is more than welcome.
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt
chunk_size = 10**5 #safe on memory
auction_division = 40000.0 #empirical
path = 'scandi.csv'
col_names = ['id','empty0','bid_price','ask_price','trade_price',
'bid_volume','ask_volume','trade_volume','update',
'empty1','date','seconds','opening','empty2','con_codes']
col_names_load = ['id','bid_price','ask_price','trade_price',
'trade_volume','update','date','seconds','con_codes']
start = time.time()
total=0
stocks=set()
days=set()
sub_chunks =
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,',names=col_names, usecols=
['id','bid_price','ask_price','date','seconds'],
engine='python', chunksize=chunk_size)):
ids_unique = set(chunk.id.unique().tolist())
day_unique = set(chunk.date.unique().tolist())
stocks|=ids_unique
days|=day_unique
auction_data = chunk[chunk['bid_price']>chunk['ask_price']]
new_el = auction_data[['date','seconds']]
sub_chunks.append(new_el)
days_list = list(days)
auction = pd.concat(sub_chunks)
au_bound =
stocky=list(stocks)
for day in days_list:
g=auction[auction['date']==day]
slot1=g[g['seconds']<auction_division].seconds
slot2=g[g['seconds']>auction_division].seconds
au_bound.append((slot1.min(),slot1.max(),slot2.min(),slot2.max()))
the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
#rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,', names=col_names,
usecols = col_names_load, engine='python', chunksize=chunk_size)):
print i+1, 'th chunk'
#I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
#Tick: https://www.investopedia.com/terms/t/tick.asp
#Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
chunk_clean = chunk[ chunk['update']==1 & ((chunk.con_codes=='@1')
| (chunk.con_codes=='XT') | (chunk.con_codes=='XT|C') | (chunk.con_codes=='XT|O') ) ]
stocky_b = set(chunk.id.unique().tolist())
for stock in stocky_b:
for day, ab in zip( days, au_bound ):
stock_chunk=chunk_clean[(chunk_clean['id']==stock) & (chunk_clean['date']==day)]
#time between trades subtraction
stock_chunk.loc[:,'t_b_trades']=stock_chunk['seconds']-stock_chunk['seconds'].shift(1)
#eliminate trades that cross auctions
stock_chunk.loc[stock_chunk[
( (stock_chunk['seconds'] > ab[0]) & (stock_chunk['seconds'] < ab[1] ) )
| ( (stock_chunk['seconds'] > ab[2]) & (stock_chunk['seconds'] < ab[3] ) )
| ( (stock_chunk['seconds'].shift(1) > ab[0]) & (stock_chunk['seconds'].shift(1) < ab[1]) )
| ( (stock_chunk['seconds'].shift(1) > ab[2]) & (stock_chunk['seconds'].shift(1) < ab[3]) )
| ( (stock_chunk['seconds'].shift(1) < ab[0]) & (stock_chunk['seconds'] > ab[1] ) )
| ( (stock_chunk['seconds'].shift(1) < ab[2]) & (stock_chunk['seconds'] > ab[3] ) )
].index.values,'t_b_trades'] = np.nan
#the first row is always wrong
if (the_last_element[stock][6]==1 and the_last_element[stock][4]==day):
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades']
= stock_chunk.seconds.iloc[0]-the_last_element[stock][5]
#eliminate trades that cross auctions
if ( ( (stock_chunk.seconds.iloc[0] >ab[0] ) & (stock_chunk.seconds.iloc[0]< ab[1]))
| ( (stock_chunk.seconds.iloc[0] >ab[2] ) & (stock_chunk.seconds.iloc[0]< ab[3]))
| ( (the_last_element[stock][5] > ab[0] ) & (the_last_element[stock][5] < ab[1]))
| ( (the_last_element[stock][5] > ab[2] ) & (the_last_element[stock][5] < ab[3]))
| ( (the_last_element[stock][5] < ab[0] ) & (stock_chunk.seconds.iloc[0]> ab[1]))
| ( (the_last_element[stock][5] < ab[2] ) & (stock_chunk.seconds.iloc[0]> ab[3]))):
stock_chunk.loc[stock_chunk[stock_chunk['seconds'] == stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
else:
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
#fill the last row for the next chunk
if stock_chunk.empty==False:
the_last_element[stock][0]=stock_chunk.bid_price.iloc[-1]
the_last_element[stock][1]=stock_chunk.ask_price.iloc[-1]
the_last_element[stock][2]=stock_chunk.trade_price.iloc[-1]
the_last_element[stock][3]=stock_chunk.trade_volume.iloc[-1]
the_last_element[stock][4]=stock_chunk.date.iloc[-1]
the_last_element[stock][5]=stock_chunk.seconds.iloc[-1]
the_last_element[stock][6]=1
end = time.time()
tot_time = (end-start)/60.0
print tot_time, 'minutes for data! for', total, 'chunks of size', chunk_size
python pandas
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31
add a comment |Â
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I recently failed to finish a code for a job interview. One of the problems is that I decided to use Pandas (it made sense) but I was unfamiliar with it (however I know Python Scipy and Numpy), so it took a lot of time to figure out everything. It's the first time I wrote such kind of code manipulating Pandas data frames, thus I was wondering if you could give me advice to do things better.
The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulating the data in order to find certain properties. I did not finish it, but do you think I could have done something that make it run faster? it took 43 minutes to do just what it does now. The .csv file is a 1.2GB file.
Moreover, if you have any observation about style it is more than welcome.
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt
chunk_size = 10**5 #safe on memory
auction_division = 40000.0 #empirical
path = 'scandi.csv'
col_names = ['id','empty0','bid_price','ask_price','trade_price',
'bid_volume','ask_volume','trade_volume','update',
'empty1','date','seconds','opening','empty2','con_codes']
col_names_load = ['id','bid_price','ask_price','trade_price',
'trade_volume','update','date','seconds','con_codes']
start = time.time()
total=0
stocks=set()
days=set()
sub_chunks =
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,',names=col_names, usecols=
['id','bid_price','ask_price','date','seconds'],
engine='python', chunksize=chunk_size)):
ids_unique = set(chunk.id.unique().tolist())
day_unique = set(chunk.date.unique().tolist())
stocks|=ids_unique
days|=day_unique
auction_data = chunk[chunk['bid_price']>chunk['ask_price']]
new_el = auction_data[['date','seconds']]
sub_chunks.append(new_el)
days_list = list(days)
auction = pd.concat(sub_chunks)
au_bound =
stocky=list(stocks)
for day in days_list:
g=auction[auction['date']==day]
slot1=g[g['seconds']<auction_division].seconds
slot2=g[g['seconds']>auction_division].seconds
au_bound.append((slot1.min(),slot1.max(),slot2.min(),slot2.max()))
the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
#rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,', names=col_names,
usecols = col_names_load, engine='python', chunksize=chunk_size)):
print i+1, 'th chunk'
#I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
#Tick: https://www.investopedia.com/terms/t/tick.asp
#Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
chunk_clean = chunk[ chunk['update']==1 & ((chunk.con_codes=='@1')
| (chunk.con_codes=='XT') | (chunk.con_codes=='XT|C') | (chunk.con_codes=='XT|O') ) ]
stocky_b = set(chunk.id.unique().tolist())
for stock in stocky_b:
for day, ab in zip( days, au_bound ):
stock_chunk=chunk_clean[(chunk_clean['id']==stock) & (chunk_clean['date']==day)]
#time between trades subtraction
stock_chunk.loc[:,'t_b_trades']=stock_chunk['seconds']-stock_chunk['seconds'].shift(1)
#eliminate trades that cross auctions
stock_chunk.loc[stock_chunk[
( (stock_chunk['seconds'] > ab[0]) & (stock_chunk['seconds'] < ab[1] ) )
| ( (stock_chunk['seconds'] > ab[2]) & (stock_chunk['seconds'] < ab[3] ) )
| ( (stock_chunk['seconds'].shift(1) > ab[0]) & (stock_chunk['seconds'].shift(1) < ab[1]) )
| ( (stock_chunk['seconds'].shift(1) > ab[2]) & (stock_chunk['seconds'].shift(1) < ab[3]) )
| ( (stock_chunk['seconds'].shift(1) < ab[0]) & (stock_chunk['seconds'] > ab[1] ) )
| ( (stock_chunk['seconds'].shift(1) < ab[2]) & (stock_chunk['seconds'] > ab[3] ) )
].index.values,'t_b_trades'] = np.nan
#the first row is always wrong
if (the_last_element[stock][6]==1 and the_last_element[stock][4]==day):
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades']
= stock_chunk.seconds.iloc[0]-the_last_element[stock][5]
#eliminate trades that cross auctions
if ( ( (stock_chunk.seconds.iloc[0] >ab[0] ) & (stock_chunk.seconds.iloc[0]< ab[1]))
| ( (stock_chunk.seconds.iloc[0] >ab[2] ) & (stock_chunk.seconds.iloc[0]< ab[3]))
| ( (the_last_element[stock][5] > ab[0] ) & (the_last_element[stock][5] < ab[1]))
| ( (the_last_element[stock][5] > ab[2] ) & (the_last_element[stock][5] < ab[3]))
| ( (the_last_element[stock][5] < ab[0] ) & (stock_chunk.seconds.iloc[0]> ab[1]))
| ( (the_last_element[stock][5] < ab[2] ) & (stock_chunk.seconds.iloc[0]> ab[3]))):
stock_chunk.loc[stock_chunk[stock_chunk['seconds'] == stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
else:
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
#fill the last row for the next chunk
if stock_chunk.empty==False:
the_last_element[stock][0]=stock_chunk.bid_price.iloc[-1]
the_last_element[stock][1]=stock_chunk.ask_price.iloc[-1]
the_last_element[stock][2]=stock_chunk.trade_price.iloc[-1]
the_last_element[stock][3]=stock_chunk.trade_volume.iloc[-1]
the_last_element[stock][4]=stock_chunk.date.iloc[-1]
the_last_element[stock][5]=stock_chunk.seconds.iloc[-1]
the_last_element[stock][6]=1
end = time.time()
tot_time = (end-start)/60.0
print tot_time, 'minutes for data! for', total, 'chunks of size', chunk_size
python pandas
I recently failed to finish a code for a job interview. One of the problems is that I decided to use Pandas (it made sense) but I was unfamiliar with it (however I know Python Scipy and Numpy), so it took a lot of time to figure out everything. It's the first time I wrote such kind of code manipulating Pandas data frames, thus I was wondering if you could give me advice to do things better.
The purpose of the code was to read a CSV table of trading data (for a finance company) and then manipulating the data in order to find certain properties. I did not finish it, but do you think I could have done something that make it run faster? it took 43 minutes to do just what it does now. The .csv file is a 1.2GB file.
Moreover, if you have any observation about style it is more than welcome.
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt
chunk_size = 10**5 #safe on memory
auction_division = 40000.0 #empirical
path = 'scandi.csv'
col_names = ['id','empty0','bid_price','ask_price','trade_price',
'bid_volume','ask_volume','trade_volume','update',
'empty1','date','seconds','opening','empty2','con_codes']
col_names_load = ['id','bid_price','ask_price','trade_price',
'trade_volume','update','date','seconds','con_codes']
start = time.time()
total=0
stocks=set()
days=set()
sub_chunks =
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,',names=col_names, usecols=
['id','bid_price','ask_price','date','seconds'],
engine='python', chunksize=chunk_size)):
ids_unique = set(chunk.id.unique().tolist())
day_unique = set(chunk.date.unique().tolist())
stocks|=ids_unique
days|=day_unique
auction_data = chunk[chunk['bid_price']>chunk['ask_price']]
new_el = auction_data[['date','seconds']]
sub_chunks.append(new_el)
days_list = list(days)
auction = pd.concat(sub_chunks)
au_bound =
stocky=list(stocks)
for day in days_list:
g=auction[auction['date']==day]
slot1=g[g['seconds']<auction_division].seconds
slot2=g[g['seconds']>auction_division].seconds
au_bound.append((slot1.min(),slot1.max(),slot2.min(),slot2.max()))
the_last_element = pd.DataFrame(np.zeros((7, len(stocks))), columns=stocky)
#rows: 0: bid price, 1: ask price, 2: trade price, 3: trade volumes, 4: date, 5: seconds, 6: flag
for i,chunk in enumerate(pd.read_csv(path, sep=',,|,', names=col_names,
usecols = col_names_load, engine='python', chunksize=chunk_size)):
print i+1, 'th chunk'
#I select just trade updates because ticks and bid-ask spreads make sense at the trade (that's what I think to have learnt from investopedia)
#Tick: https://www.investopedia.com/terms/t/tick.asp
#Bid-Ask Spread: https://www.investopedia.com/terms/b/bid-askspread.asp
chunk_clean = chunk[ chunk['update']==1 & ((chunk.con_codes=='@1')
| (chunk.con_codes=='XT') | (chunk.con_codes=='XT|C') | (chunk.con_codes=='XT|O') ) ]
stocky_b = set(chunk.id.unique().tolist())
for stock in stocky_b:
for day, ab in zip( days, au_bound ):
stock_chunk=chunk_clean[(chunk_clean['id']==stock) & (chunk_clean['date']==day)]
#time between trades subtraction
stock_chunk.loc[:,'t_b_trades']=stock_chunk['seconds']-stock_chunk['seconds'].shift(1)
#eliminate trades that cross auctions
stock_chunk.loc[stock_chunk[
( (stock_chunk['seconds'] > ab[0]) & (stock_chunk['seconds'] < ab[1] ) )
| ( (stock_chunk['seconds'] > ab[2]) & (stock_chunk['seconds'] < ab[3] ) )
| ( (stock_chunk['seconds'].shift(1) > ab[0]) & (stock_chunk['seconds'].shift(1) < ab[1]) )
| ( (stock_chunk['seconds'].shift(1) > ab[2]) & (stock_chunk['seconds'].shift(1) < ab[3]) )
| ( (stock_chunk['seconds'].shift(1) < ab[0]) & (stock_chunk['seconds'] > ab[1] ) )
| ( (stock_chunk['seconds'].shift(1) < ab[2]) & (stock_chunk['seconds'] > ab[3] ) )
].index.values,'t_b_trades'] = np.nan
#the first row is always wrong
if (the_last_element[stock][6]==1 and the_last_element[stock][4]==day):
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades']
= stock_chunk.seconds.iloc[0]-the_last_element[stock][5]
#eliminate trades that cross auctions
if ( ( (stock_chunk.seconds.iloc[0] >ab[0] ) & (stock_chunk.seconds.iloc[0]< ab[1]))
| ( (stock_chunk.seconds.iloc[0] >ab[2] ) & (stock_chunk.seconds.iloc[0]< ab[3]))
| ( (the_last_element[stock][5] > ab[0] ) & (the_last_element[stock][5] < ab[1]))
| ( (the_last_element[stock][5] > ab[2] ) & (the_last_element[stock][5] < ab[3]))
| ( (the_last_element[stock][5] < ab[0] ) & (stock_chunk.seconds.iloc[0]> ab[1]))
| ( (the_last_element[stock][5] < ab[2] ) & (stock_chunk.seconds.iloc[0]> ab[3]))):
stock_chunk.loc[stock_chunk[stock_chunk['seconds'] == stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
else:
if stock_chunk.empty==False:
stock_chunk.loc[stock_chunk[stock_chunk['seconds']==stock_chunk.seconds.iloc[0]].index.values,'t_b_trades'] = np.nan
#fill the last row for the next chunk
if stock_chunk.empty==False:
the_last_element[stock][0]=stock_chunk.bid_price.iloc[-1]
the_last_element[stock][1]=stock_chunk.ask_price.iloc[-1]
the_last_element[stock][2]=stock_chunk.trade_price.iloc[-1]
the_last_element[stock][3]=stock_chunk.trade_volume.iloc[-1]
the_last_element[stock][4]=stock_chunk.date.iloc[-1]
the_last_element[stock][5]=stock_chunk.seconds.iloc[-1]
the_last_element[stock][6]=1
end = time.time()
tot_time = (end-start)/60.0
print tot_time, 'minutes for data! for', total, 'chunks of size', chunk_size
python pandas
edited Jul 23 at 0:28
Jamalâ¦
30.1k11114225
30.1k11114225
asked Jul 16 at 6:29
spec3
334
334
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31
add a comment |Â
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31
add a comment |Â
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f199569%2fmanipulating-dataframes-on-pandas%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Does it work as intended? What does the input and output data look like? Can you post examples of both?
â Mast
Jul 16 at 6:31