How can I optimize my geopandas script?

Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
0
down vote
favorite
How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".
I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').

The data are available here: https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/
# clean_data function
def clean_data(gdf):
    """Return a geometry-only copy of ``gdf`` reprojected to EPSG:2154.

    Rows whose geometry is missing or invalid are dropped, the remaining
    rows are passed through ``explode`` (a helper defined outside this
    block; presumably it splits multi-part geometries -- TODO confirm),
    and the result is reprojected to EPSG:2154.

    Args:
        gdf: GeoDataFrame with a ``"geometry"`` column. It is not modified.

    Returns:
        A new GeoDataFrame holding only the ``"geometry"`` column, with a
        fresh 0..n-1 index.
    """
    geometry = gdf["geometry"]
    # One combined mask means a single filtering pass; boolean indexing
    # already yields a new frame, so no up-front copy of every column.
    usable = geometry.notnull() & geometry.is_valid
    cleaned = explode(gdf[usable])
    # The ``epsg`` keyword replaces the deprecated '+init=epsg:2154' string.
    cleaned = cleaned.to_crs(epsg=2154)
    # Only the geometry is kept, so discard the old index instead of
    # materialising it as columns that would be dropped on the next line.
    cleaned = cleaned.reset_index(drop=True)
    cleaned = cleaned[["geometry"]]
    return cleaned.set_geometry("geometry")
def ces(bati, parcelle, output_path="ces.shp"):
    """Compute the built-up percentage (CES) and shape indices per parcel.

    Both layers are normalised with ``clean_data``, the parcels are
    intersected with the buildings through ``spatial_overlays`` (both
    helpers are defined outside this block), and the building footprint
    inside each parcel is related to the parcel area.

    Args:
        bati: GeoDataFrame of building geometries.
        parcelle: GeoDataFrame of parcel geometries.
        output_path: Path the result is written to with ``to_file``.
            Defaults to ``"ces.shp"`` (the previously hard-coded path);
            pass ``None`` to skip writing.

    Returns:
        GeoDataFrame with the columns ``id_par`` (1-based parcel id),
        ``surf_par`` (parcel area), ``surf_bat`` (built area, 0 when the
        parcel has no building), ``ces`` (``surf_bat / surf_par * 100``),
        ``shape`` (perimeter divided by the circumference of the circle
        with the same area), ``shape2`` (perimeter divided by the square
        root of the area) and ``geometry``.
    """
    # clean_data hands back new frames, so no extra copy is needed here.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, 1 + len(parcels)))

    pieces = spatial_overlays(parcels, buildings, how='intersection')
    # Dissolving unions all building pieces of a parcel before measuring,
    # so overlapping buildings are not counted twice.
    built_area = pieces.dissolve(by="id_par")["geometry"].area

    parcel_area = parcels["geometry"].area
    parcels.insert(1, "surf_par", parcel_area)

    # Parcels without any building are absent from built_area and map to
    # NaN; the ratio is computed first and both columns are then zero-filled
    # (only these two columns, never the geometry).
    surf_bat = parcels["id_par"].map(built_area)
    ratio = surf_bat / parcel_area * 100
    parcels["surf_bat"] = surf_bat.fillna(0)
    parcels["ces"] = ratio.fillna(0)

    # The perimeter is needed by both shape indices: compute it once.
    perimeter = parcels.boundary.length
    parcels["shape"] = perimeter / (2 * np.sqrt(np.pi * parcel_area))
    parcels["shape2"] = perimeter / np.sqrt(parcel_area)

    result = parcels[['id_par', 'surf_par', 'surf_bat', 'ces', 'shape', 'shape2', 'geometry']]
    # NOTE(review): '+init=' strings are deprecated by recent pyproj; kept
    # verbatim so the CRS metadata written to disk stays unchanged.
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result
python performance python-3.x
add a comment |
up vote
0
down vote
favorite
How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".
I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').

The data are available here: https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/
# clean_data function
def clean_data(gdf):
    """Return a geometry-only copy of ``gdf`` reprojected to EPSG:2154.

    Rows whose geometry is missing or invalid are dropped, the remaining
    rows are passed through ``explode`` (a helper defined outside this
    block; presumably it splits multi-part geometries -- TODO confirm),
    and the result is reprojected to EPSG:2154.

    Args:
        gdf: GeoDataFrame with a ``"geometry"`` column. It is not modified.

    Returns:
        A new GeoDataFrame holding only the ``"geometry"`` column, with a
        fresh 0..n-1 index.
    """
    geometry = gdf["geometry"]
    # One combined mask means a single filtering pass; boolean indexing
    # already yields a new frame, so no up-front copy of every column.
    usable = geometry.notnull() & geometry.is_valid
    cleaned = explode(gdf[usable])
    # The ``epsg`` keyword replaces the deprecated '+init=epsg:2154' string.
    cleaned = cleaned.to_crs(epsg=2154)
    # Only the geometry is kept, so discard the old index instead of
    # materialising it as columns that would be dropped on the next line.
    cleaned = cleaned.reset_index(drop=True)
    cleaned = cleaned[["geometry"]]
    return cleaned.set_geometry("geometry")
def ces(bati, parcelle, output_path="ces.shp"):
    """Compute the built-up percentage (CES) and shape indices per parcel.

    Both layers are normalised with ``clean_data``, the parcels are
    intersected with the buildings through ``spatial_overlays`` (both
    helpers are defined outside this block), and the building footprint
    inside each parcel is related to the parcel area.

    Args:
        bati: GeoDataFrame of building geometries.
        parcelle: GeoDataFrame of parcel geometries.
        output_path: Path the result is written to with ``to_file``.
            Defaults to ``"ces.shp"`` (the previously hard-coded path);
            pass ``None`` to skip writing.

    Returns:
        GeoDataFrame with the columns ``id_par`` (1-based parcel id),
        ``surf_par`` (parcel area), ``surf_bat`` (built area, 0 when the
        parcel has no building), ``ces`` (``surf_bat / surf_par * 100``),
        ``shape`` (perimeter divided by the circumference of the circle
        with the same area), ``shape2`` (perimeter divided by the square
        root of the area) and ``geometry``.
    """
    # clean_data hands back new frames, so no extra copy is needed here.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, 1 + len(parcels)))

    pieces = spatial_overlays(parcels, buildings, how='intersection')
    # Dissolving unions all building pieces of a parcel before measuring,
    # so overlapping buildings are not counted twice.
    built_area = pieces.dissolve(by="id_par")["geometry"].area

    parcel_area = parcels["geometry"].area
    parcels.insert(1, "surf_par", parcel_area)

    # Parcels without any building are absent from built_area and map to
    # NaN; the ratio is computed first and both columns are then zero-filled
    # (only these two columns, never the geometry).
    surf_bat = parcels["id_par"].map(built_area)
    ratio = surf_bat / parcel_area * 100
    parcels["surf_bat"] = surf_bat.fillna(0)
    parcels["ces"] = ratio.fillna(0)

    # The perimeter is needed by both shape indices: compute it once.
    perimeter = parcels.boundary.length
    parcels["shape"] = perimeter / (2 * np.sqrt(np.pi * parcel_area))
    parcels["shape2"] = perimeter / np.sqrt(parcel_area)

    result = parcels[['id_par', 'surf_par', 'surf_bat', 'ces', 'shape', 'shape2', 'geometry']]
    # NOTE(review): '+init=' strings are deprecated by recent pyproj; kept
    # verbatim so the CRS metadata written to disk stays unchanged.
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result
python performance python-3.x
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".
I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').

The data are available here: https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/
# clean_data function
def clean_data(gdf):
    """Return a geometry-only copy of ``gdf`` reprojected to EPSG:2154.

    Rows whose geometry is missing or invalid are dropped, the remaining
    rows are passed through ``explode`` (a helper defined outside this
    block; presumably it splits multi-part geometries -- TODO confirm),
    and the result is reprojected to EPSG:2154.

    Args:
        gdf: GeoDataFrame with a ``"geometry"`` column. It is not modified.

    Returns:
        A new GeoDataFrame holding only the ``"geometry"`` column, with a
        fresh 0..n-1 index.
    """
    geometry = gdf["geometry"]
    # One combined mask means a single filtering pass; boolean indexing
    # already yields a new frame, so no up-front copy of every column.
    usable = geometry.notnull() & geometry.is_valid
    cleaned = explode(gdf[usable])
    # The ``epsg`` keyword replaces the deprecated '+init=epsg:2154' string.
    cleaned = cleaned.to_crs(epsg=2154)
    # Only the geometry is kept, so discard the old index instead of
    # materialising it as columns that would be dropped on the next line.
    cleaned = cleaned.reset_index(drop=True)
    cleaned = cleaned[["geometry"]]
    return cleaned.set_geometry("geometry")
def ces(bati, parcelle, output_path="ces.shp"):
    """Compute the built-up percentage (CES) and shape indices per parcel.

    Both layers are normalised with ``clean_data``, the parcels are
    intersected with the buildings through ``spatial_overlays`` (both
    helpers are defined outside this block), and the building footprint
    inside each parcel is related to the parcel area.

    Args:
        bati: GeoDataFrame of building geometries.
        parcelle: GeoDataFrame of parcel geometries.
        output_path: Path the result is written to with ``to_file``.
            Defaults to ``"ces.shp"`` (the previously hard-coded path);
            pass ``None`` to skip writing.

    Returns:
        GeoDataFrame with the columns ``id_par`` (1-based parcel id),
        ``surf_par`` (parcel area), ``surf_bat`` (built area, 0 when the
        parcel has no building), ``ces`` (``surf_bat / surf_par * 100``),
        ``shape`` (perimeter divided by the circumference of the circle
        with the same area), ``shape2`` (perimeter divided by the square
        root of the area) and ``geometry``.
    """
    # clean_data hands back new frames, so no extra copy is needed here.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, 1 + len(parcels)))

    pieces = spatial_overlays(parcels, buildings, how='intersection')
    # Dissolving unions all building pieces of a parcel before measuring,
    # so overlapping buildings are not counted twice.
    built_area = pieces.dissolve(by="id_par")["geometry"].area

    parcel_area = parcels["geometry"].area
    parcels.insert(1, "surf_par", parcel_area)

    # Parcels without any building are absent from built_area and map to
    # NaN; the ratio is computed first and both columns are then zero-filled
    # (only these two columns, never the geometry).
    surf_bat = parcels["id_par"].map(built_area)
    ratio = surf_bat / parcel_area * 100
    parcels["surf_bat"] = surf_bat.fillna(0)
    parcels["ces"] = ratio.fillna(0)

    # The perimeter is needed by both shape indices: compute it once.
    perimeter = parcels.boundary.length
    parcels["shape"] = perimeter / (2 * np.sqrt(np.pi * parcel_area))
    parcels["shape2"] = perimeter / np.sqrt(parcel_area)

    result = parcels[['id_par', 'surf_par', 'surf_bat', 'ces', 'shape', 'shape2', 'geometry']]
    # NOTE(review): '+init=' strings are deprecated by recent pyproj; kept
    # verbatim so the CRS metadata written to disk stays unchanged.
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result
python performance python-3.x
How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".
I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').

The data are available here: https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/
# clean_data function
def clean_data(gdf):
    """Return a geometry-only copy of ``gdf`` reprojected to EPSG:2154.

    Rows whose geometry is missing or invalid are dropped, the remaining
    rows are passed through ``explode`` (a helper defined outside this
    block; presumably it splits multi-part geometries -- TODO confirm),
    and the result is reprojected to EPSG:2154.

    Args:
        gdf: GeoDataFrame with a ``"geometry"`` column. It is not modified.

    Returns:
        A new GeoDataFrame holding only the ``"geometry"`` column, with a
        fresh 0..n-1 index.
    """
    geometry = gdf["geometry"]
    # One combined mask means a single filtering pass; boolean indexing
    # already yields a new frame, so no up-front copy of every column.
    usable = geometry.notnull() & geometry.is_valid
    cleaned = explode(gdf[usable])
    # The ``epsg`` keyword replaces the deprecated '+init=epsg:2154' string.
    cleaned = cleaned.to_crs(epsg=2154)
    # Only the geometry is kept, so discard the old index instead of
    # materialising it as columns that would be dropped on the next line.
    cleaned = cleaned.reset_index(drop=True)
    cleaned = cleaned[["geometry"]]
    return cleaned.set_geometry("geometry")
def ces(bati, parcelle, output_path="ces.shp"):
    """Compute the built-up percentage (CES) and shape indices per parcel.

    Both layers are normalised with ``clean_data``, the parcels are
    intersected with the buildings through ``spatial_overlays`` (both
    helpers are defined outside this block), and the building footprint
    inside each parcel is related to the parcel area.

    Args:
        bati: GeoDataFrame of building geometries.
        parcelle: GeoDataFrame of parcel geometries.
        output_path: Path the result is written to with ``to_file``.
            Defaults to ``"ces.shp"`` (the previously hard-coded path);
            pass ``None`` to skip writing.

    Returns:
        GeoDataFrame with the columns ``id_par`` (1-based parcel id),
        ``surf_par`` (parcel area), ``surf_bat`` (built area, 0 when the
        parcel has no building), ``ces`` (``surf_bat / surf_par * 100``),
        ``shape`` (perimeter divided by the circumference of the circle
        with the same area), ``shape2`` (perimeter divided by the square
        root of the area) and ``geometry``.
    """
    # clean_data hands back new frames, so no extra copy is needed here.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, 1 + len(parcels)))

    pieces = spatial_overlays(parcels, buildings, how='intersection')
    # Dissolving unions all building pieces of a parcel before measuring,
    # so overlapping buildings are not counted twice.
    built_area = pieces.dissolve(by="id_par")["geometry"].area

    parcel_area = parcels["geometry"].area
    parcels.insert(1, "surf_par", parcel_area)

    # Parcels without any building are absent from built_area and map to
    # NaN; the ratio is computed first and both columns are then zero-filled
    # (only these two columns, never the geometry).
    surf_bat = parcels["id_par"].map(built_area)
    ratio = surf_bat / parcel_area * 100
    parcels["surf_bat"] = surf_bat.fillna(0)
    parcels["ces"] = ratio.fillna(0)

    # The perimeter is needed by both shape indices: compute it once.
    perimeter = parcels.boundary.length
    parcels["shape"] = perimeter / (2 * np.sqrt(np.pi * parcel_area))
    parcels["shape2"] = perimeter / np.sqrt(parcel_area)

    result = parcels[['id_par', 'surf_par', 'surf_bat', 'ces', 'shape', 'shape2', 'geometry']]
    # NOTE(review): '+init=' strings are deprecated by recent pyproj; kept
    # verbatim so the CRS metadata written to disk stays unchanged.
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result
python performance python-3.x
edited Apr 20 at 12:32
asked Apr 19 at 7:54
Vincent Ferrand
42
42
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51
add a comment |
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f192441%2fhow-optimize-my-geopandas-script%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
It'd be helpful if we knew what bati and parcelle are, both in type and in the data they contain. It would also be helpful to know what the missing functions, such as clean_data and spatial_overlays, are.
– Peilonrayz
Apr 19 at 8:09
What is the "gpd function"? Google says gpd is the "Generalized Pareto Distribution".
– miracle173
Apr 19 at 14:47
gpd stands for the geopandas library.
– Vincent Ferrand
Apr 19 at 15:51