How can I optimize my geopandas script?

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
0
down vote

favorite
1












How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel that is dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".



I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').



enter image description here



Data are here : https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/



# clean_data function
def clean_data(gdf):
    """Return a cleaned, geometry-only version of *gdf* projected to EPSG:2154.

    Rows whose geometry is null or invalid are dropped, the remaining rows
    are passed through the ``explode`` helper (defined elsewhere in this
    file; presumably it splits multi-part geometries into single parts --
    TODO confirm), and the result is reprojected.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input layer with a ``geometry`` column and a defined CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        A frame with a fresh 0..n-1 index and a single ``geometry`` column.
    """
    geometry = gdf["geometry"]
    # Boolean indexing already yields a new frame, so no explicit copy of the
    # caller's data is needed before filtering.
    usable = gdf[geometry.notnull() & geometry.is_valid]
    parts = explode(usable).to_crs(epsg=2154)
    # drop=True: the old index is discarded anyway, and re-inserting it as a
    # column raises if a column with the same name already exists.
    parts = parts.reset_index(drop=True)
    return parts[["geometry"]].set_geometry("geometry")


def ces(bati, parcelle, output_path="ces.shp"):
    """Compute, for every parcel, the percentage of its area covered by buildings.

    Both layers are normalised with ``clean_data``; the parcels are numbered,
    intersected with the buildings through ``spatial_overlays`` and the
    intersected pieces are dissolved per parcel before their area is measured.

    Parameters
    ----------
    bati : geopandas.GeoDataFrame
        Building layer.
    parcelle : geopandas.GeoDataFrame
        Parcel layer.
    output_path : str or None, optional
        Where the result is written with ``to_file``. Defaults to
        ``"ces.shp"``; pass ``None`` to skip writing.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cleaned parcel with the columns

        * ``id_par``   -- sequential parcel id starting at 1,
        * ``surf_par`` -- parcel area,
        * ``surf_bat`` -- built area inside the parcel (0 when none),
        * ``ces``      -- ``surf_bat / surf_par * 100``,
        * ``shape``    -- ``perimeter / (2 * sqrt(pi * surf_par))``,
        * ``shape2``   -- ``perimeter / sqrt(surf_par)``,
        * ``geometry`` -- parcel geometry.
    """
    # clean_data returns new frames, so the caller's inputs are left untouched.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, len(parcels) + 1))

    overlap = spatial_overlays(parcels, buildings, how="intersection")
    # Dissolve before measuring so that all building pieces of a parcel are
    # merged into a single geometry per id_par.
    footprint = overlap.dissolve(by="id_par")
    built_area = footprint["geometry"].area.rename("surf_bat").reset_index()

    # Left join keeps parcels that contain no building at all.
    result = parcels.merge(built_area, how="left", on="id_par")
    result["surf_par"] = result["geometry"].area
    result["surf_bat"] = result["surf_bat"].fillna(0)
    result["ces"] = (result["surf_bat"] / result["surf_par"] * 100).fillna(0)

    # The perimeter feeds both shape indices; compute it only once.
    perimeter = result.boundary.length
    result["shape"] = perimeter / (2 * np.sqrt(np.pi * result["surf_par"]))
    result["shape2"] = perimeter / np.sqrt(result["surf_par"])

    result = result[
        ["id_par", "surf_par", "surf_bat", "ces", "shape", "shape2", "geometry"]
    ]
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result






share|improve this question





















  • It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
    – Peilonrayz
    Apr 19 at 8:09










  • what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
    – miracle173
    Apr 19 at 14:47










  • gpd is for geopandas library
    – Vincent Ferrand
    Apr 19 at 15:51
















up vote
0
down vote

favorite
1












How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel that is dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".



I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').



enter image description here



Data are here : https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/



# clean_data function
def clean_data(gdf):
    """Return a cleaned, geometry-only version of *gdf* projected to EPSG:2154.

    Rows whose geometry is null or invalid are dropped, the remaining rows
    are passed through the ``explode`` helper (defined elsewhere in this
    file; presumably it splits multi-part geometries into single parts --
    TODO confirm), and the result is reprojected.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input layer with a ``geometry`` column and a defined CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        A frame with a fresh 0..n-1 index and a single ``geometry`` column.
    """
    geometry = gdf["geometry"]
    # Boolean indexing already yields a new frame, so no explicit copy of the
    # caller's data is needed before filtering.
    usable = gdf[geometry.notnull() & geometry.is_valid]
    parts = explode(usable).to_crs(epsg=2154)
    # drop=True: the old index is discarded anyway, and re-inserting it as a
    # column raises if a column with the same name already exists.
    parts = parts.reset_index(drop=True)
    return parts[["geometry"]].set_geometry("geometry")


def ces(bati, parcelle, output_path="ces.shp"):
    """Compute, for every parcel, the percentage of its area covered by buildings.

    Both layers are normalised with ``clean_data``; the parcels are numbered,
    intersected with the buildings through ``spatial_overlays`` and the
    intersected pieces are dissolved per parcel before their area is measured.

    Parameters
    ----------
    bati : geopandas.GeoDataFrame
        Building layer.
    parcelle : geopandas.GeoDataFrame
        Parcel layer.
    output_path : str or None, optional
        Where the result is written with ``to_file``. Defaults to
        ``"ces.shp"``; pass ``None`` to skip writing.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cleaned parcel with the columns

        * ``id_par``   -- sequential parcel id starting at 1,
        * ``surf_par`` -- parcel area,
        * ``surf_bat`` -- built area inside the parcel (0 when none),
        * ``ces``      -- ``surf_bat / surf_par * 100``,
        * ``shape``    -- ``perimeter / (2 * sqrt(pi * surf_par))``,
        * ``shape2``   -- ``perimeter / sqrt(surf_par)``,
        * ``geometry`` -- parcel geometry.
    """
    # clean_data returns new frames, so the caller's inputs are left untouched.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, len(parcels) + 1))

    overlap = spatial_overlays(parcels, buildings, how="intersection")
    # Dissolve before measuring so that all building pieces of a parcel are
    # merged into a single geometry per id_par.
    footprint = overlap.dissolve(by="id_par")
    built_area = footprint["geometry"].area.rename("surf_bat").reset_index()

    # Left join keeps parcels that contain no building at all.
    result = parcels.merge(built_area, how="left", on="id_par")
    result["surf_par"] = result["geometry"].area
    result["surf_bat"] = result["surf_bat"].fillna(0)
    result["ces"] = (result["surf_bat"] / result["surf_par"] * 100).fillna(0)

    # The perimeter feeds both shape indices; compute it only once.
    perimeter = result.boundary.length
    result["shape"] = perimeter / (2 * np.sqrt(np.pi * result["surf_par"]))
    result["shape2"] = perimeter / np.sqrt(result["surf_par"])

    result = result[
        ["id_par", "surf_par", "surf_bat", "ces", "shape", "shape2", "geometry"]
    ]
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result






share|improve this question





















  • It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
    – Peilonrayz
    Apr 19 at 8:09










  • what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
    – miracle173
    Apr 19 at 14:47










  • gpd is for geopandas library
    – Vincent Ferrand
    Apr 19 at 15:51












up vote
0
down vote

favorite
1









up vote
0
down vote

favorite
1






1





How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel that is dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".



I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').



enter image description here



Data are here : https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/



# clean_data function
def clean_data(gdf):
    """Return a cleaned, geometry-only version of *gdf* projected to EPSG:2154.

    Rows whose geometry is null or invalid are dropped, the remaining rows
    are passed through the ``explode`` helper (defined elsewhere in this
    file; presumably it splits multi-part geometries into single parts --
    TODO confirm), and the result is reprojected.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input layer with a ``geometry`` column and a defined CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        A frame with a fresh 0..n-1 index and a single ``geometry`` column.
    """
    geometry = gdf["geometry"]
    # Boolean indexing already yields a new frame, so no explicit copy of the
    # caller's data is needed before filtering.
    usable = gdf[geometry.notnull() & geometry.is_valid]
    parts = explode(usable).to_crs(epsg=2154)
    # drop=True: the old index is discarded anyway, and re-inserting it as a
    # column raises if a column with the same name already exists.
    parts = parts.reset_index(drop=True)
    return parts[["geometry"]].set_geometry("geometry")


def ces(bati, parcelle, output_path="ces.shp"):
    """Compute, for every parcel, the percentage of its area covered by buildings.

    Both layers are normalised with ``clean_data``; the parcels are numbered,
    intersected with the buildings through ``spatial_overlays`` and the
    intersected pieces are dissolved per parcel before their area is measured.

    Parameters
    ----------
    bati : geopandas.GeoDataFrame
        Building layer.
    parcelle : geopandas.GeoDataFrame
        Parcel layer.
    output_path : str or None, optional
        Where the result is written with ``to_file``. Defaults to
        ``"ces.shp"``; pass ``None`` to skip writing.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cleaned parcel with the columns

        * ``id_par``   -- sequential parcel id starting at 1,
        * ``surf_par`` -- parcel area,
        * ``surf_bat`` -- built area inside the parcel (0 when none),
        * ``ces``      -- ``surf_bat / surf_par * 100``,
        * ``shape``    -- ``perimeter / (2 * sqrt(pi * surf_par))``,
        * ``shape2``   -- ``perimeter / sqrt(surf_par)``,
        * ``geometry`` -- parcel geometry.
    """
    # clean_data returns new frames, so the caller's inputs are left untouched.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, len(parcels) + 1))

    overlap = spatial_overlays(parcels, buildings, how="intersection")
    # Dissolve before measuring so that all building pieces of a parcel are
    # merged into a single geometry per id_par.
    footprint = overlap.dissolve(by="id_par")
    built_area = footprint["geometry"].area.rename("surf_bat").reset_index()

    # Left join keeps parcels that contain no building at all.
    result = parcels.merge(built_area, how="left", on="id_par")
    result["surf_par"] = result["geometry"].area
    result["surf_bat"] = result["surf_bat"].fillna(0)
    result["ces"] = (result["surf_bat"] / result["surf_par"] * 100).fillna(0)

    # The perimeter feeds both shape indices; compute it only once.
    perimeter = result.boundary.length
    result["shape"] = perimeter / (2 * np.sqrt(np.pi * result["surf_par"]))
    result["shape2"] = perimeter / np.sqrt(result["surf_par"])

    result = result[
        ["id_par", "surf_par", "surf_bat", "ces", "shape", "shape2", "geometry"]
    ]
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result






share|improve this question













How can I optimize the following function? I'm using the geopandas library to calculate the percentage of each parcel that is dedicated to buildings (ces). I have two shapefiles: "Bati" for buildings and "parcelle" for parcels. As of now, the code is not very "clean".



I'm using a spatial_overlays function to speed up my script. This function is similar to gpd.overlay(df1, df2, how='intersection').



enter image description here



Data are here : https://cadastre.data.gouv.fr/data/etalab-cadastre/2018-01-02/geojson/communes/14/14007/



# clean_data function
def clean_data(gdf):
    """Return a cleaned, geometry-only version of *gdf* projected to EPSG:2154.

    Rows whose geometry is null or invalid are dropped, the remaining rows
    are passed through the ``explode`` helper (defined elsewhere in this
    file; presumably it splits multi-part geometries into single parts --
    TODO confirm), and the result is reprojected.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input layer with a ``geometry`` column and a defined CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        A frame with a fresh 0..n-1 index and a single ``geometry`` column.
    """
    geometry = gdf["geometry"]
    # Boolean indexing already yields a new frame, so no explicit copy of the
    # caller's data is needed before filtering.
    usable = gdf[geometry.notnull() & geometry.is_valid]
    parts = explode(usable).to_crs(epsg=2154)
    # drop=True: the old index is discarded anyway, and re-inserting it as a
    # column raises if a column with the same name already exists.
    parts = parts.reset_index(drop=True)
    return parts[["geometry"]].set_geometry("geometry")


def ces(bati, parcelle, output_path="ces.shp"):
    """Compute, for every parcel, the percentage of its area covered by buildings.

    Both layers are normalised with ``clean_data``; the parcels are numbered,
    intersected with the buildings through ``spatial_overlays`` and the
    intersected pieces are dissolved per parcel before their area is measured.

    Parameters
    ----------
    bati : geopandas.GeoDataFrame
        Building layer.
    parcelle : geopandas.GeoDataFrame
        Parcel layer.
    output_path : str or None, optional
        Where the result is written with ``to_file``. Defaults to
        ``"ces.shp"``; pass ``None`` to skip writing.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cleaned parcel with the columns

        * ``id_par``   -- sequential parcel id starting at 1,
        * ``surf_par`` -- parcel area,
        * ``surf_bat`` -- built area inside the parcel (0 when none),
        * ``ces``      -- ``surf_bat / surf_par * 100``,
        * ``shape``    -- ``perimeter / (2 * sqrt(pi * surf_par))``,
        * ``shape2``   -- ``perimeter / sqrt(surf_par)``,
        * ``geometry`` -- parcel geometry.
    """
    # clean_data returns new frames, so the caller's inputs are left untouched.
    buildings = clean_data(bati)
    parcels = clean_data(parcelle)
    parcels.insert(0, "id_par", range(1, len(parcels) + 1))

    overlap = spatial_overlays(parcels, buildings, how="intersection")
    # Dissolve before measuring so that all building pieces of a parcel are
    # merged into a single geometry per id_par.
    footprint = overlap.dissolve(by="id_par")
    built_area = footprint["geometry"].area.rename("surf_bat").reset_index()

    # Left join keeps parcels that contain no building at all.
    result = parcels.merge(built_area, how="left", on="id_par")
    result["surf_par"] = result["geometry"].area
    result["surf_bat"] = result["surf_bat"].fillna(0)
    result["ces"] = (result["surf_bat"] / result["surf_par"] * 100).fillna(0)

    # The perimeter feeds both shape indices; compute it only once.
    perimeter = result.boundary.length
    result["shape"] = perimeter / (2 * np.sqrt(np.pi * result["surf_par"]))
    result["shape2"] = perimeter / np.sqrt(result["surf_par"])

    result = result[
        ["id_par", "surf_par", "surf_bat", "ces", "shape", "shape2", "geometry"]
    ]
    result.crs = '+init=epsg:2154'
    if output_path is not None:
        result.to_file(output_path)
    return result








share|improve this question












share|improve this question




share|improve this question








edited Apr 20 at 12:32
























asked Apr 19 at 7:54









Vincent Ferrand

42




42











  • It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
    – Peilonrayz
    Apr 19 at 8:09










  • what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
    – miracle173
    Apr 19 at 14:47










  • gpd is for geopandas library
    – Vincent Ferrand
    Apr 19 at 15:51
















  • It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
    – Peilonrayz
    Apr 19 at 8:09










  • what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
    – miracle173
    Apr 19 at 14:47










  • gpd is for geopandas library
    – Vincent Ferrand
    Apr 19 at 15:51















It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
– Peilonrayz
Apr 19 at 8:09




It'd be helpful if we knew what bati and parcelle are. Both in type and data they contain. It would also be helpful to know what missing functions such as clean_data and spatial_overlays are.
– Peilonrayz
Apr 19 at 8:09












what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
– miracle173
Apr 19 at 14:47




what is "gpd function"? gdp: Google says, "Generalized ParetoDistribution"
– miracle173
Apr 19 at 14:47












gpd is for geopandas library
– Vincent Ferrand
Apr 19 at 15:51




gpd is for geopandas library
– Vincent Ferrand
Apr 19 at 15:51















active

oldest

votes











Your Answer




StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f192441%2fhow-optimize-my-geopandas-script%23new-answer', 'question_page');

);

Post as a guest



































active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes










 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f192441%2fhow-optimize-my-geopandas-script%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Python Lists

Aion

JavaScript Array Iteration Methods