Extracting data from a used car sales site

I am developing code for extracting data from used car sales sites. There are 4 sites in total. On 3 of them I use requests and BeautifulSoup, and the extraction time is satisfactory. The problem is the site handled by the class called Localiza, which is taking too long: almost 20 minutes to extract sales data for about 6000 cars. Could anyone give me tips on how to reduce the time spent scraping this site?



This is the parent class of the Localiza class:



from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
import requests as req


class SiteVendaSeminovos:

    def __init__(self, url, emprise_name):
        self.__base_url = url
        self.__page_index = 1
        self.__emprise_name = emprise_name
        self.__soup = None

    def goto_next_page(self):
        self.__page_index += 1

    @property
    def base_url(self):
        return self.__base_url

    @property
    def page_index(self):
        return self.__page_index

    @property
    def soup(self):
        return self.__soup

    def set_soup(self):
        r = req.get(self.__base_url.format(self.__page_index))
        self.__soup = bs(r.text, "lxml")

    def is_finished(self):
        pass

    def get_cars(self):
        pass

    @property
    def emprise_name(self):
        return self.__emprise_name

    def get_price(self, car):
        pass

    def get_kilometragem(self, car):
        pass

    def get_model(self, car):
        pass

    def get_year(self, car):
        pass


This is the Localiza class, which takes 20 minutes to get data from 400 pages, roughly 6000 cars:



class Localiza(SiteVendaSeminovos):
    def __init__(self, url, emprise_name):
        super().__init__(url, emprise_name)
        #self.__web_driver = wb.Chrome("/home/rafa/Documentos/web-scrap/chromedriver")
        self.__web_driver = wb.PhantomJS("/home/rafa/Documentos/web-scrap/phantomjs")
        self.__web_driver.get(url)
        self.__id_next_page = "ctl00_ctl42_g_f221d036_75d3_4ee2_893d_0d7b40180245_ProximaPaginaSuperior"
        self.__finished = False

    def set_soup(self):
        self.__soup = bs(self.__web_driver.page_source, "lxml")

    def is_finished(self):
        return self.__finished

    def get_cars(self):
        price = self.__soup.find_all(class_="busca-right-container")
        cars = self.__soup.find_all(class_="busca-left-container")
        return [(price, list(car.stripped_strings)) for car, price in zip(cars, price)]

    def get_year(self, car):
        return car[1][1].split("/")[0]

    def get_kilometragem(self, car):
        return car[1][2]

    def get_model(self, car):
        return car[1][0]

    def get_price(self, car):
        return list(car[0].stripped_strings)[0][3:].replace(".", "")

    def goto_next_page(self):
        try:
            self.__web_driver.find_element_by_id(self.__id_next_page).click()
        except:
            self.__finished = True
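A likely contributor to the slowness (and a possible source of duplicated pages) is that goto_next_page clicks the "next" link but set_soup may grab page_source before the ASP.NET postback has replaced the results. Below is a minimal sketch of an explicit wait after the click; the 10-second timeout and the assumption that the busca-left-container element is replaced on every postback are mine, not from the original code.

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class Localiza(SiteVendaSeminovos):
    # ... rest of the class unchanged ...

    def goto_next_page(self):
        try:
            # Keep a handle on the current result list before clicking.
            old_result = self.__web_driver.find_element_by_class_name(
                "busca-left-container")
            self.__web_driver.find_element_by_id(self.__id_next_page).click()
            # Block until the old result node is detached from the DOM,
            # i.e. the postback has actually rendered the next page.
            WebDriverWait(self.__web_driver, 10).until(
                EC.staleness_of(old_result))
        except Exception:
            self.__finished = True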


Main:



from classes import *
import sqlite3 as sqlt
import json


with sqlt.connect("seminovos.db") as con, open("urls_sql.json") as f:
    urls_sql = json.load(f)

    cursor = con.cursor()
    cursor.execute(urls_sql["criar_tabela_sql"])  # creates the "vendas" table in the DB
    inserir_dados_sql = urls_sql["inserir_dados_sql"]  # sets the data-insertion query for the DB table

    localiza = Localiza(urls_sql["url_localiza"], "Localiza")

    data = {}

    for seller_site in [localiza]:
        while True:
            seller_site.set_soup()
            if seller_site.is_finished():
                break
            for car in seller_site.get_cars():
                data["Empresa"] = seller_site.emprise_name
                data["Modelo"] = seller_site.get_model(car)
                data["Preco"] = seller_site.get_price(car)
                data["Kilometragem"] = seller_site.get_kilometragem(car)
                data["Ano"] = seller_site.get_year(car)
                cursor.execute(inserir_dados_sql.format(**data))
                con.commit()
            seller_site.goto_next_page()
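As an aside, inserir_dados_sql.format(**data) splices scraped strings directly into the SQL text. A safer variant is sketched below, under the assumption that the query in the JSON is rewritten with sqlite3 "?" placeholders; it sidesteps quoting problems and SQL injection entirely.

# Assumes "inserir_dados_sql" in the JSON is changed to:
#   "INSERT INTO vendas (Empresa, Modelo, Preco, Kilometragem, Ano)
#    VALUES (?, ?, ?, ?, ?)"
for car in seller_site.get_cars():
    cursor.execute(inserir_dados_sql, (
        seller_site.emprise_name,
        seller_site.get_model(car),
        seller_site.get_price(car),
        seller_site.get_kilometragem(car),
        seller_site.get_year(car),
    ))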


JSON with the Localiza site URL and SQL queries:



 "url_localiza" : "https://seminovos.localiza.com/Paginas/resultado-busca.aspx?ct=4365_2002_8466_8607_8655_4389_2604_2612_8096_1734_4720_8719_3970_7267_7300_2826_8146_5758_6667_565_8167_1307_2108_7478_8875_2372_6698_8220_4777_8234_3159_8987_6018_108_4498_9040_6057_9061_6974_6744_6749_9123_7690_9185_7719_5210_6797_957_9317_9328_9332_9352_7876_2453_9362_1968_9391_9420_5454_3873_3874_4337_1987_1081_6875&st=AL_BA_CE_DF_ES_GO_MA_MG_MS_MT_PA_PB_PE_PI_PR_RJ_RN_RS_SC_SE_SP&yr=2013_2018&pc=20000_425000&fb=W_X_T_%C3%94_A_D_C_L_1_8_F_M_U_O_R_G_B&md=000192_000097_000148_000147_000136_000119_000137_000120_000729_001061_000132_000632_000699_000122_000041_000286_000772_000719_000180_000181_000179_000715_000736_000854_000748_001076_000334_000250_000333_000330_000332_000369_000424_000545_000511_000488_000408_000418_000510_000513_000391_000211_000431_001083_000325_000326_000344_000456_000451_000455_000458_000505_000506_000502_000805_000426_000427_000623_000726_000718_000119_000132_000694_000788_000828_000005_000484_000478_000481_000476_000473_000477_001017_001016_000356_000357_000319_000317_000297_000298_000354_000123_000780_000859_000138_001006_000614_000867_000858_000675_000139_000165_000171_000174_000039_000020_000022_000019_000747_000320_000365_000311_000355_000312_000313_000322_000342_000353_000362_000314_000699_000696_000114_000779_000781_000755_000807_000806_001084_000529_001063_001098_000667_001036_001077_000133_000143_000658_000705_000707_000047",
"criar_tabela_sql" : "CREATE TABLE vendas ( ID INTEGER PRIMARY KEY AUTOINCREMENT, Empresa VARCHAR (12) NOT NULL, Modelo VARCHAR(40) NOT NULL, Preco REAL NOT NULL, Kilometragem REAL NOT NULL, Ano NUMERIC(4,0) NOT NULL );",
"inserir_dados_sql":"INSERT INTO vendas (Empresa, Modelo, Preco, Kilometragem, Ano) VALUES ('Empresa','Modelo', Preco,Kilometragem,Ano)"







  • Look into profiling your code to identify the bottlenecks. If that doesn't pinpoint it, refactor your loop operations into functions; I'm pretty sure the for car in seller_site.get_cars(): loop is the likely suspect.
    – C. Harley
    May 24 at 1:30
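For reference, a minimal way to do that with the standard library, assuming the scraping loop is wrapped in a main() function (a placeholder name, not from the original code):

import cProfile
import pstats

cProfile.run("main()", "scrape.prof")  # profile one full scraping run
stats = pstats.Stats("scrape.prof")
stats.sort_stats("cumulative").print_stats(20)  # 20 slowest call paths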










  • You commit too often. I think it should be safe to commit once the while loop is done. It may be tough on your RAM depending on the amount of data gathered, but I guess 6000 cars is not that much.
    – bobrobbob
    May 25 at 15:59
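A sketch of that suggestion: one executemany and one commit per page instead of one commit per car. The tuple order assumes the placeholder-style insert query shown earlier.

rows = [(
    seller_site.emprise_name,
    seller_site.get_model(car),
    seller_site.get_price(car),
    seller_site.get_kilometragem(car),
    seller_site.get_year(car),
) for car in seller_site.get_cars()]
cursor.executemany(inserir_dados_sql, rows)  # one call for the whole page
con.commit()  # commit per page (or hoist out of the while loop entirely)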
edited May 24 at 0:06 by Sam Onela
asked May 23 at 23:50 by Rafael Ribeiro










