Extracting data from a used car sales site
I am developing code for extracting data from used car sales sites. There are 4 sites in total. For 3 of them I use requests and BeautifulSoup, and the time taken to extract the data was satisfactory. The problem is the site handled by the class called Localiza: it takes almost 20 minutes to extract sales data for the 6000 cars. Could anyone give me tips on how to reduce the time of scraping this site?
This is the parent class of the Localiza class:
from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
import requests as req


class SiteVendaSeminovos:
    def __init__(self, url, emprise_name):
        self.__base_url = url
        self.__page_index = 1
        self.__emprise_name = emprise_name
        self.__soup = None

    def goto_next_page(self):
        self.__page_index += 1

    @property
    def base_url(self):
        return self.__base_url

    @property
    def page_index(self):
        return self.__page_index

    @property
    def soup(self):
        return self.__soup

    def set_soup(self):
        r = req.get(self.__base_url.format(self.__page_index))
        self.__soup = bs(r.text, "lxml")

    def is_finished(self):
        pass

    def get_cars(self):
        pass

    @property
    def emprise_name(self):
        return self.__emprise_name

    def get_price(self, car):
        pass

    def get_kilometragem(self, car):
        pass

    def get_model(self, car):
        pass

    def get_year(self, car):
        pass
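As a side note for the three requests-based sites: a single requests.Session keeps the TCP connection alive between page fetches instead of reconnecting on every req.get. A minimal sketch of set_soup rewritten that way (the __session attribute is an addition, not in the original code):

import requests
from bs4 import BeautifulSoup as bs

class SiteVendaSeminovos:
    def __init__(self, url, emprise_name):
        self.__base_url = url
        self.__page_index = 1
        self.__emprise_name = emprise_name
        self.__soup = None
        self.__session = requests.Session()  # one keep-alive session for all pages

    def set_soup(self):
        # Reusing the session avoids a fresh TCP/TLS handshake per page.
        r = self.__session.get(self.__base_url.format(self.__page_index))
        self.__soup = bs(r.text, "lxml")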
This is the Localiza class, which takes 20 minutes to get data from about 400 pages, more or less 6000 cars:
class Localiza(SiteVendaSeminovos):
    def __init__(self, url, emprise_name):
        super().__init__(url, emprise_name)
        # self.__web_driver = wb.Chrome("/home/rafa/Documentos/web-scrap/chromedriver")
        self.__web_driver = wb.PhantomJS("/home/rafa/Documentos/web-scrap/phantomjs")
        self.__web_driver.get(url)
        self.__id_next_page = "ctl00_ctl42_g_f221d036_75d3_4ee2_893d_0d7b40180245_ProximaPaginaSuperior"
        self.__finished = False

    def set_soup(self):
        self.__soup = bs(self.__web_driver.page_source, "lxml")

    def is_finished(self):
        return self.__finished

    def get_cars(self):
        price = self.__soup.find_all(class_="busca-right-container")
        cars = self.__soup.find_all(class_="busca-left-container")
        return [(price, list(car.stripped_strings)) for car, price in zip(cars, price)]

    def get_year(self, car):
        return car[1][1].split("/")[0]

    def get_kilometragem(self, car):
        return car[1][2]

    def get_model(self, car):
        return car[1][0]

    def get_price(self, car):
        return list(car[0].stripped_strings)[0][3:].replace(".", "")

    def goto_next_page(self):
        try:
            self.__web_driver.find_element_by_id(self.__id_next_page).click()
        except:
            self.__finished = True
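One detail worth flagging on the slow path: goto_next_page returns immediately after click(), so the next set_soup call can parse a page_source that has not finished reloading. A minimal sketch of the same method with an explicit wait, assuming the click triggers a full postback; the 10-second timeout is a guess:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

class Localiza(SiteVendaSeminovos):
    # ... __init__, set_soup, get_cars, etc. as above ...

    def goto_next_page(self):
        try:
            button = self.__web_driver.find_element_by_id(self.__id_next_page)
            button.click()
            # Block until the clicked button goes stale, i.e. the postback has
            # replaced the page, before the next set_soup() parses page_source.
            WebDriverWait(self.__web_driver, 10).until(EC.staleness_of(button))
        except (NoSuchElementException, TimeoutException):
            self.__finished = True

Starting PhantomJS with service_args=["--load-images=false"] may also shave page-load time, since the listing photos are never used.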
Main:
from classes import *
import sqlite3 as sqlt
import json

with open("urls_sql.json") as f:
    urls_sql = json.load(f)

with sqlt.connect("seminovos.db") as con:
    cursor = con.cursor()
    cursor.execute(urls_sql["criar_tabela_sql"])  # creates the "vendas" table in the DB
    inserir_dados_sql = urls_sql["inserir_dados_sql"]  # insert query for the DB table
    localiza = Localiza(urls_sql["url_localiza"], "Localiza")
    data = {}
    for seller_site in [localiza]:
        while True:
            seller_site.set_soup()
            if seller_site.is_finished():
                break
            for car in seller_site.get_cars():
                data["Empresa"] = seller_site.emprise_name
                data["Modelo"] = seller_site.get_model(car)
                data["Preco"] = seller_site.get_price(car)
                data["Kilometragem"] = seller_site.get_kilometragem(car)
                data["Ano"] = seller_site.get_year(car)
                cursor.execute(inserir_dados_sql.format(**data))
                con.commit()
            seller_site.goto_next_page()
JSON with the Localiza site URL and SQL queries:
"url_localiza" : "https://seminovos.localiza.com/Paginas/resultado-busca.aspx?ct=4365_2002_8466_8607_8655_4389_2604_2612_8096_1734_4720_8719_3970_7267_7300_2826_8146_5758_6667_565_8167_1307_2108_7478_8875_2372_6698_8220_4777_8234_3159_8987_6018_108_4498_9040_6057_9061_6974_6744_6749_9123_7690_9185_7719_5210_6797_957_9317_9328_9332_9352_7876_2453_9362_1968_9391_9420_5454_3873_3874_4337_1987_1081_6875&st=AL_BA_CE_DF_ES_GO_MA_MG_MS_MT_PA_PB_PE_PI_PR_RJ_RN_RS_SC_SE_SP&yr=2013_2018&pc=20000_425000&fb=W_X_T_%C3%94_A_D_C_L_1_8_F_M_U_O_R_G_B&md=000192_000097_000148_000147_000136_000119_000137_000120_000729_001061_000132_000632_000699_000122_000041_000286_000772_000719_000180_000181_000179_000715_000736_000854_000748_001076_000334_000250_000333_000330_000332_000369_000424_000545_000511_000488_000408_000418_000510_000513_000391_000211_000431_001083_000325_000326_000344_000456_000451_000455_000458_000505_000506_000502_000805_000426_000427_000623_000726_000718_000119_000132_000694_000788_000828_000005_000484_000478_000481_000476_000473_000477_001017_001016_000356_000357_000319_000317_000297_000298_000354_000123_000780_000859_000138_001006_000614_000867_000858_000675_000139_000165_000171_000174_000039_000020_000022_000019_000747_000320_000365_000311_000355_000312_000313_000322_000342_000353_000362_000314_000699_000696_000114_000779_000781_000755_000807_000806_001084_000529_001063_001098_000667_001036_001077_000133_000143_000658_000705_000707_000047",
"criar_tabela_sql" : "CREATE TABLE vendas ( ID INTEGER PRIMARY KEY AUTOINCREMENT, Empresa VARCHAR (12) NOT NULL, Modelo VARCHAR(40) NOT NULL, Preco REAL NOT NULL, Kilometragem REAL NOT NULL, Ano NUMERIC(4,0) NOT NULL );",
"inserir_dados_sql":"INSERT INTO vendas (Empresa, Modelo, Preco, Kilometragem, Ano) VALUES ('Empresa','Modelo', Preco,Kilometragem,Ano)"
Tags: python, web-scraping, sqlite, beautifulsoup, selenium
asked May 23 at 23:50 by Rafael Ribeiro
edited May 24 at 0:06 by Sam Onela
Look into profiling your code to identify the bottlenecks. If that doesn't pinpoint it, refactor your for-loop operations into functions; I'm pretty sure the for car in seller_site.get_cars(): loop is the likely suspect. – C. Harley, May 24 at 1:30
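For reference, a minimal sketch of that profiling step, assuming the scraping loop is first wrapped in a (hypothetical) main() function:

import cProfile
import pstats

cProfile.run("main()", "scrape.prof")  # profile one full scraping run
stats = pstats.Stats("scrape.prof")
stats.sort_stats("cumulative").print_stats(20)  # show the 20 biggest offenders

If most of the cumulative time lands in Selenium/PhantomJS calls rather than in get_cars, the bottleneck is page loading, not parsing.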
You commit too often. I think it should be safe to commit once the while loop is done. It may be tough on your RAM depending on the amount of data gathered, but I guess 6000 cars is not that much. – bobrobbob, May 25 at 15:59
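A minimal sketch of that suggestion, batching each page with executemany and committing once per page; the query hard-codes the column list with ? placeholders instead of building SQL via str.format:

for seller_site in [localiza]:
    while True:
        seller_site.set_soup()
        if seller_site.is_finished():
            break
        # Collect the whole page as tuples, insert in one batch, commit once.
        rows = [(seller_site.emprise_name,
                 seller_site.get_model(car),
                 seller_site.get_price(car),
                 seller_site.get_kilometragem(car),
                 seller_site.get_year(car))
                for car in seller_site.get_cars()]
        cursor.executemany(
            "INSERT INTO vendas (Empresa, Modelo, Preco, Kilometragem, Ano) "
            "VALUES (?, ?, ?, ?, ?)",
            rows)
        con.commit()  # one commit per page instead of one per car
        seller_site.goto_next_page()

The ? placeholders also sidestep the quoting problems that str.format would hit on values containing apostrophes.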