Program to pull data from an Oracle database
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
4
down vote
favorite
EDIT: I should clarify that this program currently works as intended (not looking for help making it work); I just want to understand what I could have done better in terms of design.
I work on a team that tackles ad-hoc analyses and, as such, doesn't put much care into proper formatting or design. That being said, I wrote a quick program to pull data from an Oracle db to help some other teams (and to get practice) and would love some feedback.
All it needs to do is (at the command line) ask a user for credentials, ask a user for a query, and then write the oracle data out as a csv to be sent in an email. I'm mainly looking for the big issues: should I have used an OOP approach? Is user input obtained improperly? Error handling messed up? Any weird, glaring errors no self-respecting programmer would ever commit? Anything constructive for someone with no academic background in computer science, but reasonable practical comfort with it. I just need to know how to progress.
import cx_Oracle
import pandas as pd
from termcolor import colored
import getpass
from datetime import datetime
import os
import subprocess
os.environ['ORACLE_HOME'] = '/apps/oracle/product/12.1.0.2'
os.environ['ORACLE_SID'] = 'ORCL'
# print header
tool_name = '## ORACLE DATA PULL TOOL ##'
print('n##', '=' * (len(tool_name) - 4), '##', sep='')
print(tool_name)
print('##', '=' * (len(tool_name) - 4), '##n', sep='')
def conn_oracle():
# -------------------------------------------
# attempt connection to Oracle, close program if connection fails more than once
for tries in range(0, 2):
try:
conn_oracle.oracle_ntid = input("Please enter Oracle user ID: ")
oracle_pw = getpass.getpass("Please enter Oracle password: ")
con = cx_Oracle.connect(
conn_oracle.oracle_ntid.upper(), oracle_pw, "connection") #changed
except cx_Oracle.DatabaseError:
print(colored("Credentials incorrect, please try again.n", "red"))
continue
else:
query_oracle(con)
break
else:
print(colored("nCONNECTION FAILED TWICE, SHUTTING DOWN TO PREVENT ORACLE ACCOUNT FROM LOCKING.", "red"))
def query_oracle(con):
# -------------------------------------------
# get query and test size
for tries in range(0, 4):
try:
query_lines =
print("Enter/paste SQL query, terminated by a semicolon: ")
while True:
line = input()
if ';' in line:
query_lines.append(line)
break
else:
query_lines.append(line)
query_lines_2 = ' '.join(query_lines)
query = ' '.join(query_lines_2.split())
check_size = "select count(*) from (" +
query.replace(';', '') + ")"
check_size_df = pd.read_sql_query(check_size, con)
if int(check_size_df.max().values[0]) > 1000000:
print(colored("Desired data is more than 1,000,000 rows - trim query or request a sqoop.", "red"))
else:
oracle_df = pd.read_sql_query(query.replace(';', ''), con)
write_df(oracle_df)
except BaseException: # should catch a legitimate error here...
print(colored("SQL query contains one or more errors - please try again.n", "red"))
continue
else:
break
else:
print(colored("Final query attempt failed, shutting down.", "red"))
def write_df(oracle_df):
# -------------------------------------------
# write df to directory as .csv
email_addr = conn_oracle.oracle_ntid + '@company.com' #change co name
file_loc = '/home/kemri/ADW_Data/'
filename = 'oracle_output_' + datetime.now().strftime('%Y%m%d%H%M%S') + ".csv"
oracle_df.to_csv(file_loc + filename, index=False)
subprocess.call('/home/kemri/kython/send_file.sh '.format(
file_loc + filename, filename, email_addr), shell=True)
if __name__ == '__main__':
conn_oracle()
python console oracle
add a comment |Â
up vote
4
down vote
favorite
EDIT: I should clarify that this program currently works as intended (not looking for help making it work); I just want to understand what I could have done better in terms of design.
I work on a team that tackles ad-hoc analyses and, as such, doesn't put much care into proper formatting or design. That being said, I wrote a quick program to pull data from an Oracle db to help some other teams (and to get practice) and would love some feedback.
All it needs to do is (at the command line) ask a user for credentials, ask a user for a query, and then write the oracle data out as a csv to be sent in an email. I'm mainly looking for the big issues: should I have used an OOP approach? Is user input obtained improperly? Error handling messed up? Any weird, glaring errors no self-respecting programmer would ever commit? Anything constructive for someone with no academic background in computer science, but reasonable practical comfort with it. I just need to know how to progress.
import cx_Oracle
import pandas as pd
from termcolor import colored
import getpass
from datetime import datetime
import os
import subprocess
os.environ['ORACLE_HOME'] = '/apps/oracle/product/12.1.0.2'
os.environ['ORACLE_SID'] = 'ORCL'
# print header
tool_name = '## ORACLE DATA PULL TOOL ##'
print('n##', '=' * (len(tool_name) - 4), '##', sep='')
print(tool_name)
print('##', '=' * (len(tool_name) - 4), '##n', sep='')
def conn_oracle():
# -------------------------------------------
# attempt connection to Oracle, close program if connection fails more than once
for tries in range(0, 2):
try:
conn_oracle.oracle_ntid = input("Please enter Oracle user ID: ")
oracle_pw = getpass.getpass("Please enter Oracle password: ")
con = cx_Oracle.connect(
conn_oracle.oracle_ntid.upper(), oracle_pw, "connection") #changed
except cx_Oracle.DatabaseError:
print(colored("Credentials incorrect, please try again.n", "red"))
continue
else:
query_oracle(con)
break
else:
print(colored("nCONNECTION FAILED TWICE, SHUTTING DOWN TO PREVENT ORACLE ACCOUNT FROM LOCKING.", "red"))
def query_oracle(con):
# -------------------------------------------
# get query and test size
for tries in range(0, 4):
try:
query_lines =
print("Enter/paste SQL query, terminated by a semicolon: ")
while True:
line = input()
if ';' in line:
query_lines.append(line)
break
else:
query_lines.append(line)
query_lines_2 = ' '.join(query_lines)
query = ' '.join(query_lines_2.split())
check_size = "select count(*) from (" +
query.replace(';', '') + ")"
check_size_df = pd.read_sql_query(check_size, con)
if int(check_size_df.max().values[0]) > 1000000:
print(colored("Desired data is more than 1,000,000 rows - trim query or request a sqoop.", "red"))
else:
oracle_df = pd.read_sql_query(query.replace(';', ''), con)
write_df(oracle_df)
except BaseException: # should catch a legitimate error here...
print(colored("SQL query contains one or more errors - please try again.n", "red"))
continue
else:
break
else:
print(colored("Final query attempt failed, shutting down.", "red"))
def write_df(oracle_df):
# -------------------------------------------
# write df to directory as .csv
email_addr = conn_oracle.oracle_ntid + '@company.com' #change co name
file_loc = '/home/kemri/ADW_Data/'
filename = 'oracle_output_' + datetime.now().strftime('%Y%m%d%H%M%S') + ".csv"
oracle_df.to_csv(file_loc + filename, index=False)
subprocess.call('/home/kemri/kython/send_file.sh '.format(
file_loc + filename, filename, email_addr), shell=True)
if __name__ == '__main__':
conn_oracle()
python console oracle
add a comment |Â
up vote
4
down vote
favorite
up vote
4
down vote
favorite
EDIT: I should clarify that this program currently works as intended (not looking for help making it work); I just want to understand what I could have done better in terms of design.
I work on a team that tackles ad-hoc analyses and, as such, doesn't put much care into proper formatting or design. That being said, I wrote a quick program to pull data from an Oracle db to help some other teams (and to get practice) and would love some feedback.
All it needs to do is (at the command line) ask a user for credentials, ask a user for a query, and then write the oracle data out as a csv to be sent in an email. I'm mainly looking for the big issues: should I have used an OOP approach? Is user input obtained improperly? Error handling messed up? Any weird, glaring errors no self-respecting programmer would ever commit? Anything constructive for someone with no academic background in computer science, but reasonable practical comfort with it. I just need to know how to progress.
import cx_Oracle
import pandas as pd
from termcolor import colored
import getpass
from datetime import datetime
import os
import subprocess
os.environ['ORACLE_HOME'] = '/apps/oracle/product/12.1.0.2'
os.environ['ORACLE_SID'] = 'ORCL'
# print header
tool_name = '## ORACLE DATA PULL TOOL ##'
print('n##', '=' * (len(tool_name) - 4), '##', sep='')
print(tool_name)
print('##', '=' * (len(tool_name) - 4), '##n', sep='')
def conn_oracle():
# -------------------------------------------
# attempt connection to Oracle, close program if connection fails more than once
for tries in range(0, 2):
try:
conn_oracle.oracle_ntid = input("Please enter Oracle user ID: ")
oracle_pw = getpass.getpass("Please enter Oracle password: ")
con = cx_Oracle.connect(
conn_oracle.oracle_ntid.upper(), oracle_pw, "connection") #changed
except cx_Oracle.DatabaseError:
print(colored("Credentials incorrect, please try again.n", "red"))
continue
else:
query_oracle(con)
break
else:
print(colored("nCONNECTION FAILED TWICE, SHUTTING DOWN TO PREVENT ORACLE ACCOUNT FROM LOCKING.", "red"))
def query_oracle(con):
# -------------------------------------------
# get query and test size
for tries in range(0, 4):
try:
query_lines =
print("Enter/paste SQL query, terminated by a semicolon: ")
while True:
line = input()
if ';' in line:
query_lines.append(line)
break
else:
query_lines.append(line)
query_lines_2 = ' '.join(query_lines)
query = ' '.join(query_lines_2.split())
check_size = "select count(*) from (" +
query.replace(';', '') + ")"
check_size_df = pd.read_sql_query(check_size, con)
if int(check_size_df.max().values[0]) > 1000000:
print(colored("Desired data is more than 1,000,000 rows - trim query or request a sqoop.", "red"))
else:
oracle_df = pd.read_sql_query(query.replace(';', ''), con)
write_df(oracle_df)
except BaseException: # should catch a legitimate error here...
print(colored("SQL query contains one or more errors - please try again.n", "red"))
continue
else:
break
else:
print(colored("Final query attempt failed, shutting down.", "red"))
def write_df(oracle_df):
# -------------------------------------------
# write df to directory as .csv
email_addr = conn_oracle.oracle_ntid + '@company.com' #change co name
file_loc = '/home/kemri/ADW_Data/'
filename = 'oracle_output_' + datetime.now().strftime('%Y%m%d%H%M%S') + ".csv"
oracle_df.to_csv(file_loc + filename, index=False)
subprocess.call('/home/kemri/kython/send_file.sh '.format(
file_loc + filename, filename, email_addr), shell=True)
if __name__ == '__main__':
conn_oracle()
python console oracle
EDIT: I should clarify that this program currently works as intended (not looking for help making it work); I just want to understand what I could have done better in terms of design.
I work on a team that tackles ad-hoc analyses and, as such, doesn't put much care into proper formatting or design. That being said, I wrote a quick program to pull data from an Oracle db to help some other teams (and to get practice) and would love some feedback.
All it needs to do is (at the command line) ask a user for credentials, ask a user for a query, and then write the oracle data out as a csv to be sent in an email. I'm mainly looking for the big issues: should I have used an OOP approach? Is user input obtained improperly? Error handling messed up? Any weird, glaring errors no self-respecting programmer would ever commit? Anything constructive for someone with no academic background in computer science, but reasonable practical comfort with it. I just need to know how to progress.
import cx_Oracle
import pandas as pd
from termcolor import colored
import getpass
from datetime import datetime
import os
import subprocess
os.environ['ORACLE_HOME'] = '/apps/oracle/product/12.1.0.2'
os.environ['ORACLE_SID'] = 'ORCL'
# print header
tool_name = '## ORACLE DATA PULL TOOL ##'
print('n##', '=' * (len(tool_name) - 4), '##', sep='')
print(tool_name)
print('##', '=' * (len(tool_name) - 4), '##n', sep='')
def conn_oracle():
# -------------------------------------------
# attempt connection to Oracle, close program if connection fails more than once
for tries in range(0, 2):
try:
conn_oracle.oracle_ntid = input("Please enter Oracle user ID: ")
oracle_pw = getpass.getpass("Please enter Oracle password: ")
con = cx_Oracle.connect(
conn_oracle.oracle_ntid.upper(), oracle_pw, "connection") #changed
except cx_Oracle.DatabaseError:
print(colored("Credentials incorrect, please try again.n", "red"))
continue
else:
query_oracle(con)
break
else:
print(colored("nCONNECTION FAILED TWICE, SHUTTING DOWN TO PREVENT ORACLE ACCOUNT FROM LOCKING.", "red"))
def query_oracle(con):
# -------------------------------------------
# get query and test size
for tries in range(0, 4):
try:
query_lines =
print("Enter/paste SQL query, terminated by a semicolon: ")
while True:
line = input()
if ';' in line:
query_lines.append(line)
break
else:
query_lines.append(line)
query_lines_2 = ' '.join(query_lines)
query = ' '.join(query_lines_2.split())
check_size = "select count(*) from (" +
query.replace(';', '') + ")"
check_size_df = pd.read_sql_query(check_size, con)
if int(check_size_df.max().values[0]) > 1000000:
print(colored("Desired data is more than 1,000,000 rows - trim query or request a sqoop.", "red"))
else:
oracle_df = pd.read_sql_query(query.replace(';', ''), con)
write_df(oracle_df)
except BaseException: # should catch a legitimate error here...
print(colored("SQL query contains one or more errors - please try again.n", "red"))
continue
else:
break
else:
print(colored("Final query attempt failed, shutting down.", "red"))
def write_df(oracle_df):
# -------------------------------------------
# write df to directory as .csv
email_addr = conn_oracle.oracle_ntid + '@company.com' #change co name
file_loc = '/home/kemri/ADW_Data/'
filename = 'oracle_output_' + datetime.now().strftime('%Y%m%d%H%M%S') + ".csv"
oracle_df.to_csv(file_loc + filename, index=False)
subprocess.call('/home/kemri/kython/send_file.sh '.format(
file_loc + filename, filename, email_addr), shell=True)
if __name__ == '__main__':
conn_oracle()
python console oracle
edited Apr 6 at 0:42
asked Apr 4 at 21:43
kemri
212
212
add a comment |Â
add a comment |Â
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f191285%2fprogram-to-pull-data-from-an-oracle-database%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password