Wikipedia Random Page in Category Bot

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
5
down vote

favorite

I recently wrote a Python script to generate a random page within a Wikipedia category and its subcategories:

"""Generate a random page from a wikipedia category."""
import argparse
import random

import requests

DEBUGGING = False
max_depth = 4
current_depth = 0
header = "Garrett Credi's Random Page Bot(Contact @ gcc@ameritech.net)"
headerVal = 'Api-User-Agent': header
base_url = 'https://en.wikipedia.org/w/api.php'


def print_debug(str):
 """Print strings if in debug/verbose mode mode."""
 global DEBUGGING
 if(DEBUGGING):
 print("DEBUG: " + str)


def generateRequestsParams(category, mode):
 """Generate the params for requests given a category and a mode."""
 cmtype = ""
 if(mode == "Subcat"):
 cmtype = 'subcat'
 elif(mode == "Subpage"):
 cmtype = 'page'
 params = 
 'format': 'json',
 'action': 'query',
 'list': 'categorymembers',
 'cmtitle': category,
 'cmlimit': 500,
 'cmtype': cmtype
 
 if(mode == "Pagecats"):
 params = 
 'format': 'json',
 'action': 'query',
 'titles': category,
 'prop': 'categories'
 
 return params


def wrappedRequest(category, mode):
 """Wrap a request to deal with connection errors."""
 global base_url
 params = generateRequestsParams(category, mode)
 global headerVal
 max_times = 5
 times = 0
 propertyString = 'categorymembers'
 while(times < max_times):
 try:
 r = requests.get(base_url, headers=headerVal, params=params)
 if(mode != "Pagecats"):
 return r.json()['query'][propertyString]
 else:
 for key in r.json()['query']['pages']:
 return r.json()['query']['pages'][key]['categories']
 except requests.exceptions.ConnectionError as e:
 if(times > max_times):
 print_debug("category failed too many times (times) " +
 " times. Moving on".format(
 category=category,
 times=times
 )
 )
 times = 0
 return [category]
 else:
 print_debug("Retrying category due to connection " +
 " error".format(
 cateogry=category
 )
 )
 times += 1


def getSubcategories(category):
 """Get subcategories of a given subcategory."""
 global max_depth, DEBUGGING
 current_depth = 1
 singleStepSubcategories = [category]
 allSubcategories = 
 while(current_depth <= max_depth):
 print_debug("Current tree depth d".format(d=current_depth))
 subcategoryTemp = 
 if(len(singleStepSubcategories) == 0):
 break
 for subcat in singleStepSubcategories:
 allSubcategories.append(subcat)
 subcategories = wrappedRequest(subcat, mode="Subcat")
 for cat in subcategories:
 title = cat['title']
 print_debug("subcat has subcategory title".format(
 subcat=subcat,
 title=title
 )
 )
 if(title not in allSubcategories):
 allSubcategories.append(title)
 subcategoryTemp.append(title)
 else:
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
 singleStepSubcategories = subcategoryTemp
 current_depth += 1
 return allSubcategories


def saveArray(category, subcats):
 """Save array to file."""
 filename = "category_subcats.txt".format(category=category)
 print_debug("Saving to f".format(f=filename))
 with open(filename, 'w') as f:
 for cat in subcats:
 f.write(cat+"n")


def subcategoriesWithoutDuplicates(category):
 """Generate a list of subcategories without duplicates."""
 return set(getSubcategories(category))


def retreiveSubcategoriesFromLocation(category):
 """Get subcategories from file, or generate them from scratch."""
 subCats = 
 fileName = "category_subcats.txt".format(category=category)
 try:
 subCatFile = open(fileName, 'r')
 print_debug("Reading from filename".format(filename=fileName))
 for count, line in enumerate(subCatFile):
 subCats.append(line.replace("n", ""))
 subCatFile.close()
 except IOError as ioError:
 print_debug("fileName does not exist. Building from " +
 " network".format(fileName=fileName)
 )
 subCats = subcategoriesWithoutDuplicates(category)
 return subCats


def checkPageSimilarity(page, subcategories):
 """Check the similarity of page to a list of subcategories.
 Verify if page truly is a subpage of a category.
 """
 global similarityVal
 pageCats = wrappedRequest(page, mode="Pagecats")
 points = 0.0
 # For every supercategory of page, if it is also in subcategories
 # the page is more likely to be a true subpage.
 for cat in pageCats:
 title = cat['title']
 if(title in subcategories):
 points += 1.0
 score = points/len(pageCats)
 print_debug("Score of p is s".format(p=page, s=str(score)))
 if(score >= similarityVal):
 return True
 return False


def randomPage(category, save, regen, check):
 """Generate a random page from a category."""
 global DEBUGGING
 subCats = 
 read = True
 if(not regen):
 subCats = retreiveSubcategoriesFromLocation(category)
 if(regen or (not read)):
 print_debug("Rebuilding category".format(category=category))
 subCats = subcategoriesWithoutDuplicates(category)
 if(save or regen):
 saveArray(category, subCats)
 randomPage = None
 validRandomPage = True
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 while(not randomPage or not validRandomPage):
 try:
 randomPage = random.choice(pages)
 title = randomPage['title']
 if(check):
 print_debug("Checking " + title)
 validRandomPage = checkPageSimilarity(title, subCats)
 if(not validRandomPage):
 pages.remove(randomPage)
 except IndexError as a:
 print_debug("cat has no pages. Retrying".format(cat=cat))
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 return randomPage['title']


if(__name__ == "__main__"):
 parser = argparse.ArgumentParser(description='Get a random page from a ' +
 'wikipedia category')
 parser.add_argument('category', help="The category you wish to get a " +
 "page from."
 )
 parser.add_argument('--tree_depth',
 nargs='?',
 type=int,
 default=4,
 help="How far down to traverse the subcategory tree"
 )
 parser.add_argument('--similarity',
 nargs='?',
 type=float,
 default=.5,
 help="What percent of page categories need to be " +
 "in subcategory array. Must be used with -c/--check")
 parser.add_argument("-s",
 "--save",
 action="store_true",
 help="Save subcategories to a file for quick re-runs"
 )
 parser.add_argument("-r",
 "--regen",
 action="store_true",
 help="Regenerate the subcategory file"
 )
 parser.add_argument("-v",
 "--verbose",
 action="store_true",
 help="Print debug lines"
 )
 parser.add_argument("-c",
 "--check",
 action="store_true",
 help="After finding page check to see that it truly " +
 "fits in category"
 )
 args = parser.parse_args()
 print_debug(str(args.check))
 DEBUGGING = args.verbose
 max_depth = args.tree_depth
 similarityVal = args.similarity
 if(args.save):
 print_debug("Saving!")
 if(args.regen):
 print_debug("Regenerating!")

 print("https://en.wikipedia.org/wiki/" + randomPage("Category:" +
 args.category,
 save=args.save,
 regen=args.regen,
 check=args.check
 )
 )

Currently it runs fairly slowly, since it runs over networks via Python's requests module. Is there any way to make requests more efficient/faster or to get the Wikipedia subcategory tree locally (preferably in a small-ish format)?

I'd appreciate any feedback on the style/structure of my code (e.g. readability, function/variable name, function structure) and any advice on the performance of the program itself.

edited May 4 at 12:38

Daniel

4,1132836

asked May 2 at 2:20

Garrett Credi

363

add a commentÂ |Â

up vote
5
down vote

favorite

I recently wrote a Python script to generate a random page within a Wikipedia category and its subcategories:

"""Generate a random page from a wikipedia category."""
import argparse
import random

import requests

DEBUGGING = False
max_depth = 4
current_depth = 0
header = "Garrett Credi's Random Page Bot(Contact @ gcc@ameritech.net)"
headerVal = 'Api-User-Agent': header
base_url = 'https://en.wikipedia.org/w/api.php'


def print_debug(str):
 """Print strings if in debug/verbose mode mode."""
 global DEBUGGING
 if(DEBUGGING):
 print("DEBUG: " + str)


def generateRequestsParams(category, mode):
 """Generate the params for requests given a category and a mode."""
 cmtype = ""
 if(mode == "Subcat"):
 cmtype = 'subcat'
 elif(mode == "Subpage"):
 cmtype = 'page'
 params = 
 'format': 'json',
 'action': 'query',
 'list': 'categorymembers',
 'cmtitle': category,
 'cmlimit': 500,
 'cmtype': cmtype
 
 if(mode == "Pagecats"):
 params = 
 'format': 'json',
 'action': 'query',
 'titles': category,
 'prop': 'categories'
 
 return params


def wrappedRequest(category, mode):
 """Wrap a request to deal with connection errors."""
 global base_url
 params = generateRequestsParams(category, mode)
 global headerVal
 max_times = 5
 times = 0
 propertyString = 'categorymembers'
 while(times < max_times):
 try:
 r = requests.get(base_url, headers=headerVal, params=params)
 if(mode != "Pagecats"):
 return r.json()['query'][propertyString]
 else:
 for key in r.json()['query']['pages']:
 return r.json()['query']['pages'][key]['categories']
 except requests.exceptions.ConnectionError as e:
 if(times > max_times):
 print_debug("category failed too many times (times) " +
 " times. Moving on".format(
 category=category,
 times=times
 )
 )
 times = 0
 return [category]
 else:
 print_debug("Retrying category due to connection " +
 " error".format(
 cateogry=category
 )
 )
 times += 1


def getSubcategories(category):
 """Get subcategories of a given subcategory."""
 global max_depth, DEBUGGING
 current_depth = 1
 singleStepSubcategories = [category]
 allSubcategories = 
 while(current_depth <= max_depth):
 print_debug("Current tree depth d".format(d=current_depth))
 subcategoryTemp = 
 if(len(singleStepSubcategories) == 0):
 break
 for subcat in singleStepSubcategories:
 allSubcategories.append(subcat)
 subcategories = wrappedRequest(subcat, mode="Subcat")
 for cat in subcategories:
 title = cat['title']
 print_debug("subcat has subcategory title".format(
 subcat=subcat,
 title=title
 )
 )
 if(title not in allSubcategories):
 allSubcategories.append(title)
 subcategoryTemp.append(title)
 else:
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
 singleStepSubcategories = subcategoryTemp
 current_depth += 1
 return allSubcategories


def saveArray(category, subcats):
 """Save array to file."""
 filename = "category_subcats.txt".format(category=category)
 print_debug("Saving to f".format(f=filename))
 with open(filename, 'w') as f:
 for cat in subcats:
 f.write(cat+"n")


def subcategoriesWithoutDuplicates(category):
 """Generate a list of subcategories without duplicates."""
 return set(getSubcategories(category))


def retreiveSubcategoriesFromLocation(category):
 """Get subcategories from file, or generate them from scratch."""
 subCats = 
 fileName = "category_subcats.txt".format(category=category)
 try:
 subCatFile = open(fileName, 'r')
 print_debug("Reading from filename".format(filename=fileName))
 for count, line in enumerate(subCatFile):
 subCats.append(line.replace("n", ""))
 subCatFile.close()
 except IOError as ioError:
 print_debug("fileName does not exist. Building from " +
 " network".format(fileName=fileName)
 )
 subCats = subcategoriesWithoutDuplicates(category)
 return subCats


def checkPageSimilarity(page, subcategories):
 """Check the similarity of page to a list of subcategories.
 Verify if page truly is a subpage of a category.
 """
 global similarityVal
 pageCats = wrappedRequest(page, mode="Pagecats")
 points = 0.0
 # For every supercategory of page, if it is also in subcategories
 # the page is more likely to be a true subpage.
 for cat in pageCats:
 title = cat['title']
 if(title in subcategories):
 points += 1.0
 score = points/len(pageCats)
 print_debug("Score of p is s".format(p=page, s=str(score)))
 if(score >= similarityVal):
 return True
 return False


def randomPage(category, save, regen, check):
 """Generate a random page from a category."""
 global DEBUGGING
 subCats = 
 read = True
 if(not regen):
 subCats = retreiveSubcategoriesFromLocation(category)
 if(regen or (not read)):
 print_debug("Rebuilding category".format(category=category))
 subCats = subcategoriesWithoutDuplicates(category)
 if(save or regen):
 saveArray(category, subCats)
 randomPage = None
 validRandomPage = True
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 while(not randomPage or not validRandomPage):
 try:
 randomPage = random.choice(pages)
 title = randomPage['title']
 if(check):
 print_debug("Checking " + title)
 validRandomPage = checkPageSimilarity(title, subCats)
 if(not validRandomPage):
 pages.remove(randomPage)
 except IndexError as a:
 print_debug("cat has no pages. Retrying".format(cat=cat))
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 return randomPage['title']


if(__name__ == "__main__"):
 parser = argparse.ArgumentParser(description='Get a random page from a ' +
 'wikipedia category')
 parser.add_argument('category', help="The category you wish to get a " +
 "page from."
 )
 parser.add_argument('--tree_depth',
 nargs='?',
 type=int,
 default=4,
 help="How far down to traverse the subcategory tree"
 )
 parser.add_argument('--similarity',
 nargs='?',
 type=float,
 default=.5,
 help="What percent of page categories need to be " +
 "in subcategory array. Must be used with -c/--check")
 parser.add_argument("-s",
 "--save",
 action="store_true",
 help="Save subcategories to a file for quick re-runs"
 )
 parser.add_argument("-r",
 "--regen",
 action="store_true",
 help="Regenerate the subcategory file"
 )
 parser.add_argument("-v",
 "--verbose",
 action="store_true",
 help="Print debug lines"
 )
 parser.add_argument("-c",
 "--check",
 action="store_true",
 help="After finding page check to see that it truly " +
 "fits in category"
 )
 args = parser.parse_args()
 print_debug(str(args.check))
 DEBUGGING = args.verbose
 max_depth = args.tree_depth
 similarityVal = args.similarity
 if(args.save):
 print_debug("Saving!")
 if(args.regen):
 print_debug("Regenerating!")

 print("https://en.wikipedia.org/wiki/" + randomPage("Category:" +
 args.category,
 save=args.save,
 regen=args.regen,
 check=args.check
 )
 )

I'd appreciate any feedback on the style/structure of my code (e.g. readability, function/variable name, function structure) and any advice on the performance of the program itself.

edited May 4 at 12:38

Daniel

4,1132836

asked May 2 at 2:20

Garrett Credi

363

add a commentÂ |Â

up vote
5
down vote

favorite

I recently wrote a Python script to generate a random page within a Wikipedia category and its subcategories:

"""Generate a random page from a wikipedia category."""
import argparse
import random

import requests

DEBUGGING = False
max_depth = 4
current_depth = 0
header = "Garrett Credi's Random Page Bot(Contact @ gcc@ameritech.net)"
headerVal = 'Api-User-Agent': header
base_url = 'https://en.wikipedia.org/w/api.php'


def print_debug(str):
 """Print strings if in debug/verbose mode mode."""
 global DEBUGGING
 if(DEBUGGING):
 print("DEBUG: " + str)


def generateRequestsParams(category, mode):
 """Generate the params for requests given a category and a mode."""
 cmtype = ""
 if(mode == "Subcat"):
 cmtype = 'subcat'
 elif(mode == "Subpage"):
 cmtype = 'page'
 params = 
 'format': 'json',
 'action': 'query',
 'list': 'categorymembers',
 'cmtitle': category,
 'cmlimit': 500,
 'cmtype': cmtype
 
 if(mode == "Pagecats"):
 params = 
 'format': 'json',
 'action': 'query',
 'titles': category,
 'prop': 'categories'
 
 return params


def wrappedRequest(category, mode):
 """Wrap a request to deal with connection errors."""
 global base_url
 params = generateRequestsParams(category, mode)
 global headerVal
 max_times = 5
 times = 0
 propertyString = 'categorymembers'
 while(times < max_times):
 try:
 r = requests.get(base_url, headers=headerVal, params=params)
 if(mode != "Pagecats"):
 return r.json()['query'][propertyString]
 else:
 for key in r.json()['query']['pages']:
 return r.json()['query']['pages'][key]['categories']
 except requests.exceptions.ConnectionError as e:
 if(times > max_times):
 print_debug("category failed too many times (times) " +
 " times. Moving on".format(
 category=category,
 times=times
 )
 )
 times = 0
 return [category]
 else:
 print_debug("Retrying category due to connection " +
 " error".format(
 cateogry=category
 )
 )
 times += 1


def getSubcategories(category):
 """Get subcategories of a given subcategory."""
 global max_depth, DEBUGGING
 current_depth = 1
 singleStepSubcategories = [category]
 allSubcategories = 
 while(current_depth <= max_depth):
 print_debug("Current tree depth d".format(d=current_depth))
 subcategoryTemp = 
 if(len(singleStepSubcategories) == 0):
 break
 for subcat in singleStepSubcategories:
 allSubcategories.append(subcat)
 subcategories = wrappedRequest(subcat, mode="Subcat")
 for cat in subcategories:
 title = cat['title']
 print_debug("subcat has subcategory title".format(
 subcat=subcat,
 title=title
 )
 )
 if(title not in allSubcategories):
 allSubcategories.append(title)
 subcategoryTemp.append(title)
 else:
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
 singleStepSubcategories = subcategoryTemp
 current_depth += 1
 return allSubcategories


def saveArray(category, subcats):
 """Save array to file."""
 filename = "category_subcats.txt".format(category=category)
 print_debug("Saving to f".format(f=filename))
 with open(filename, 'w') as f:
 for cat in subcats:
 f.write(cat+"n")


def subcategoriesWithoutDuplicates(category):
 """Generate a list of subcategories without duplicates."""
 return set(getSubcategories(category))


def retreiveSubcategoriesFromLocation(category):
 """Get subcategories from file, or generate them from scratch."""
 subCats = 
 fileName = "category_subcats.txt".format(category=category)
 try:
 subCatFile = open(fileName, 'r')
 print_debug("Reading from filename".format(filename=fileName))
 for count, line in enumerate(subCatFile):
 subCats.append(line.replace("n", ""))
 subCatFile.close()
 except IOError as ioError:
 print_debug("fileName does not exist. Building from " +
 " network".format(fileName=fileName)
 )
 subCats = subcategoriesWithoutDuplicates(category)
 return subCats


def checkPageSimilarity(page, subcategories):
 """Check the similarity of page to a list of subcategories.
 Verify if page truly is a subpage of a category.
 """
 global similarityVal
 pageCats = wrappedRequest(page, mode="Pagecats")
 points = 0.0
 # For every supercategory of page, if it is also in subcategories
 # the page is more likely to be a true subpage.
 for cat in pageCats:
 title = cat['title']
 if(title in subcategories):
 points += 1.0
 score = points/len(pageCats)
 print_debug("Score of p is s".format(p=page, s=str(score)))
 if(score >= similarityVal):
 return True
 return False


def randomPage(category, save, regen, check):
 """Generate a random page from a category."""
 global DEBUGGING
 subCats = 
 read = True
 if(not regen):
 subCats = retreiveSubcategoriesFromLocation(category)
 if(regen or (not read)):
 print_debug("Rebuilding category".format(category=category))
 subCats = subcategoriesWithoutDuplicates(category)
 if(save or regen):
 saveArray(category, subCats)
 randomPage = None
 validRandomPage = True
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 while(not randomPage or not validRandomPage):
 try:
 randomPage = random.choice(pages)
 title = randomPage['title']
 if(check):
 print_debug("Checking " + title)
 validRandomPage = checkPageSimilarity(title, subCats)
 if(not validRandomPage):
 pages.remove(randomPage)
 except IndexError as a:
 print_debug("cat has no pages. Retrying".format(cat=cat))
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 return randomPage['title']


if(__name__ == "__main__"):
 parser = argparse.ArgumentParser(description='Get a random page from a ' +
 'wikipedia category')
 parser.add_argument('category', help="The category you wish to get a " +
 "page from."
 )
 parser.add_argument('--tree_depth',
 nargs='?',
 type=int,
 default=4,
 help="How far down to traverse the subcategory tree"
 )
 parser.add_argument('--similarity',
 nargs='?',
 type=float,
 default=.5,
 help="What percent of page categories need to be " +
 "in subcategory array. Must be used with -c/--check")
 parser.add_argument("-s",
 "--save",
 action="store_true",
 help="Save subcategories to a file for quick re-runs"
 )
 parser.add_argument("-r",
 "--regen",
 action="store_true",
 help="Regenerate the subcategory file"
 )
 parser.add_argument("-v",
 "--verbose",
 action="store_true",
 help="Print debug lines"
 )
 parser.add_argument("-c",
 "--check",
 action="store_true",
 help="After finding page check to see that it truly " +
 "fits in category"
 )
 args = parser.parse_args()
 print_debug(str(args.check))
 DEBUGGING = args.verbose
 max_depth = args.tree_depth
 similarityVal = args.similarity
 if(args.save):
 print_debug("Saving!")
 if(args.regen):
 print_debug("Regenerating!")

 print("https://en.wikipedia.org/wiki/" + randomPage("Category:" +
 args.category,
 save=args.save,
 regen=args.regen,
 check=args.check
 )
 )

I'd appreciate any feedback on the style/structure of my code (e.g. readability, function/variable name, function structure) and any advice on the performance of the program itself.

edited May 4 at 12:38

Daniel

4,1132836

asked May 2 at 2:20

Garrett Credi

363

I recently wrote a Python script to generate a random page within a Wikipedia category and its subcategories:

"""Generate a random page from a wikipedia category."""
import argparse
import random

import requests

DEBUGGING = False
max_depth = 4
current_depth = 0
header = "Garrett Credi's Random Page Bot(Contact @ gcc@ameritech.net)"
headerVal = 'Api-User-Agent': header
base_url = 'https://en.wikipedia.org/w/api.php'


def print_debug(str):
 """Print strings if in debug/verbose mode mode."""
 global DEBUGGING
 if(DEBUGGING):
 print("DEBUG: " + str)


def generateRequestsParams(category, mode):
 """Generate the params for requests given a category and a mode."""
 cmtype = ""
 if(mode == "Subcat"):
 cmtype = 'subcat'
 elif(mode == "Subpage"):
 cmtype = 'page'
 params = 
 'format': 'json',
 'action': 'query',
 'list': 'categorymembers',
 'cmtitle': category,
 'cmlimit': 500,
 'cmtype': cmtype
 
 if(mode == "Pagecats"):
 params = 
 'format': 'json',
 'action': 'query',
 'titles': category,
 'prop': 'categories'
 
 return params


def wrappedRequest(category, mode):
 """Wrap a request to deal with connection errors."""
 global base_url
 params = generateRequestsParams(category, mode)
 global headerVal
 max_times = 5
 times = 0
 propertyString = 'categorymembers'
 while(times < max_times):
 try:
 r = requests.get(base_url, headers=headerVal, params=params)
 if(mode != "Pagecats"):
 return r.json()['query'][propertyString]
 else:
 for key in r.json()['query']['pages']:
 return r.json()['query']['pages'][key]['categories']
 except requests.exceptions.ConnectionError as e:
 if(times > max_times):
 print_debug("category failed too many times (times) " +
 " times. Moving on".format(
 category=category,
 times=times
 )
 )
 times = 0
 return [category]
 else:
 print_debug("Retrying category due to connection " +
 " error".format(
 cateogry=category
 )
 )
 times += 1


def getSubcategories(category):
 """Get subcategories of a given subcategory."""
 global max_depth, DEBUGGING
 current_depth = 1
 singleStepSubcategories = [category]
 allSubcategories = 
 while(current_depth <= max_depth):
 print_debug("Current tree depth d".format(d=current_depth))
 subcategoryTemp = 
 if(len(singleStepSubcategories) == 0):
 break
 for subcat in singleStepSubcategories:
 allSubcategories.append(subcat)
 subcategories = wrappedRequest(subcat, mode="Subcat")
 for cat in subcategories:
 title = cat['title']
 print_debug("subcat has subcategory title".format(
 subcat=subcat,
 title=title
 )
 )
 if(title not in allSubcategories):
 allSubcategories.append(title)
 subcategoryTemp.append(title)
 else:
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
 singleStepSubcategories = subcategoryTemp
 current_depth += 1
 return allSubcategories


def saveArray(category, subcats):
 """Save array to file."""
 filename = "category_subcats.txt".format(category=category)
 print_debug("Saving to f".format(f=filename))
 with open(filename, 'w') as f:
 for cat in subcats:
 f.write(cat+"n")


def subcategoriesWithoutDuplicates(category):
 """Generate a list of subcategories without duplicates."""
 return set(getSubcategories(category))


def retreiveSubcategoriesFromLocation(category):
 """Get subcategories from file, or generate them from scratch."""
 subCats = 
 fileName = "category_subcats.txt".format(category=category)
 try:
 subCatFile = open(fileName, 'r')
 print_debug("Reading from filename".format(filename=fileName))
 for count, line in enumerate(subCatFile):
 subCats.append(line.replace("n", ""))
 subCatFile.close()
 except IOError as ioError:
 print_debug("fileName does not exist. Building from " +
 " network".format(fileName=fileName)
 )
 subCats = subcategoriesWithoutDuplicates(category)
 return subCats


def checkPageSimilarity(page, subcategories):
 """Check the similarity of page to a list of subcategories.
 Verify if page truly is a subpage of a category.
 """
 global similarityVal
 pageCats = wrappedRequest(page, mode="Pagecats")
 points = 0.0
 # For every supercategory of page, if it is also in subcategories
 # the page is more likely to be a true subpage.
 for cat in pageCats:
 title = cat['title']
 if(title in subcategories):
 points += 1.0
 score = points/len(pageCats)
 print_debug("Score of p is s".format(p=page, s=str(score)))
 if(score >= similarityVal):
 return True
 return False


def randomPage(category, save, regen, check):
 """Generate a random page from a category."""
 global DEBUGGING
 subCats = 
 read = True
 if(not regen):
 subCats = retreiveSubcategoriesFromLocation(category)
 if(regen or (not read)):
 print_debug("Rebuilding category".format(category=category))
 subCats = subcategoriesWithoutDuplicates(category)
 if(save or regen):
 saveArray(category, subCats)
 randomPage = None
 validRandomPage = True
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 while(not randomPage or not validRandomPage):
 try:
 randomPage = random.choice(pages)
 title = randomPage['title']
 if(check):
 print_debug("Checking " + title)
 validRandomPage = checkPageSimilarity(title, subCats)
 if(not validRandomPage):
 pages.remove(randomPage)
 except IndexError as a:
 print_debug("cat has no pages. Retrying".format(cat=cat))
 cat = random.sample(subCats, 1)[0]
 print_debug("Chose category cat".format(cat=cat))
 pages = wrappedRequest(cat, mode="Subpage")
 return randomPage['title']


if(__name__ == "__main__"):
 parser = argparse.ArgumentParser(description='Get a random page from a ' +
 'wikipedia category')
 parser.add_argument('category', help="The category you wish to get a " +
 "page from."
 )
 parser.add_argument('--tree_depth',
 nargs='?',
 type=int,
 default=4,
 help="How far down to traverse the subcategory tree"
 )
 parser.add_argument('--similarity',
 nargs='?',
 type=float,
 default=.5,
 help="What percent of page categories need to be " +
 "in subcategory array. Must be used with -c/--check")
 parser.add_argument("-s",
 "--save",
 action="store_true",
 help="Save subcategories to a file for quick re-runs"
 )
 parser.add_argument("-r",
 "--regen",
 action="store_true",
 help="Regenerate the subcategory file"
 )
 parser.add_argument("-v",
 "--verbose",
 action="store_true",
 help="Print debug lines"
 )
 parser.add_argument("-c",
 "--check",
 action="store_true",
 help="After finding page check to see that it truly " +
 "fits in category"
 )
 args = parser.parse_args()
 print_debug(str(args.check))
 DEBUGGING = args.verbose
 max_depth = args.tree_depth
 similarityVal = args.similarity
 if(args.save):
 print_debug("Saving!")
 if(args.regen):
 print_debug("Regenerating!")

 print("https://en.wikipedia.org/wiki/" + randomPage("Category:" +
 args.category,
 save=args.save,
 regen=args.regen,
 check=args.check
 )
 )

I'd appreciate any feedback on the style/structure of my code (e.g. readability, function/variable name, function structure) and any advice on the performance of the program itself.

edited May 4 at 12:38

Daniel

4,1132836

asked May 2 at 2:20

Garrett Credi

363

edited May 4 at 12:38

Daniel

4,1132836

edited May 4 at 12:38

Daniel

4,1132836

edited May 4 at 12:38

Daniel

4,1132836

asked May 2 at 2:20

Garrett Credi

363

asked May 2 at 2:20

Garrett Credi

363

asked May 2 at 2:20

Garrett Credi

363

add a commentÂ |Â

1 Answer
1

active

oldest

votes

up vote
2
down vote

General

In print_debug(), you don't have to use the global keyword to refer to DEBUGGING. If the Python interpreter can't find the name DEBUGGING locally, it will then search for it globally. If it still can't find it, a NameError is raised. The only two reasons to use global are:
1. When you have a local and a global variable with the same name, and you explicitly want to refer to the global variable;
2. In local scope, when you need to (re)assign to a global variable.

In print_debug(), you're shadowing the built-in str. To avoid shadowing a variable, by convention, you should add a trailing underscore (as in str_). If you find that ugly, you can also spell it out, or abbreviate further: string or s (the former is more desirable).

You don't need parentheses around if-statements and while-statements.

The idiomatic way of checking if a container is empty in Python is to directly use it in an if-statement, in this fashion:
```
if not container:
 # Container is empty
```
... this works because a container's __bool__() method returns True if it is not empty, and False otherwise.

When catching an exception, if you don't need access to the exception instance itself, you should leave out the as ... part.

If you do need access to the exception instance, most people use:
```
except <exception type> as exc:
```
... or:
```
except <exception type> as err:
```

The following:

if <boolean expression>:
 return True
return False

... can be shortened to:

return <boolean expression>

In randomPage(), the following:
```
if(regen or (not read)):
```
... can be simplified to become:
```
if regen or not read:
```

Avoid global variables. They are a telltale sign of a design problem in your code. Global constants are acceptable, but non-constant global variables can cause all kinds of trouble:
1. It's hard to track where they are being used and modified. This problem becomes very prominent when using threads;
2. Loading a global variable is more costly than loading a local one;
3. If you design an API and have lots of global variables floating about, when someone performs a wildcard import, their global namespace will be cluttered.

Generally, if you find yourself doing this:
```
my_list = 
for x in some_container:
 if some_condition_applies(x):
 my_list.append(x)
```
... a list comprehension would be a good fit:
```
my_list = [x for x in some_container if some_condition_applies(x)]
```
A list comprehensions is shorter and often faster than its for-loop counterpart.

Debug messages should be sent to stderr, not stdout:

import sys

...

print("DEBUG: " + str, file=sys.stderr)

PEP-8

PEP-8 is the name of the official Python style guide. You violated it a couple of times:

Your indentation is all over the place. Sorry, but it had to be said. Take this excerpt:
```
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
```
... isn't that hard to read? Something like this would be much easier on the eyes:
```
print_debug(
 "t already checked. Moving on".format(t=title)
)
```
... or this:
```
print_debug("t already checked. Moving on".format(
 t=title)
)
```
Ultimately, it's up to you. PEP-8 also lists some examples of acceptable styles.¹

Use snake_case for function and variable names. Only use UPPERCASE_WITH_UNDERSCORES for constants.²

Don't mix single and double quotes. I prefer double quotes, because they're less likely to cause clashes with quotation marks in flowing text.³

Limit the line length to 79 characters.⁴

Docstrings

Good job on adding docstrings to your functions and to the module itself. Most people (including me) often can't bring up the effort to add documentation, but you have! :) If you want to help yourself understand the code in 6 months' time, or if you want to publish this as an API, you should be a bit more thorough, for instance by adding information about:
- What arguments the function takes, what type they should be, and exactly what they convey;
- The return type of the value;
- Any special cases the caller should be aware of.
There's even a style guide for docstrings: PEP-257.

Performance

I don't currently have the time to do a full review on performance, but I suggest you run a profiler to see where you can optimize. The problem is very likely to be I/O-bound, but the debugging can have a noticable performance hit.

If you want to squeeze a little bit of extra speed out of requests, you can use a requests.Session object, which:

... [omitted] will use urllib3's connection pooling. So if you're making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase ... [omitted]

References

1 PEP-8: Indentation

2 PEP-8: Function and variable names

3 PEP-8: String Quotes

4 PEP-8: Maximum Line Length

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

add a commentÂ |Â

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f193411%2fwikipedia-random-page-in-category-bot%23new-answer', 'question_page');

);

Post as a guest

Name

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

up vote
2
down vote

General

In print_debug(), you don't have to use the global keyword to refer to DEBUGGING. If the Python interpreter can't find the name DEBUGGING locally, it will then search for it globally. If it still can't find it, a NameError is raised. The only two reasons to use global are:
1. When you have a local and a global variable with the same name, and you explicitly want to refer to the global variable;
2. In local scope, when you need to (re)assign to a global variable.

In print_debug(), you're shadowing the built-in str. To avoid shadowing a variable, by convention, you should add a trailing underscore (as in str_). If you find that ugly, you can also spell it out, or abbreviate further: string or s (the former is more desirable).

You don't need parentheses around if-statements and while-statements.

The idiomatic way of checking if a container is empty in Python is to directly use it in an if-statement, in this fashion:
```
if not container:
 # Container is empty
```
... this works because a container's __bool__() method returns True if it is not empty, and False otherwise.

When catching an exception, if you don't need access to the exception instance itself, you should leave out the as ... part.

If you do need access to the exception instance, most people use:
```
except <exception type> as exc:
```
... or:
```
except <exception type> as err:
```

The following:

if <boolean expression>:
 return True
return False

... can be shortened to:

return <boolean expression>

In randomPage(), the following:
```
if(regen or (not read)):
```
... can be simplified to become:
```
if regen or not read:
```

Avoid global variables. They are a telltale sign of a design problem in your code. Global constants are acceptable, but non-constant global variables can cause all kinds of trouble:
1. It's hard to track where they are being used and modified. This problem becomes very prominent when using threads;
2. Loading a global variable is more costly than loading a local one;
3. If you design an API and have lots of global variables floating about, when someone performs a wildcard import, their global namespace will be cluttered.

Generally, if you find yourself doing this:
```
my_list = 
for x in some_container:
 if some_condition_applies(x):
 my_list.append(x)
```
... a list comprehension would be a good fit:
```
my_list = [x for x in some_container if some_condition_applies(x)]
```
A list comprehensions is shorter and often faster than its for-loop counterpart.

Debug messages should be sent to stderr, not stdout:

import sys

...

print("DEBUG: " + str, file=sys.stderr)

PEP-8

PEP-8 is the name of the official Python style guide. You violated it a couple of times:

Your indentation is all over the place. Sorry, but it had to be said. Take this excerpt:
```
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
```
... isn't that hard to read? Something like this would be much easier on the eyes:
```
print_debug(
 "t already checked. Moving on".format(t=title)
)
```
... or this:
```
print_debug("t already checked. Moving on".format(
 t=title)
)
```
Ultimately, it's up to you. PEP-8 also lists some examples of acceptable styles.¹

Use snake_case for function and variable names. Only use UPPERCASE_WITH_UNDERSCORES for constants.²

Don't mix single and double quotes. I prefer double quotes, because they're less likely to cause clashes with quotation marks in flowing text.³

Limit the line length to 79 characters.⁴

Docstrings

Good job on adding docstrings to your functions and to the module itself. Most people (including me) often can't bring up the effort to add documentation, but you have! :) If you want to help yourself understand the code in 6 months' time, or if you want to publish this as an API, you should be a bit more thorough, for instance by adding information about:
- What arguments the function takes, what type they should be, and exactly what they convey;
- The return type of the value;
- Any special cases the caller should be aware of.
There's even a style guide for docstrings: PEP-257.

Performance

If you want to squeeze a little bit of extra speed out of requests, you can use a requests.Session object, which:

... [omitted] will use urllib3's connection pooling. So if you're making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase ... [omitted]

References

1 PEP-8: Indentation

2 PEP-8: Function and variable names

3 PEP-8: String Quotes

4 PEP-8: Maximum Line Length

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

add a commentÂ |Â

up vote
2
down vote

General

In print_debug(), you don't have to use the global keyword to refer to DEBUGGING. If the Python interpreter can't find the name DEBUGGING locally, it will then search for it globally. If it still can't find it, a NameError is raised. The only two reasons to use global are:
1. When you have a local and a global variable with the same name, and you explicitly want to refer to the global variable;
2. In local scope, when you need to (re)assign to a global variable.

In print_debug(), you're shadowing the built-in str. To avoid shadowing a variable, by convention, you should add a trailing underscore (as in str_). If you find that ugly, you can also spell it out, or abbreviate further: string or s (the former is more desirable).

You don't need parentheses around if-statements and while-statements.

The idiomatic way of checking if a container is empty in Python is to directly use it in an if-statement, in this fashion:
```
if not container:
 # Container is empty
```
... this works because a container's __bool__() method returns True if it is not empty, and False otherwise.

When catching an exception, if you don't need access to the exception instance itself, you should leave out the as ... part.

If you do need access to the exception instance, most people use:
```
except <exception type> as exc:
```
... or:
```
except <exception type> as err:
```

The following:

if <boolean expression>:
 return True
return False

... can be shortened to:

return <boolean expression>

In randomPage(), the following:
```
if(regen or (not read)):
```
... can be simplified to become:
```
if regen or not read:
```

Avoid global variables. They are a telltale sign of a design problem in your code. Global constants are acceptable, but non-constant global variables can cause all kinds of trouble:
1. It's hard to track where they are being used and modified. This problem becomes very prominent when using threads;
2. Loading a global variable is more costly than loading a local one;
3. If you design an API and have lots of global variables floating about, when someone performs a wildcard import, their global namespace will be cluttered.

Generally, if you find yourself doing this:
```
my_list = 
for x in some_container:
 if some_condition_applies(x):
 my_list.append(x)
```
... a list comprehension would be a good fit:
```
my_list = [x for x in some_container if some_condition_applies(x)]
```
A list comprehensions is shorter and often faster than its for-loop counterpart.

Debug messages should be sent to stderr, not stdout:

import sys

...

print("DEBUG: " + str, file=sys.stderr)

PEP-8

PEP-8 is the name of the official Python style guide. You violated it a couple of times:

Your indentation is all over the place. Sorry, but it had to be said. Take this excerpt:
```
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
```
... isn't that hard to read? Something like this would be much easier on the eyes:
```
print_debug(
 "t already checked. Moving on".format(t=title)
)
```
... or this:
```
print_debug("t already checked. Moving on".format(
 t=title)
)
```
Ultimately, it's up to you. PEP-8 also lists some examples of acceptable styles.¹

Use snake_case for function and variable names. Only use UPPERCASE_WITH_UNDERSCORES for constants.²

Don't mix single and double quotes. I prefer double quotes, because they're less likely to cause clashes with quotation marks in flowing text.³

Limit the line length to 79 characters.⁴

Docstrings

Good job on adding docstrings to your functions and to the module itself. Most people (including me) often can't bring up the effort to add documentation, but you have! :) If you want to help yourself understand the code in 6 months' time, or if you want to publish this as an API, you should be a bit more thorough, for instance by adding information about:
- What arguments the function takes, what type they should be, and exactly what they convey;
- The return type of the value;
- Any special cases the caller should be aware of.
There's even a style guide for docstrings: PEP-257.

Performance

If you want to squeeze a little bit of extra speed out of requests, you can use a requests.Session object, which:

... [omitted] will use urllib3's connection pooling. So if you're making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase ... [omitted]

References

1 PEP-8: Indentation

2 PEP-8: Function and variable names

3 PEP-8: String Quotes

4 PEP-8: Maximum Line Length

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

add a commentÂ |Â

up vote
2
down vote

General

In print_debug(), you don't have to use the global keyword to refer to DEBUGGING. If the Python interpreter can't find the name DEBUGGING locally, it will then search for it globally. If it still can't find it, a NameError is raised. The only two reasons to use global are:
1. When you have a local and a global variable with the same name, and you explicitly want to refer to the global variable;
2. In local scope, when you need to (re)assign to a global variable.

In print_debug(), you're shadowing the built-in str. To avoid shadowing a variable, by convention, you should add a trailing underscore (as in str_). If you find that ugly, you can also spell it out, or abbreviate further: string or s (the former is more desirable).

You don't need parentheses around if-statements and while-statements.

The idiomatic way of checking if a container is empty in Python is to directly use it in an if-statement, in this fashion:
```
if not container:
 # Container is empty
```
... this works because a container's __bool__() method returns True if it is not empty, and False otherwise.

When catching an exception, if you don't need access to the exception instance itself, you should leave out the as ... part.

If you do need access to the exception instance, most people use:
```
except <exception type> as exc:
```
... or:
```
except <exception type> as err:
```

The following:

if <boolean expression>:
 return True
return False

... can be shortened to:

return <boolean expression>

In randomPage(), the following:
```
if(regen or (not read)):
```
... can be simplified to become:
```
if regen or not read:
```

Avoid global variables. They are a telltale sign of a design problem in your code. Global constants are acceptable, but non-constant global variables can cause all kinds of trouble:
1. It's hard to track where they are being used and modified. This problem becomes very prominent when using threads;
2. Loading a global variable is more costly than loading a local one;
3. If you design an API and have lots of global variables floating about, when someone performs a wildcard import, their global namespace will be cluttered.

Generally, if you find yourself doing this:
```
my_list = 
for x in some_container:
 if some_condition_applies(x):
 my_list.append(x)
```
... a list comprehension would be a good fit:
```
my_list = [x for x in some_container if some_condition_applies(x)]
```
A list comprehensions is shorter and often faster than its for-loop counterpart.

Debug messages should be sent to stderr, not stdout:

import sys

...

print("DEBUG: " + str, file=sys.stderr)

PEP-8

PEP-8 is the name of the official Python style guide. You violated it a couple of times:

Your indentation is all over the place. Sorry, but it had to be said. Take this excerpt:
```
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
```
... isn't that hard to read? Something like this would be much easier on the eyes:
```
print_debug(
 "t already checked. Moving on".format(t=title)
)
```
... or this:
```
print_debug("t already checked. Moving on".format(
 t=title)
)
```
Ultimately, it's up to you. PEP-8 also lists some examples of acceptable styles.¹

Use snake_case for function and variable names. Only use UPPERCASE_WITH_UNDERSCORES for constants.²

Don't mix single and double quotes. I prefer double quotes, because they're less likely to cause clashes with quotation marks in flowing text.³

Limit the line length to 79 characters.⁴

Docstrings

Good job on adding docstrings to your functions and to the module itself. Most people (including me) often can't bring up the effort to add documentation, but you have! :) If you want to help yourself understand the code in 6 months' time, or if you want to publish this as an API, you should be a bit more thorough, for instance by adding information about:
- What arguments the function takes, what type they should be, and exactly what they convey;
- The return type of the value;
- Any special cases the caller should be aware of.
There's even a style guide for docstrings: PEP-257.

Performance

If you want to squeeze a little bit of extra speed out of requests, you can use a requests.Session object, which:

... [omitted] will use urllib3's connection pooling. So if you're making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase ... [omitted]

References

1 PEP-8: Indentation

2 PEP-8: Function and variable names

3 PEP-8: String Quotes

4 PEP-8: Maximum Line Length

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

General

In print_debug(), you don't have to use the global keyword to refer to DEBUGGING. If the Python interpreter can't find the name DEBUGGING locally, it will then search for it globally. If it still can't find it, a NameError is raised. The only two reasons to use global are:
1. When you have a local and a global variable with the same name, and you explicitly want to refer to the global variable;
2. In local scope, when you need to (re)assign to a global variable.

In print_debug(), you're shadowing the built-in str. To avoid shadowing a variable, by convention, you should add a trailing underscore (as in str_). If you find that ugly, you can also spell it out, or abbreviate further: string or s (the former is more desirable).

You don't need parentheses around if-statements and while-statements.

The idiomatic way of checking if a container is empty in Python is to directly use it in an if-statement, in this fashion:
```
if not container:
 # Container is empty
```
... this works because a container's __bool__() method returns True if it is not empty, and False otherwise.

When catching an exception, if you don't need access to the exception instance itself, you should leave out the as ... part.

If you do need access to the exception instance, most people use:
```
except <exception type> as exc:
```
... or:
```
except <exception type> as err:
```

The following:

if <boolean expression>:
 return True
return False

... can be shortened to:

return <boolean expression>

In randomPage(), the following:
```
if(regen or (not read)):
```
... can be simplified to become:
```
if regen or not read:
```

Avoid global variables. They are a telltale sign of a design problem in your code. Global constants are acceptable, but non-constant global variables can cause all kinds of trouble:
1. It's hard to track where they are being used and modified. This problem becomes very prominent when using threads;
2. Loading a global variable is more costly than loading a local one;
3. If you design an API and have lots of global variables floating about, when someone performs a wildcard import, their global namespace will be cluttered.

Generally, if you find yourself doing this:
```
my_list = 
for x in some_container:
 if some_condition_applies(x):
 my_list.append(x)
```
... a list comprehension would be a good fit:
```
my_list = [x for x in some_container if some_condition_applies(x)]
```
A list comprehensions is shorter and often faster than its for-loop counterpart.

Debug messages should be sent to stderr, not stdout:

import sys

...

print("DEBUG: " + str, file=sys.stderr)

PEP-8

PEP-8 is the name of the official Python style guide. You violated it a couple of times:

Your indentation is all over the place. Sorry, but it had to be said. Take this excerpt:
```
 print_debug("t already checked. Moving on".format(
 t=title
 )
 )
```
... isn't that hard to read? Something like this would be much easier on the eyes:
```
print_debug(
 "t already checked. Moving on".format(t=title)
)
```
... or this:
```
print_debug("t already checked. Moving on".format(
 t=title)
)
```
Ultimately, it's up to you. PEP-8 also lists some examples of acceptable styles.¹

Use snake_case for function and variable names. Only use UPPERCASE_WITH_UNDERSCORES for constants.²

Don't mix single and double quotes. I prefer double quotes, because they're less likely to cause clashes with quotation marks in flowing text.³

Limit the line length to 79 characters.⁴

Docstrings

Good job on adding docstrings to your functions and to the module itself. Most people (including me) often can't bring up the effort to add documentation, but you have! :) If you want to help yourself understand the code in 6 months' time, or if you want to publish this as an API, you should be a bit more thorough, for instance by adding information about:
- What arguments the function takes, what type they should be, and exactly what they convey;
- The return type of the value;
- Any special cases the caller should be aware of.
There's even a style guide for docstrings: PEP-257.

Performance

If you want to squeeze a little bit of extra speed out of requests, you can use a requests.Session object, which:

... [omitted] will use urllib3's connection pooling. So if you're making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase ... [omitted]

References

1 PEP-8: Indentation

2 PEP-8: Function and variable names

3 PEP-8: String Quotes

4 PEP-8: Maximum Line Length

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

edited May 4 at 11:27

answered May 4 at 10:34

Daniel

4,1132836

answered May 4 at 10:34

Daniel

4,1132836

answered May 4 at 10:34

Daniel

4,1132836

add a commentÂ |Â

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr

Wikipedia Random Page in Category Bot

1 Answer
1

General

PEP-8

Docstrings

Performance

References

Your Answer

Post as a guest

1 Answer
1

1 Answer
1

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

Post as a guest

Popular posts from this blog

Chat program with C++ and SFML

Read an image with ADNS2610 optical sensor and Arduino Uno

Read files from a directory using Promises

Wikipedia Random Page in Category Bot

1 Answer 1

General

PEP-8

Docstrings

Performance

References

Your Answer

Sign up or log in

Post as a guest

Post as a guest

1 Answer 1

1 Answer 1

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

General

PEP-8

Docstrings

Performance

References

Sign up or log in

Post as a guest

Post as a guest

Sign up or log in

Post as a guest

Sign up or log in

Post as a guest

Sign up or log in

Post as a guest

Popular posts from this blog

Chat program with C++ and SFML

Read an image with ADNS2610 optical sensor and Arduino Uno

Read files from a directory using Promises

1 Answer
1

1 Answer
1

1 Answer
1