Making 10000 HTTP requests as fast as possible

I need a program that gathers data from a large list of API endpoints. Below is a mock-up that attempts to make 10000 requests as fast as possible. Any suggestions on how to improve it (especially for speed) are highly welcome. Experimentation showed that a Semaphore cap of around 100 gave the best speed.



import asyncio
from aiohttp import ClientSession
import datetime
import time
import sys

def processData(data):
    time.sleep(0.001)  # stands in for real data-processing time
    return data

async def fetch(url, session):
    async with session.get(url) as response:
        data = await response.read()
        data = processData(data)
        return data

async def bound_fetch(sem, url, session):
    async with sem:  # cap the number of in-flight requests
        return await fetch(url, session)


async def run(loop, N):
    url = "https://www.example.com"
    tasks = []
    sem = asyncio.Semaphore(100)
    async with ClientSession() as session:
        for i in range(N):
            task = loop.create_task(bound_fetch(sem, url, session))
            tasks.append(task)

        print("Done starting {} tasks".format(N))
        starttime = time.time()
        print(datetime.datetime.now())
        responses = await asyncio.gather(*tasks)
        print("Done completing {} tasks in: {}".format(N, time.time() - starttime))

        return responses


loop = asyncio.get_event_loop()

if __name__ == "__main__":
    if len(sys.argv) == 2:
        N = int(sys.argv[1])
    else:
        N = 10000
    maintask = loop.create_task(run(loop, N))
    result = loop.run_until_complete(maintask)
    print(len(result))
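
A possible refinement (a sketch, not a measured claim): aiohttp's connection pool already caps concurrent connections via TCPConnector(limit=...), and its default limit is 100, which may be part of why a Semaphore of around 100 tested best. Letting the connector enforce the cap removes bound_fetch entirely; run_pooled below is a hypothetical variant, not the question's code:

import asyncio
from aiohttp import ClientSession, TCPConnector

async def run_pooled(N, url="https://www.example.com"):
    # TCPConnector(limit=100) caps simultaneous connections at the
    # pool level; 100 mirrors the question's Semaphore value and
    # would need the same tuning against a real endpoint.
    async def fetch(session):
        async with session.get(url) as response:
            return await response.read()
    async with ClientSession(connector=TCPConnector(limit=100)) as session:
        return await asyncio.gather(*(fetch(session) for _ in range(N)))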

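If the real program can consume results as they finish rather than all at once, asyncio.as_completed avoids holding all 10000 responses in memory until the last one lands. A sketch reusing bound_fetch from above, with a hypothetical handle_result standing in for whatever the real program does with each response:

async def run_streaming(loop, N, url="https://www.example.com"):
    sem = asyncio.Semaphore(100)
    async with ClientSession() as session:
        tasks = [loop.create_task(bound_fetch(sem, url, session))
                 for _ in range(N)]
        # process each response as soon as it arrives instead of
        # buffering everything in gather()
        for future in asyncio.as_completed(tasks):
            data = await future
            handle_result(data)  # hypothetical consumer, not in the original
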
asked Apr 3 at 20:33, edited Apr 3 at 21:05 – rajendra (192)

  • Why do you have time.sleep(0.001)? – hjpotter92, Apr 4 at 0:39
  • @hjpotter92 That is for simulating some data-processing time. – rajendra, Apr 4 at 19:28
  • I've flagged this question as off-topic, because you have no concrete implementation of processData, and provided no 'real' URL. – Daniel, Apr 5 at 5:56
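
One point worth making explicit from that comment exchange: time.sleep() blocks the whole event loop, so each 0.001 s pause stalls every in-flight request, not just the current one. If processData stands in for real blocking work, off-loading it to the default thread-pool executor keeps the loop free. A sketch using the same names as the question's code, untested against a real endpoint:

async def fetch(url, session):
    loop = asyncio.get_event_loop()
    async with session.get(url) as response:
        data = await response.read()
        # run the blocking processData in a worker thread so the
        # event loop can keep servicing other requests
        return await loop.run_in_executor(None, processData, data)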