Making 10000 HTTP requests as fast as possible
I have a program that needs to gather data from a large list of API endpoints. Below is a mock-up that attempts to make 10000 requests as fast as possible. Any suggestions on how to improve it (especially for speed) are highly welcome. Experimentation showed that a semaphore cap of around 100 gave the best speed.
import asyncio
from aiohttp import ClientSession
import datetime
import time
import sys

def processData(data):
    # Simulates some per-response data-processing time (see comments below).
    time.sleep(0.001)
    return data

async def fetch(url, session):
    async with session.get(url) as response:
        data = await response.read()
        data = processData(data)
        return data

async def bound_fetch(sem, url, session):
    # The semaphore caps how many fetches run concurrently.
    async with sem:
        return await fetch(url, session)

async def run(loop, N):
    url = "https://www.example.com"
    tasks = []
    sem = asyncio.Semaphore(100)
    async with ClientSession() as session:
        for i in range(N):
            task = loop.create_task(bound_fetch(sem, url, session))
            tasks.append(task)
        print("Done starting {} tasks".format(N))
        starttime = time.time()
        print(datetime.datetime.now())
        responses = await asyncio.gather(*tasks)
        print("Done completing {} tasks in: {}".format(N, time.time() - starttime))
        return responses

args = sys.argv
loop = asyncio.get_event_loop()

if __name__ == "__main__":
    if len(sys.argv) == 2:
        N = int(sys.argv[1])
    else:
        N = 10000
    maintask = loop.create_task(run(loop, N))
    result = loop.run_until_complete(maintask)
    print(len(result))
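A note on the comment thread below: time.sleep in processData blocks the event loop, so any real synchronous post-processing would stall every other in-flight request while it runs. A minimal sketch of one way to keep the loop free, assuming processData stays synchronous; the executor parameter is illustrative and not part of the original program:

import asyncio
import time

def processData(data):
    time.sleep(0.001)  # stand-in for blocking post-processing, as in the question
    return data

async def fetch(url, session, executor=None):
    async with session.get(url) as response:
        data = await response.read()
    # Hand the blocking processData off to a worker thread; executor=None
    # uses the event loop's default ThreadPoolExecutor, or a shared
    # concurrent.futures.ThreadPoolExecutor can be passed in.
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(executor, processData, data)

With 10000 requests, even 1 ms of blocking work per response adds roughly 10 seconds of serialized time on the loop, so this can matter more than tuning the semaphore cap.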
Tags: python, python-3.x, asynchronous, http
asked Apr 3 at 20:33 by rajendra, edited Apr 3 at 21:05
Why do you have time.sleep(0.001)? – hjpotter92, Apr 4 at 0:39

@hjpotter92 That is for simulating some data-processing time. – rajendra, Apr 4 at 19:28

I've flagged this question as off-topic, because you have no concrete implementation of processData, and provided no 'real' URL. – Daniel, Apr 5 at 5:56
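On the semaphore cap itself: aiohttp's ClientSession pools connections through a TCPConnector, which in recent versions defaults to a limit of 100 concurrent connections per session, and that may be why caps near 100 tested best. A sketch of setting the limit explicitly at the connector level instead of with a Semaphore; the run/fetch shapes mirror the question but are simplified:

import asyncio
from aiohttp import ClientSession, TCPConnector

async def fetch(url, session):
    async with session.get(url) as response:
        return await response.read()

async def run(N):
    url = "https://www.example.com"
    # limit=100 caps concurrent connections at the connector level,
    # playing the same role as the Semaphore in the original code.
    async with ClientSession(connector=TCPConnector(limit=100)) as session:
        tasks = [asyncio.ensure_future(fetch(url, session)) for _ in range(N)]
        return await asyncio.gather(*tasks)

Whether this beats the explicit semaphore is worth benchmarking against the real endpoints: the connector limit only gates connections, while the semaphore also gates task scheduling.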