Python Multiprocessing-Process is mixing output [closed]
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
-2
down vote
favorite
My input_file.txt contains 3000 lines, where each line is:
['author1'] (tabspace)xxxxxx (tabspace)['url1','url2']
['author2'] (tabspace)xxxxxx (tabspace)['url3','url4',....] . .. ...
I am interested in extracting images of each author, with images of one author in his own separate folder. I used the following code and it is mixing up images in folders.
import ast
import re
import json
import os
import multiprocessing as mp
from multiprocessing import Pool
from multiprocessing import Process,Lock
import random
import time
import urllib.request
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import string
file_path='/home/'
def link_status(link):
try:
r=requests.get(link,timeout=10)
data=r.text
soup=BeautifulSoup(data,"html.parser")
if soup:
return soup
except Exception as e:
return None
def imp_images(authr,urllist):
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
if __name__ == '__main__':
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
The above code is working fine in term of saving images but mixing up images in folders(each folder contains images of one author). I tried using Lock() available in multiprocessing and changed my code to :
def imp_images(l,authr,urllist):
l.acquire()
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
l.release()
if __name__ == '__main__':
lock=Lock()
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(lock,authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
in this case I am getting "Cannot allocate memory" error. How can I change the code such that the code save images of person in his own folder
I also tried using umlimit -n 4096, which works in the code without lock and does not help when i use Lock() ( I use a google cloud instance with 8gb of RAM).
python multiprocessing
closed as off-topic by ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri Aug 1 at 6:52
This question appears to be off-topic. The users who voted to close gave this specific reason:
- "Code not implemented or not working as intended: Code Review is a community where programmers peer-review your working code to address issues such as security, maintainability, performance, and scalability. We require that the code be working correctly, to the best of the author's knowledge, before proceeding with a review." â ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri
add a comment |Â
up vote
-2
down vote
favorite
My input_file.txt contains 3000 lines, where each line is:
['author1'] (tabspace)xxxxxx (tabspace)['url1','url2']
['author2'] (tabspace)xxxxxx (tabspace)['url3','url4',....] . .. ...
I am interested in extracting images of each author, with images of one author in his own separate folder. I used the following code and it is mixing up images in folders.
import ast
import re
import json
import os
import multiprocessing as mp
from multiprocessing import Pool
from multiprocessing import Process,Lock
import random
import time
import urllib.request
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import string
file_path='/home/'
def link_status(link):
try:
r=requests.get(link,timeout=10)
data=r.text
soup=BeautifulSoup(data,"html.parser")
if soup:
return soup
except Exception as e:
return None
def imp_images(authr,urllist):
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
if __name__ == '__main__':
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
The above code is working fine in term of saving images but mixing up images in folders(each folder contains images of one author). I tried using Lock() available in multiprocessing and changed my code to :
def imp_images(l,authr,urllist):
l.acquire()
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
l.release()
if __name__ == '__main__':
lock=Lock()
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(lock,authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
in this case I am getting "Cannot allocate memory" error. How can I change the code such that the code save images of person in his own folder
I also tried using umlimit -n 4096, which works in the code without lock and does not help when i use Lock() ( I use a google cloud instance with 8gb of RAM).
python multiprocessing
closed as off-topic by ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri Aug 1 at 6:52
This question appears to be off-topic. The users who voted to close gave this specific reason:
- "Code not implemented or not working as intended: Code Review is a community where programmers peer-review your working code to address issues such as security, maintainability, performance, and scalability. We require that the code be working correctly, to the best of the author's knowledge, before proceeding with a review." â ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri
add a comment |Â
up vote
-2
down vote
favorite
up vote
-2
down vote
favorite
My input_file.txt contains 3000 lines, where each line is:
['author1'] (tabspace)xxxxxx (tabspace)['url1','url2']
['author2'] (tabspace)xxxxxx (tabspace)['url3','url4',....] . .. ...
I am interested in extracting images of each author, with images of one author in his own separate folder. I used the following code and it is mixing up images in folders.
import ast
import re
import json
import os
import multiprocessing as mp
from multiprocessing import Pool
from multiprocessing import Process,Lock
import random
import time
import urllib.request
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import string
file_path='/home/'
def link_status(link):
try:
r=requests.get(link,timeout=10)
data=r.text
soup=BeautifulSoup(data,"html.parser")
if soup:
return soup
except Exception as e:
return None
def imp_images(authr,urllist):
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
if __name__ == '__main__':
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
The above code is working fine in term of saving images but mixing up images in folders(each folder contains images of one author). I tried using Lock() available in multiprocessing and changed my code to :
def imp_images(l,authr,urllist):
l.acquire()
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
l.release()
if __name__ == '__main__':
lock=Lock()
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(lock,authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
in this case I am getting "Cannot allocate memory" error. How can I change the code such that the code save images of person in his own folder
I also tried using umlimit -n 4096, which works in the code without lock and does not help when i use Lock() ( I use a google cloud instance with 8gb of RAM).
python multiprocessing
My input_file.txt contains 3000 lines, where each line is:
['author1'] (tabspace)xxxxxx (tabspace)['url1','url2']
['author2'] (tabspace)xxxxxx (tabspace)['url3','url4',....] . .. ...
I am interested in extracting images of each author, with images of one author in his own separate folder. I used the following code and it is mixing up images in folders.
import ast
import re
import json
import os
import multiprocessing as mp
from multiprocessing import Pool
from multiprocessing import Process,Lock
import random
import time
import urllib.request
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import string
file_path='/home/'
def link_status(link):
try:
r=requests.get(link,timeout=10)
data=r.text
soup=BeautifulSoup(data,"html.parser")
if soup:
return soup
except Exception as e:
return None
def imp_images(authr,urllist):
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
if __name__ == '__main__':
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
The above code is working fine in term of saving images but mixing up images in folders(each folder contains images of one author). I tried using Lock() available in multiprocessing and changed my code to :
def imp_images(l,authr,urllist):
l.acquire()
i=0
final_path=file_path+str(authr)+"/"
directory = os.path.dirname(final_path)
if not os.path.exists(directory):
os.makedirs(directory,exist_ok=True)
os.chdir(directory)
for each_link in urllist:
status=link_status(each_link)
if status:
for link in status.find_all('img'):
image_link=link.get('src')
if image_link is not None:
if re.findall(r'(bhttp|bhttps)',image_link):
try:
urllib.request.urlretrieve(image_link,authr+str(i))
i=i+1
except:
pass
else:
try:
finallink=each_link+image_link
urllib.request.urlretrieve(finallink,authr+str(i))
i=i+1
except:
pass
os.chdir(file_path)
l.release()
if __name__ == '__main__':
lock=Lock()
q=mp.Queue()
processes=
with open('/home/input.txt') as f:
for each in f:
each=each.split('t')
authr=ast.literal_eval(each[0])
urls=ast.literal_eval(each[2])
p=mp.Process(target=imp_images,args=(lock,authr[0].replace(" ","_"),urls))
processes.append(p)
for proc in processes:
proc.start()
for proc in processes:
proc.join()
in this case I am getting "Cannot allocate memory" error. How can I change the code such that the code save images of person in his own folder
I also tried using umlimit -n 4096, which works in the code without lock and does not help when i use Lock() ( I use a google cloud instance with 8gb of RAM).
python multiprocessing
edited Aug 1 at 8:51
asked Aug 1 at 6:16
user7238503
11
11
closed as off-topic by ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri Aug 1 at 6:52
This question appears to be off-topic. The users who voted to close gave this specific reason:
- "Code not implemented or not working as intended: Code Review is a community where programmers peer-review your working code to address issues such as security, maintainability, performance, and scalability. We require that the code be working correctly, to the best of the author's knowledge, before proceeding with a review." â ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri
closed as off-topic by ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri Aug 1 at 6:52
This question appears to be off-topic. The users who voted to close gave this specific reason:
- "Code not implemented or not working as intended: Code Review is a community where programmers peer-review your working code to address issues such as security, maintainability, performance, and scalability. We require that the code be working correctly, to the best of the author's knowledge, before proceeding with a review." â ÃÂìýÃÂñ á¿¥Ã栨Â, Janne Karila, Mast, 200_success, yuri
add a comment |Â
add a comment |Â
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes