Python Multiprocessing-Process is mixing output [closed]

My input_file.txt contains 3000 lines, where each line is:



['author1'] (tabspace)xxxxxx (tabspace)['url1','url2']



['author2'] (tabspace)xxxxxx (tabspace)['url3','url4',...]
...
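For reference, a line in this format can be parsed by splitting on the tab character and evaluating the bracketed fields as Python literals. The sketch below uses made-up author and URL values:

```python
import ast

# A sample line in the format described above: tab-separated, with the
# author name and URL list written as Python list literals.
line = "['author1']\txxxxxx\t['http://example.com/a','http://example.com/b']\n"

fields = line.rstrip("\n").split("\t")   # split on real tab characters
author = ast.literal_eval(fields[0])[0]  # first element of the author list
urls = ast.literal_eval(fields[2])       # list of URL strings
print(author, urls)
```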



I want to download the images for each author, saving each author's images in their own separate folder. I used the following code, but it mixes up images across the folders.



import ast
import re
import json
import os
import multiprocessing as mp
from multiprocessing import Pool
from multiprocessing import Process, Lock
import random
import time
import urllib.request
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import string

file_path = '/home/'

def link_status(link):
    try:
        r = requests.get(link, timeout=10)
        data = r.text
        soup = BeautifulSoup(data, "html.parser")
        if soup:
            return soup
    except Exception:
        return None

def imp_images(authr, urllist):
    i = 0
    final_path = file_path + str(authr) + "/"
    directory = os.path.dirname(final_path)
    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
    os.chdir(directory)
    for each_link in urllist:
        status = link_status(each_link)
        if status:
            for link in status.find_all('img'):
                image_link = link.get('src')
                if image_link is not None:
                    if re.findall(r'(\bhttp|\bhttps)', image_link):
                        try:
                            urllib.request.urlretrieve(image_link, authr + str(i))
                            i = i + 1
                        except Exception:
                            pass
                    else:
                        try:
                            finallink = each_link + image_link
                            urllib.request.urlretrieve(finallink, authr + str(i))
                            i = i + 1
                        except Exception:
                            pass
    os.chdir(file_path)

if __name__ == '__main__':
    q = mp.Queue()
    processes = []
    with open('/home/input.txt') as f:
        for each in f:
            each = each.split('\t')
            authr = ast.literal_eval(each[0])
            urls = ast.literal_eval(each[2])
            p = mp.Process(target=imp_images, args=(authr[0].replace(" ", "_"), urls))
            processes.append(p)
    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()


The above code saves the images, but it mixes them up across folders (each folder should contain only one author's images). I tried using the Lock() available in multiprocessing and changed my code to:



def imp_images(l, authr, urllist):
    l.acquire()
    i = 0
    final_path = file_path + str(authr) + "/"
    directory = os.path.dirname(final_path)
    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
    os.chdir(directory)

    for each_link in urllist:
        status = link_status(each_link)
        if status:
            for link in status.find_all('img'):
                image_link = link.get('src')
                if image_link is not None:
                    if re.findall(r'(\bhttp|\bhttps)', image_link):
                        try:
                            urllib.request.urlretrieve(image_link, authr + str(i))
                            i = i + 1
                        except Exception:
                            pass
                    else:
                        try:
                            finallink = each_link + image_link
                            urllib.request.urlretrieve(finallink, authr + str(i))
                            i = i + 1
                        except Exception:
                            pass
    os.chdir(file_path)
    l.release()

if __name__ == '__main__':
    lock = Lock()
    q = mp.Queue()
    processes = []
    with open('/home/input.txt') as f:
        for each in f:
            each = each.split('\t')
            authr = ast.literal_eval(each[0])
            urls = ast.literal_eval(each[2])
            p = mp.Process(target=imp_images, args=(lock, authr[0].replace(" ", "_"), urls))
            processes.append(p)
    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()


In this case I get a "Cannot allocate memory" error. How can I change the code so that it saves each person's images in their own folder?



I also tried using ulimit -n 4096, which works for the code without the lock but does not help when I use Lock(). (I use a Google Cloud instance with 8 GB of RAM.)
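One approach worth considering (a sketch, not the author's code) is to avoid os.chdir entirely and write each image to an absolute path built with os.path.join, so no process depends on a shared working directory, and to replace the ~3000 one-shot Process objects with a bounded multiprocessing.Pool, which addresses the "Cannot allocate memory" error. The save_target helper and worker-count of 8 below are hypothetical choices for illustration:

```python
import os
import multiprocessing as mp

def save_target(base, author, index):
    # Build an absolute destination path for one image; creating the
    # directory here is safe to repeat thanks to exist_ok=True.
    directory = os.path.join(base, author)
    os.makedirs(directory, exist_ok=True)
    return os.path.join(directory, author + str(index))

def imp_images(args):
    author, urllist = args
    # ... fetch each page as in the original loop, but pass
    # save_target('/home/', author, i) as the filename argument to
    # urllib.request.urlretrieve instead of relying on os.chdir ...

if __name__ == '__main__':
    jobs = []  # list of (author, urls) tuples parsed from the input file
    # A Pool caps the number of live processes instead of starting one
    # process per input line all at once.
    with mp.Pool(processes=8) as pool:
        pool.map(imp_images, jobs)
```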







closed as off-topic by πάντα ῥεῖ, Janne Karila, Mast, 200_success, yuri Aug 1 at 6:52


This question appears to be off-topic. The users who voted to close gave this specific reason:


  • "Code not implemented or not working as intended: Code Review is a community where programmers peer-review your working code to address issues such as security, maintainability, performance, and scalability. We require that the code be working correctly, to the best of the author's knowledge, before proceeding with a review." – πάντα ῥεῖ, Janne Karila, Mast, 200_success, yuri
If this question can be reworded to fit the rules in the help center, please edit the question.
edited Aug 1 at 8:51

asked Aug 1 at 6:16

user7238503