def get_gallery(url):
    """Walk a gallery page by page and hand every image page to get_image.

    Fetches ``url``, takes the first anchor whose ``href`` matches the
    image-page pattern, and then keeps following the ``<a id="next">`` link
    found on each image page. Pages are numbered from 1 in the order they
    are visited; that number is passed to ``get_image`` as its counter.

    Args:
        url: Address of the gallery's front page.

    Raises:
        ValueError: If the gallery page contains no link matching the
            image-page pattern.
    """
    from urllib.parse import urljoin

    html = requests.get(url, cookies=cookies)
    bs = BeautifulSoup(html.text, 'lxml')

    # NOTE(review): 'regex' looks like a placeholder pattern; as written it
    # only matches hrefs containing the literal text "regex" -- confirm.
    first_link = bs.find('a', href=re.compile('regex'))
    if first_link is None:
        # Fail with a clear message instead of "'NoneType' object is not
        # subscriptable" when the page has no matching link.
        raise ValueError(f'no image page link found on {url}')

    # Resolve against the page we found the link on so relative hrefs work.
    img_url = urljoin(url, first_link['href'])
    visited = set()
    counter = 0
    # Stop on a missing "next" link or as soon as a page repeats; the last
    # page linking to itself is just the shortest possible cycle.
    while img_url and img_url not in visited:
        visited.add(img_url)
        html = requests.get(img_url, cookies=cookies)
        bs = BeautifulSoup(html.text, 'lxml')
        counter += 1
        get_image(img_url, counter)

        next_page = bs.find('a', id='next')
        next_href = next_page.get('href') if next_page else None
        img_url = urljoin(img_url, next_href) if next_href else None
def _guess_image_extension(content_type, final_url):
    """Pick a file extension for a downloaded image, defaulting to '.png'.

    The Content-Type header is trusted first; the path of the final URL is
    used as a fallback. '.png' is returned when neither identifies an image.
    """
    import mimetypes
    import os
    from urllib.parse import urlparse

    common = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
        'image/webp': '.webp',
    }
    # Drop parameters such as "; charset=..." before comparing.
    mime = (content_type or '').split(';', 1)[0].strip().lower()
    if mime in common:
        return common[mime]
    if mime.startswith('image/'):
        guessed = mimetypes.guess_extension(mime)
        if guessed:
            return guessed

    suffix = os.path.splitext(urlparse(final_url or '').path)[1].lower()
    if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}:
        return suffix
    return '.png'


def get_image(url, page_counter):
    """Download the original image linked from one image page.

    Fetches ``url``, looks for an anchor whose text contains
    'Download original', downloads its target and writes the bytes to
    ``<page_counter><ext>`` relative to the current working directory.
    The extension is derived from the response rather than assumed to be
    '.png'. Nothing is written when the page has no such link.

    Args:
        url: Address of the image page.
        page_counter: Number used as the output file's base name.

    Raises:
        requests.HTTPError: If the image download returns an error status.
    """
    from urllib.parse import urljoin

    html = requests.get(url, cookies=cookies)
    bs = BeautifulSoup(html.text, 'lxml')
    original_img = bs.find('a', string=lambda t: t and 'Download original' in t)
    if not original_img or not original_img.get('href'):
        return

    # Resolve against the page URL so a relative download link still works.
    download_url = urljoin(url, original_img['href'])
    img_response = requests.get(download_url, cookies=cookies)
    # Do not save an HTTP error body to disk as if it were an image.
    img_response.raise_for_status()

    extension = _guess_image_extension(
        img_response.headers.get('Content-Type'),
        img_response.url,  # final URL, after any redirects
    )
    with open(f'{page_counter}{extension}', mode='wb') as file:
        file.write(img_response.content)
I'm trying to write a scraper for sadpanda with about a week's worth of learning to program. Besides the fact that I'm scraping porn, how fucked is it?