Files
Hentai_manga_parser/full_img_manga.py
2025-11-03 00:18:30 +03:00

49 lines
1.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from bs4 import BeautifulSoup as bs
import requests
import re
# Collect the full-size page images for each manga
def process_hentai_data(hent_data, max_retries=50):
    """Download each manga page and collect the image URLs found on it.

    Args:
        hent_data: Mapping of manga name to a dict that provides the keys
            'img', 'link', 'tags', 'date', 'manga_link' and 'original_id'.
            'manga_link' is the URL that gets downloaded.
        max_retries: Maximum number of download attempts per manga. An
            attempt fails when the request raises or the HTTP status is
            not 200.

    Returns:
        A dict keyed by manga name. Each value repeats the input fields and
        adds 'imgs_manga' (list of matched image URLs) and 'len_manga' (its
        length). When no attempt succeeds, the entry is still emitted with
        an empty image list.
    """
    # Compiled once: the same pattern is applied to every downloaded page.
    image_pattern = re.compile(r'https:\/\/\S+\/manganew\/\S+\.jpg')
    # Seconds. Without a timeout a stalled server would block forever and
    # the retry loop would never get a chance to run.
    request_timeout = 30

    data_hantai = {}
    for name, data in hent_data.items():
        print(f"Обрабатываем: {name}")
        print(f"URL: {data['manga_link']}")

        # Request the manga page. ``response`` is reset for every entry so
        # a failed download can never fall through to the page fetched for
        # the previous manga (or to an unbound name on the first entry).
        response = None
        retries = 0
        while retries < max_retries:
            retries += 1
            try:
                candidate = requests.get(
                    data['manga_link'], timeout=request_timeout
                )
            except requests.RequestException:
                # Network-level failure: try again. Anything else
                # (KeyboardInterrupt, programming errors) must propagate.
                continue
            if candidate.status_code == 200:
                response = candidate
                break

        # Parse the images only from a successfully downloaded page.
        if response is None:
            print(f"Не удалось загрузить страницу: {data['manga_link']}")
            images = []
        else:
            soup = bs(response.text, 'html.parser')
            images = image_pattern.findall(str(soup))

        print(f"Найдено изображений: {len(images)}")
        print(images)
        print(data['original_id'])

        # Build the result entry.
        data_hantai[name] = {
            'img': data['img'],
            'link': data['link'],
            'tags': data['tags'],
            'date': data['date'],
            'manga_link': data['manga_link'],
            'imgs_manga': images,
            'len_manga': len(images),
            'original_id': data['original_id'],
        }
    return data_hantai