from bs4 import BeautifulSoup as bs
import requests
import re


# Find the full set of manga page images for each entry
def process_hentai_data(hent_data, max_retries=50):

    data_hantai = {}

    for name, data in hent_data.items():
        retries = 0
        images = []
        response = None

        print(f"Processing: {name}")
        print(f"URL: {data['manga_link']}")

        # Request the manga page, retrying on errors or non-200 responses
        while retries < max_retries:
            try:
                response = requests.get(data['manga_link'])
                if response.status_code == 200:
                    break
                retries += 1
            except requests.RequestException:
                retries += 1

        # Skip this entry if the page could not be fetched
        if response is None or response.status_code != 200:
            print(f"Failed to fetch {data['manga_link']} after {max_retries} attempts")
            continue

        # Parse image URLs out of the page source
        soup = bs(response.text, 'html.parser')
        images = re.findall(r'https://\S+/manganew/\S+\.jpg', str(soup))

        print(f"Images found: {len(images)}")
        print(images)
        print(data['original_id'])

        # Build the result entry
        data_hantai[name] = {
            'img': data['img'],
            'link': data['link'],
            'tags': data['tags'],
            'date': data['date'],
            'manga_link': data['manga_link'],
            'imgs_manga': images,
            'len_manga': len(images),
            'original_id': data['original_id']
        }

    return data_hantai
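

# --- Usage sketch (not part of the original script) ---
# Assumption: hent_data is a dict keyed by title, where each value carries the
# fields the function copies into its result ('img', 'link', 'tags', 'date',
# 'manga_link', 'original_id'). The values below are placeholders; a real
# manga page URL is needed for any images to be found.
if __name__ == "__main__":
    sample = {
        "example-title": {
            "img": "https://example.com/cover.jpg",
            "link": "https://example.com/title-page",
            "tags": ["tag1", "tag2"],
            "date": "2024-01-01",
            "manga_link": "https://example.com/title-page/read",
            "original_id": 12345,
        }
    }
    result = process_hentai_data(sample, max_retries=3)
    print(result)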