"""Scrape manga listing pages into a dictionary keyed by title."""

from bs4 import BeautifulSoup as bs
import requests
import HBD

# Listing URL; get_data() appends the page offset to it.
link = 'https://x8.h-chan.me/manga/newest?offset='  # https://x8.h-chan.me/manga/
# NOTE(review): `result` is not read anywhere in this file; the lookup is kept
# because it runs at import time and other code may depend on it - confirm.
result = HBD.find_doc_with_max_id(HBD.connect_to_mongo())
# pars() stops collecting as soon as it meets the entry with this original id.
max_id = '51196'
max_num = 29915

# Russian genitive month names -> two-digit month numbers.
_MONTHS = {
    "января": "01",
    "февраля": "02",
    "марта": "03",
    "апреля": "04",
    "мая": "05",
    "июня": "06",
    "июля": "07",
    "августа": "08",
    "сентября": "09",
    "октября": "10",
    "ноября": "11",
    "декабря": "12",
}


def form_date(date_str):
    """Convert a ``"<day> <month name> <year>"`` string to ``"<day>.<MM>.<year>"``.

    Args:
        date_str: Whitespace-separated day, Russian month name (genitive
            case, e.g. ``"марта"``) and year.

    Returns:
        The reformatted date. Day and year are passed through unchanged;
        only the month name is replaced by its two-digit number.

    Raises:
        ValueError: If ``date_str`` does not split into exactly three parts.
        KeyError: If the month name is not in the lookup table.
    """
    day, month_str, year = date_str.split()
    # Look up the numeric value of the month.
    month = _MONTHS[month_str]
    # Build the date in the required format.
    return f"{day}.{month}.{year}"


def try_request(link, max_retries=50, timeout=30):
    """GET ``link``, retrying until a response with status 200 is received.

    Args:
        link: URL to fetch.
        max_retries: Maximum number of attempts.
        timeout: Per-attempt timeout in seconds passed to ``requests.get``,
            so that a stalled connection counts as a failed attempt instead
            of blocking forever.

    Returns:
        The successful ``requests.Response``, or ``None`` if every attempt
        failed (network error or non-200 status).
    """
    for _ in range(max_retries):
        try:
            response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors propagate instead of being swallowed.
            continue
        if response.status_code == 200:
            return response
    return None


def pars(link, flag, count):
    """Parse one listing page into a mapping of title -> entry details.

    Rows are read in page order. Parsing stops at the first row whose
    numeric id equals ``max_id``; that row is not included in the result.

    Args:
        link: URL of the listing page.
        flag: Value returned as the stop flag when ``max_id`` is not found
            on this page.
        count: Page number, used only in the progress output.

    Returns:
        A ``(data_hantai, flag)`` tuple. ``data_hantai`` maps each title to a
        dict with the keys ``img``, ``link``, ``tags``, ``date``,
        ``manga_link`` and ``original_id``; a later row with the same title
        replaces an earlier one. ``flag`` is ``True`` when the ``max_id``
        entry was reached.

    Raises:
        RuntimeError: If the page could not be fetched.
    """
    data_hantai = {}
    response = try_request(link)
    if response is None:
        raise RuntimeError(f'Failed to fetch {link}')

    soup = bs(response.text, 'html.parser')
    for item in soup.find_all(class_='content_row'):
        row_container = item.find(class_='title_link')
        link_manga = 'https://hentaichan.live' + row_container.get('href')

        # The last path segment starts with the numeric id: "<id>-<slug>".
        original_id = link_manga.split('/')[-1].split('-')[0]
        if int(original_id) == int(max_id):
            return data_hantai, True

        # A row may come without an image; keep the entry and record None.
        images = item.find(class_='manga_images')
        img_tag = images.find('img') if images else None
        img = img_tag.get('src') if img_tag else None

        if img:
            # The cache id is taken from the image's parent directory name.
            cache_id = img.split('/')[-2][:10]
            manga_link = link_manga + '?cacheId=' + cache_id
        else:
            manga_link = link_manga
        manga_link = manga_link.replace('/manga/', '/online/')

        genre = item.find(class_='genre')
        tags = [tag.strip() for tag in genre.text.split(',')]
        date = form_date(item.find(class_='row4_right').find('b').text)

        data_hantai[row_container.text] = {
            'img': img,
            'link': link_manga,
            'tags': tags,
            'date': date,
            'manga_link': manga_link,
            'original_id': original_id,
        }

    print(f'стр - {count}', data_hantai)
    for title, entry in data_hantai.items():
        print('-' * 10, title, entry['manga_link'])
    return data_hantai, flag


def get_data():
    """Collect entries page by page until the ``max_id`` entry is reached.

    Pages are requested at offsets 0, 20, ..., 4980. Collection stops early
    once ``pars`` reports that it reached ``max_id``.

    Returns:
        A dict merging the per-page results of ``pars``. The collected data
        is returned even when ``max_id`` is never encountered.
    """
    data = {}
    for count, offset in enumerate(range(0, 5000, 20), start=1):
        data_hantai, flag = pars(link + str(offset), False, count)
        data.update(data_hantai)
        if flag:
            break
    return data