import os
import json

import requests
from bs4 import BeautifulSoup as bs

import function as f
import import_in_BD as ib

link = 'https://povar.ru/list/'
total_type_recip = {}


def save_to_json(new_data, filename='total_type_recip.json'):
    # Load the existing data if the file already exists
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as fh:
            try:
                existing_data = json.load(fh)
            except json.JSONDecodeError:
                existing_data = {}
    else:
        existing_data = {}

    # Merge new_data into existing_data
    for category, groups in new_data.items():
        if category not in existing_data:
            existing_data[category] = {}
        for group, recipes in groups.items():
            # Overwrite unconditionally; a set could be used later to deduplicate
            existing_data[category][group] = recipes

    # Write the merged result back
    with open(filename, 'w', encoding='utf-8') as fh:
        json.dump(existing_data, fh, ensure_ascii=False, indent=4)


def pars_group(link):
    # Collect the dish categories and the groups inside each one
    response = f.try_request(link)
    if response is False:
        return
    soup = bs(response.text, 'html.parser')
    main_container = soup.find_all(class_='ingredientItem')
    for items in main_container:
        item = items.find_all('a')
        title = items.find_all('h2')[0].get_text()
        # if title == 'Выпечка': break  # debug stop at the 'Выпечка' (baking) category

        # Initialise the category if it has not been created yet
        if title not in total_type_recip:
            total_type_recip[title] = {}
        print(title)
        for i in item[1:]:
            name_group = i.get_text()
            link_group = 'https://povar.ru' + i.get('href')
            print('-' * 5, name_group, link_group)
            total_type_recip[title][name_group] = []
            pars_dishs(title, name_group, link_group)
        print('-' * 50)


def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
    # Collect the recipe list for one group, paging until a request fails
    global total_type_recip
    recipes = []
    while True:
        page += 1
        new_link = link + str(page)
        soup = f.try_soup(f.try_request(new_link))
        if soup is False:
            break
        main_container = soup.find_all(class_='listRecipieTitle')
        for items in main_container:
            recipe_name = items.get_text()
            recipe_link = 'https://povar.ru' + items.get('href')
            print('-' * 10, recipe_name, recipe_link)
            # pars_recipie(title, name_group, recipe_name, recipe_link)
            recipes.append({'name': recipe_name, 'url': recipe_link})
        print('-' * 50)

    # After all pages are collected, write into the global structure
    total_type_recip[title][name_group] = recipes
    # ...and immediately persist the WHOLE dictionary to JSON
    save_to_json(total_type_recip)


def pars_recipie(title='', name_group='', recipe_name='',
                 link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    # Parse one recipe page into a document ready for import
    response = f.try_request(link)
    if response is False:
        return
    soup = bs(response.text, 'html.parser')
    main_container = soup.find(class_='cont_area hrecipe')

    # Derive a stable id from the last URL segment, e.g. 'slivochnaya_karbonara-73186'
    name_id = link.split('/')[-1].replace('.html', '')
    print(name_id)

    photo = main_container.find(class_='photo').get('src')
    detailed_tags = f.extract_tags_from_detailed_tags(main_container)  # collect tags
    ingredients = f.extr_ingredient(main_container)  # collect ingredients
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle'))  # nutrition (protein/fat/carbs)
    steps = f.extr_steps(main_container)  # collect the cooking steps

    recip = {
        '_id': name_id,
        'recipe_name': recipe_name,
        'url': link,
        'preview_img': photo,
        'tags': detailed_tags,
        'ingredients': ingredients,
        'nutritional_value': calories_info,
        'steps': steps,
    }
    print('Steps -', len(steps))
    # ib.import_json_in_mongo(recip)


if __name__ == '__main__':
    pars_group(link)
    # pars_dishs()
    # pars_recipie()
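
# ---------------------------------------------------------------------------
# Note: the `function` module imported as `f` is external to this file, so the
# helpers used above (try_request, try_soup, extract_tags_from_detailed_tags,
# extr_ingredient, extract_nutrition, extr_steps) are not shown here. Judging
# only by how this script calls them, a minimal sketch of the two request
# helpers could look like the following (a hypothetical reconstruction, not
# the actual function.py):
#
#     def try_request(url):
#         # Fetch a URL; return the Response on HTTP 200, False on any failure.
#         try:
#             resp = requests.get(url, timeout=10)
#             return resp if resp.status_code == 200 else False
#         except requests.RequestException:
#             return False
#
#     def try_soup(response):
#         # Propagate failure; otherwise wrap the Response in BeautifulSoup.
#         if response is False:
#             return False
#         return bs(response.text, 'html.parser')
#
# Returning False on failure is what lets pars_dishs() treat a failed page
# request as the end of pagination (`if soup is False: break`).
# ---------------------------------------------------------------------------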