# Parser_recipes/parser.py
import json
import os

import requests
from bs4 import BeautifulSoup as bs

import function as f       # local helper module: try_request, try_soup, extr_* parsers
import import_in_BD as ib  # local helper module: imports recipe documents into MongoDB
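# The `function` module is not shown in this file. Judging by how it is called
# below, try_request is assumed to be a retrying wrapper around requests.get,
# and try_soup to return a BeautifulSoup object, or False for a missing page.
# A minimal sketch under those assumptions (not the actual implementation):
#
#     def try_request(url, retries=3, timeout=10):
#         for _ in range(retries):
#             try:
#                 response = requests.get(url, timeout=timeout)
#                 if response.ok:
#                     return response
#             except requests.RequestException:
#                 pass
#         return False
#
#     def try_soup(response):
#         if response is False or not response.text:
#             return False
#         return bs(response.text, 'html.parser')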
link = 'https://povar.ru/list/'

# Accumulates {category: {group: [recipe, ...]}} across the whole crawl.
total_type_recip = {}
def save_to_json(new_data, filename='total_type_recip.json'):
    """Merge new_data into the JSON file on disk, keeping what is already there."""
    # Load existing data if the file exists.
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as fp:  # 'fp' avoids shadowing the module alias 'f'
            try:
                existing_data = json.load(fp)
            except json.JSONDecodeError:
                existing_data = {}
    else:
        existing_data = {}
    # Merge new_data into existing_data.
    for category, groups in new_data.items():
        existing_data.setdefault(category, {})
        for group, recipes in groups.items():
            # Overwrite the group wholesale; deduplication could be added later with a set.
            existing_data[category][group] = recipes
    # Write the merged result back.
    with open(filename, 'w', encoding='utf-8') as fp:
        json.dump(existing_data, fp, ensure_ascii=False, indent=4)
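# Shape of the saved JSON (the structure follows from the code above; the
# names are placeholders, not real scraped data):
#
# {
#     "<category>": {
#         "<group>": [
#             {"name": "<recipe name>", "url": "https://povar.ru/recipes/..."}
#         ]
#     }
# }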
def pars_group(link):
    """Collect dish categories and their groups from the listing page."""
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
    main_container = soup.find_all(class_='ingredientItem')
    for item in main_container:
        links = item.find_all('a')
        title = item.find('h2').get_text()
        # if title == 'Выпечка': break
        # Initialise the category if it has not been created yet.
        if title not in total_type_recip:
            total_type_recip[title] = {}
        print(title)
        # Skip the first <a>, which presumably duplicates the category heading.
        for a in links[1:]:
            name_group = a.get_text()
            link_group = 'https://povar.ru' + a.get('href')
            print('-' * 5, name_group, link_group)
            total_type_recip[title][name_group] = []
            pars_dishs(title, name_group, link_group)
        print('-' * 50)
def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
    """Walk the paginated recipe list for one group and store the results."""
    recipes = []
    while True:
        page += 1
        new_link = link + str(page)
        # try_soup is assumed to return False when the page does not exist,
        # which is how the end of pagination is detected.
        soup = f.try_soup(f.try_request(new_link))
        if soup is False:
            break
        main_container = soup.find_all(class_='listRecipieTitle')
        for item in main_container:
            recipe_name = item.get_text()
            recipe_link = 'https://povar.ru' + item.get('href')
            print('-' * 10, recipe_name, recipe_link)
            # pars_recipie(title, name_group, recipe_name, recipe_link)
            recipes.append({'name': recipe_name, 'url': recipe_link})
        print('-' * 50)
    # Once every page is collected, write into the global structure...
    total_type_recip[title][name_group] = recipes
    # ...and immediately persist the whole dictionary to JSON.
    save_to_json(total_type_recip)
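# Pagination note for pars_dishs above: with link='https://povar.ru/list/spagetti/'
# the loop requests .../spagetti/1, .../spagetti/2, ... until try_soup reports a miss.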
def pars_recipie(title='', name_group='', recipe_name='',
                 link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    """Parse a single recipe page into a dict and (optionally) import it into MongoDB."""
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
    main_container = soup.find(class_='cont_area hrecipe')
    # Use the last URL segment (without .html) as a stable document id.
    name_id = link.split('/')[-1].replace('.html', '')
    print(name_id)
    photo = main_container.find(class_='photo').get('src')
    detailed_tags = f.extract_tags_from_detailed_tags(main_container)  # collect tags
    ingredients = f.extr_ingredient(main_container)  # collect ingredients
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle'))  # nutrition facts
    steps = f.extr_steps(main_container)  # collect cooking steps
    recip = {
        '_id': name_id,
        'recipe_name': recipe_name,
        'url': link,
        'preview_img': photo,
        'tags': detailed_tags,
        'ingredients': ingredients,
        'nutritional_value': calories_info,
        'steps': steps,
    }
    print('Steps -', len(steps))
    # ib.import_json_in_mongo(recip)
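# Example standalone call (placeholder arguments; pars_recipie builds the
# document and, if the import line above is uncommented, pushes it to MongoDB):
#
#     pars_recipie(title='<category>', name_group='<group>',
#                  recipe_name='<recipe name>',
#                  link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html')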
if __name__ == '__main__':
    pars_group(link)
    # pars_dishs()
    # pars_recipie()