Сбор и сохранение рецептов и ссылок в JSON

2025-11-23 04:09:34 +03:00
parent cbb56871e8
commit 80eab4e9bd
3 changed files with 77 additions and 23 deletions
--- a/parser.py
+++ b/parser.py
@@ -9,6 +9,35 @@ import import_in_BD as ib
 link = 'https://povar.ru/list/'


+import json
+
+
+total_type_recip = {}
+
+def save_to_json(new_data, filename='total_type_recip.json'):
+    # Load previously saved data if the file already exists
+    if os.path.exists(filename):
+        with open(filename, 'r', encoding='utf-8') as f:
+            try:
+                existing_data = json.load(f)
+            except json.JSONDecodeError:
+                existing_data = {}  # file content is not valid JSON: start from an empty dict
+    else:
+        existing_data = {}
+
+    # Merge new_data (category -> group -> recipes) into existing_data
+    for category, groups in new_data.items():
+        if category not in existing_data:
+            existing_data[category] = {}
+        for group, recipes in groups.items():
+            # Unconditionally replaces whatever was stored for this group; no de-duplication is done here
+            existing_data[category][group] = recipes
+
+    # Write the merged result back to the same file
+    with open(filename, 'w', encoding='utf-8') as f:
+        json.dump(existing_data, f, ensure_ascii=False, indent=4)
+
+

 def pars_group(link):
    #Сбор видов блюд
@@ -19,13 +48,18 @@ def pars_group(link):

    main_container = soup.find_all(class_='ingredientItem')

-
    for items in main_container:

        item = items.find_all('a')
-        title = item[0].get_text()

-        if title == 'Салаты': break
+        title = items.find_all('h2')
+        title = title[0].get_text()
+
+        #if title == 'Выпечка': break
+
+        # Инициализируем категорию, если ещё не создана
+        if title not in total_type_recip:
+            total_type_recip[title] = {}

        print(title)

@@ -34,33 +68,55 @@ def pars_group(link):
            link_group = 'https://povar.ru' + i.get('href')
            print('-'*5, name_group, link_group)

-            pars_dishs(title, name_group, link_group)
+            total_type_recip[title][name_group] = []

+            pars_dishs(title, name_group, link_group)


        print('-'*50)


+
+
 def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
+
+
+    global total_type_recip
+
    #Сбор списка рецептов
+    recipes = []
+
    while True:
+
        page += 1
        new_link = link + str(page)
        soup = f.try_soup(f.try_request(new_link))

        if soup == False: break

+
        main_container = soup.find_all(class_='listRecipieTitle')

        for items in main_container:
            recipe_name = items.get_text()
            recipe_link = 'https://povar.ru' + items.get('href')

-            print(recipe_name, recipe_link)
-            pars_recipie(title, name_group, recipe_name, recipe_link)
+            print('-'*10,recipe_name, recipe_link)
+
+            #pars_recipie(title, name_group, recipe_name, recipe_link)
+
+            recipes.append({'name': recipe_name, 'url': recipe_link})
+
+

        print('-'*50)

+    # После сбора всех страниц — записываем в глобальную структуру
+    total_type_recip[title][name_group] = recipes
+
+    # И сразу сохраняем ВЕСЬ словарь в JSON
+    save_to_json(total_type_recip)
+


 def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
@@ -82,16 +138,9 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re
    recipies = {'recipes': {}}

    detailed_tags = f.extract_tags_from_detailed_tags(main_container) #Собираем теги
-    #print(detailed_tags)
-
    ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
-    #print(ingredients)
-
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ
-    #print(calories_info)
-
    steps = f.extr_steps(main_container) #Сборка шагов
-    #print(steps)


    recip = {'_id' : name_id,
@@ -103,16 +152,15 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re
                'nutritional_value':calories_info,
                'steps':steps}

-    print(recip)
-    print(len(steps))
+    print('Шагов - ',len(steps))

-    #ib.import_json_in_mongo(recipies)
+    #ib.import_json_in_mongo(recip)



-#pars_group(link)
+pars_group(link)
 #pars_dishs()
-pars_recipie(link="https://povar.ru/recipes/podjarka_k_makaronam-60879.html")
+#pars_recipie()