Нормализация данных, приведение к общему json

2025-11-18 03:37:47 +03:00
parent 768ab3c4ef
commit 8c53c35dec
2 changed files with 30 additions and 10 deletions
--- a/function.py
+++ b/function.py
@@ -54,6 +54,11 @@ def extract_tags_from_detailed_tags(main_container):
        else:
            tag_list = []
        if label == 'Назначение': label = 'occasion'
        elif label == 'Основной ингредиент': continue
        elif label == 'Блюдо': label = 'type_dish'
        elif label == 'География кухни': label = 'cuisine'
        tags[label] = tag_list
    return tags
--- a/parser.py
+++ b/parser.py
@@ -2,6 +2,7 @@ import requests
 from bs4 import BeautifulSoup as bs
 import re
 import function as f
 import json
 link = 'https://povar.ru/list/'
@@ -9,6 +10,8 @@ link = 'https://povar.ru/list/'
 def pars_group(link):
    #Сбор видов блюд
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
@@ -29,10 +32,14 @@ def pars_group(link):
            link_group = 'https://povar.ru' + i.get('href')
            print('-'*5, name_group, link_group)
            pars_dishs(title, name_group, link_group)
        print('-'*50)
-def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
+def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
    #Сбор списка рецептов
    while True:
        page += 1
@@ -48,19 +55,25 @@ def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
            recipe_link = 'https://povar.ru' + items.get('href')
            print(recipe_name, recipe_link)
            pars_recipie(title, name_group, recipe_name, recipe_link)
        print('-'*50)
-def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
+def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
    main_container = soup.find(class_='cont_area hrecipe')
-    name = main_container.find(class_='detailed fn').get_text()
+    name_id = link.split('/')[-1]
    try:
        name_id = name_id.replace('.html', '')
    except: pass
    print(name_id)
    recipies = {'recipes': {}}
@@ -68,17 +81,19 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
    print(detailed_tags)
    ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
-    print(ingredients)
+    #print(ingredients)
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ
-    print(calories_info)
+    #print(calories_info)
    steps = f.extr_steps(main_container) #Сборка шагов
-    print(steps)
+    #print(steps)
    preview_img = steps[-1]['img']
-    recipies['recipes'] = { 'name':name,
+    recipies['recipes'] = { '_id' : name_id,
                            'recipe_name':recipe_name,
                            'url':link,
                            'preview_img':preview_img,
                            'tags':detailed_tags,
                            'ingredients':ingredients,
@@ -88,9 +103,9 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
-#pars_group(link)
+pars_group(link)
-#pars_dish()
+#pars_dishs()
-pars_recipie()
+#pars_recipie()