Нормализация данных, приведение к общему JSON

This commit is contained in:
zein
2025-11-18 03:37:47 +03:00
parent 768ab3c4ef
commit 8c53c35dec
2 changed files with 30 additions and 10 deletions

View File

@@ -54,6 +54,11 @@ def extract_tags_from_detailed_tags(main_container):
else: else:
tag_list = [] tag_list = []
if label == 'Назначение': label = 'occasion'
elif label == 'Основной ингредиент': continue
elif label == 'Блюдо': label = 'type_dish'
elif label == 'География кухни': label = 'cuisine'
tags[label] = tag_list tags[label] = tag_list
return tags return tags

View File

@@ -2,6 +2,7 @@ import requests
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
import re import re
import function as f import function as f
import json
link = 'https://povar.ru/list/' link = 'https://povar.ru/list/'
@@ -9,6 +10,8 @@ link = 'https://povar.ru/list/'
def pars_group(link): def pars_group(link):
#Сбор видов блюд #Сбор видов блюд
response = f.try_request(link) response = f.try_request(link)
soup = bs(response.text, 'html.parser') soup = bs(response.text, 'html.parser')
@@ -29,10 +32,14 @@ def pars_group(link):
link_group = 'https://povar.ru' + i.get('href') link_group = 'https://povar.ru' + i.get('href')
print('-'*5, name_group, link_group) print('-'*5, name_group, link_group)
pars_dishs(title, name_group, link_group)
print('-'*50) print('-'*50)
def pars_dishs(link='https://povar.ru/list/spagetti/', page=0): def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
#Сбор списка рецептов #Сбор списка рецептов
while True: while True:
page += 1 page += 1
@@ -48,19 +55,25 @@ def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
recipe_link = 'https://povar.ru' + items.get('href') recipe_link = 'https://povar.ru' + items.get('href')
print(recipe_name, recipe_link) print(recipe_name, recipe_link)
pars_recipie(title, name_group, recipe_name, recipe_link)
print('-'*50) print('-'*50)
def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'): def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
response = f.try_request(link) response = f.try_request(link)
soup = bs(response.text, 'html.parser') soup = bs(response.text, 'html.parser')
main_container = soup.find(class_='cont_area hrecipe') main_container = soup.find(class_='cont_area hrecipe')
name = main_container.find(class_='detailed fn').get_text() name_id = link.split('/')[-1]
try:
name_id = name_id.replace('.html', '')
except: pass
print(name_id)
recipies = {'recipes': {}} recipies = {'recipes': {}}
@@ -68,17 +81,19 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
print(detailed_tags) print(detailed_tags)
ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
print(ingredients) #print(ingredients)
calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ
print(calories_info) #print(calories_info)
steps = f.extr_steps(main_container) #Сборка шагов steps = f.extr_steps(main_container) #Сборка шагов
print(steps) #print(steps)
preview_img = steps[-1]['img'] preview_img = steps[-1]['img']
recipies['recipes'] = { 'name':name, recipies['recipes'] = { '_id' : name_id,
'recipe_name':recipe_name,
'url':link,
'preview_img':preview_img, 'preview_img':preview_img,
'tags':detailed_tags, 'tags':detailed_tags,
'ingredients':ingredients, 'ingredients':ingredients,
@@ -88,9 +103,9 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
#pars_group(link) pars_group(link)
#pars_dish() #pars_dishs()
pars_recipie() #pars_recipie()