# Parser_recipes/parser.py
import json
import os

import requests
from bs4 import BeautifulSoup as bs

import function as f       # local helper module: try_request, try_soup, extr_* parsers
import import_in_BD as ib  # local helper module: imports recipe documents into MongoDB
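# The `function` module is not shown in this file. Judging by how it is called
# below, try_request is assumed to be a retrying wrapper around requests.get,
# and try_soup to return a BeautifulSoup object, or False for a missing page.
# A minimal sketch under those assumptions (not the actual implementation):
#
#     def try_request(url, retries=3, timeout=10):
#         for _ in range(retries):
#             try:
#                 response = requests.get(url, timeout=timeout)
#                 if response.ok:
#                     return response
#             except requests.RequestException:
#                 pass
#         return False
#
#     def try_soup(response):
#         if response is False or not response.text:
#             return False
#         return bs(response.text, 'html.parser')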
link = 'https://povar.ru/list/'

# Accumulates {category: {group: [recipe, ...]}} across the whole crawl.
total_type_recip = {}
def save_to_json(new_data, filename='total_type_recip.json'):
    """Merge new_data into the JSON file on disk, keeping what is already there."""
    # Load existing data if the file exists.
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as fp:  # 'fp' avoids shadowing the module alias 'f'
            try:
                existing_data = json.load(fp)
            except json.JSONDecodeError:
                existing_data = {}
    else:
        existing_data = {}
    # Merge new_data into existing_data.
    for category, groups in new_data.items():
        existing_data.setdefault(category, {})
        for group, recipes in groups.items():
            # Overwrite the group wholesale; deduplication could be added later with a set.
            existing_data[category][group] = recipes
    # Write the merged result back.
    with open(filename, 'w', encoding='utf-8') as fp:
        json.dump(existing_data, fp, ensure_ascii=False, indent=4)
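# Shape of the saved JSON (the structure follows from the code above; the
# names are placeholders, not real scraped data):
#
# {
#     "<category>": {
#         "<group>": [
#             {"name": "<recipe name>", "url": "https://povar.ru/recipes/..."}
#         ]
#     }
# }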
def pars_group(link):
    """Collect dish categories and their groups from the listing page."""
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
    main_container = soup.find_all(class_='ingredientItem')
    for item in main_container:
        links = item.find_all('a')
        title = item.find('h2').get_text()
        # if title == 'Выпечка': break
        # Initialise the category if it has not been created yet.
        if title not in total_type_recip:
            total_type_recip[title] = {}
        print(title)
        # Skip the first <a>, which presumably duplicates the category heading.
        for a in links[1:]:
            name_group = a.get_text()
            link_group = 'https://povar.ru' + a.get('href')
            print('-' * 5, name_group, link_group)
            total_type_recip[title][name_group] = []
            pars_dishs(title, name_group, link_group)
        print('-' * 50)
def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
    """Walk the paginated recipe list for one group and store the results."""
    recipes = []
    while True:
        page += 1
        new_link = link + str(page)
        # try_soup is assumed to return False when the page does not exist,
        # which is how the end of pagination is detected.
        soup = f.try_soup(f.try_request(new_link))
        if soup is False:
            break
        main_container = soup.find_all(class_='listRecipieTitle')
        for item in main_container:
            recipe_name = item.get_text()
            recipe_link = 'https://povar.ru' + item.get('href')
            print('-' * 10, recipe_name, recipe_link)
            # pars_recipie(title, name_group, recipe_name, recipe_link)
            recipes.append({'name': recipe_name, 'url': recipe_link})
        print('-' * 50)
    # Once every page is collected, write into the global structure...
    total_type_recip[title][name_group] = recipes
    # ...and immediately persist the whole dictionary to JSON.
    save_to_json(total_type_recip)
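# Pagination note for pars_dishs above: with link='https://povar.ru/list/spagetti/'
# the loop requests .../spagetti/1, .../spagetti/2, ... until try_soup reports a miss.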
def pars_recipie(title='', name_group='', recipe_name='',
                 link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    """Parse a single recipe page into a dict and (optionally) import it into MongoDB."""
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')
    main_container = soup.find(class_='cont_area hrecipe')
    # Use the last URL segment (without .html) as a stable document id.
    name_id = link.split('/')[-1].replace('.html', '')
    print(name_id)
    photo = main_container.find(class_='photo').get('src')
    detailed_tags = f.extract_tags_from_detailed_tags(main_container)  # collect tags
    ingredients = f.extr_ingredient(main_container)  # collect ingredients
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle'))  # nutrition facts
    steps = f.extr_steps(main_container)  # collect cooking steps
    recip = {
        '_id': name_id,
        'recipe_name': recipe_name,
        'url': link,
        'preview_img': photo,
        'tags': detailed_tags,
        'ingredients': ingredients,
        'nutritional_value': calories_info,
        'steps': steps,
    }
    print('Steps -', len(steps))
    # ib.import_json_in_mongo(recip)
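# Example standalone call (placeholder arguments; pars_recipie builds the
# document and, if the import line above is uncommented, pushes it to MongoDB):
#
#     pars_recipie(title='<category>', name_group='<group>',
#                  recipe_name='<recipe name>',
#                  link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html')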
if __name__ == '__main__':
    pars_group(link)
    # pars_dishs()
    # pars_recipie()