# Scraper for povar.ru: walks the category listing, collects dish groups and
# recipe links, and persists the result to total_type_recip.json.
import json
import os
import re

import requests
from bs4 import BeautifulSoup as bs

import function as f
import import_in_BD as ib
# Root listing page of povar.ru; every dish category is linked from here.
link = 'https://povar.ru/list/'

# Accumulator shared by the parsing functions below:
# {category_title: {group_name: [{'name': ..., 'url': ...}, ...]}}
total_type_recip = {}
def save_to_json(new_data, filename='total_type_recip.json'):
    """Merge *new_data* into the JSON file *filename* and write it back.

    The file stores a nested mapping {category: {group: [recipes]}}.
    Existing categories are preserved; a group that appears in *new_data*
    replaces the stored list for that group (no per-recipe de-duplication).

    Args:
        new_data: mapping {category: {group: list_of_recipes}} to merge in.
        filename: path of the JSON file to update.
    """
    # Load data from a previous run; tolerate a missing or corrupted file
    # by starting from an empty dict.
    existing_data = {}
    if os.path.exists(filename):
        # NOTE: handle named `fh` so it does not shadow the module-level
        # `import function as f` alias.
        with open(filename, 'r', encoding='utf-8') as fh:
            try:
                existing_data = json.load(fh)
            except json.JSONDecodeError:
                existing_data = {}

    # Merge new_data into existing_data, group by group.
    for category, groups in new_data.items():
        existing_data.setdefault(category, {})
        for group, recipes in groups.items():
            # Overwrite the group's list wholesale; callers re-collect a
            # whole group at a time, so replacing avoids duplicates.
            existing_data[category][group] = recipes

    # Persist the merged structure, keeping non-ASCII text readable.
    with open(filename, 'w', encoding='utf-8') as fh:
        json.dump(existing_data, fh, ensure_ascii=False, indent=4)
def pars_group(link):
    """Collect every dish category from the povar.ru listing page.

    For each category block found on *link*, registers the category in the
    module-level ``total_type_recip`` dict and crawls every group link
    inside it via ``pars_dishs``.
    """
    page = f.try_request(link)
    soup = bs(page.text, 'html.parser')

    # Each 'ingredientItem' block holds one category: an <h2> title
    # followed by <a> links to the groups of dishes in that category.
    for block in soup.find_all(class_='ingredientItem'):
        anchors = block.find_all('a')
        category = block.find_all('h2')[0].get_text()

        # Register the category once, keeping any groups collected earlier.
        total_type_recip.setdefault(category, {})

        print(category)

        # The first anchor duplicates the category itself — skip it.
        for anchor in anchors[1:]:
            group_name = anchor.get_text()
            group_link = 'https://povar.ru' + anchor.get('href')
            print('-' * 5, group_name, group_link)

            total_type_recip[category][group_name] = []
            pars_dishs(category, group_name, group_link)

        print('-' * 50)
def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
    """Walk the paginated recipe list of one group and record every recipe.

    Pages are fetched as ``link + page_number`` until ``f.try_soup``
    signals failure. Collected recipes are stored under
    ``total_type_recip[title][name_group]`` and the whole accumulator is
    flushed to JSON via ``save_to_json``.
    """
    global total_type_recip

    collected = []

    while True:
        page += 1
        soup = f.try_soup(f.try_request(link + str(page)))

        # try_soup reports "no more pages / request failed" with False.
        if soup == False: break

        for entry in soup.find_all(class_='listRecipieTitle'):
            name = entry.get_text()
            url = 'https://povar.ru' + entry.get('href')
            print('-' * 10, name, url)

            # pars_recipie(title, name_group, name, url)  # per-recipe parse (disabled)
            collected.append({'name': name, 'url': url})

    print('-' * 50)

    # Replace the group's list wholesale, then persist everything gathered
    # so far so progress survives an interrupted run.
    total_type_recip[title][name_group] = collected
    save_to_json(total_type_recip)
def pars_recipie(title=0, name_group=0, recipe_name=0, link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    """Parse a single recipe page and build its structured record.

    Args:
        title: category the recipe belongs to (currently unused; kept for
            signature compatibility with the other parsers).
        name_group: group the recipe belongs to (currently unused).
        recipe_name: human-readable recipe name stored in the record.
        link: full URL of the recipe page.

    Returns:
        dict with the recipe's id, name, url, preview image, tags,
        ingredients, nutritional value and preparation steps.
    """
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')

    main_container = soup.find(class_='cont_area hrecipe')

    # The last path segment minus '.html' serves as a stable document id,
    # e.g. 'slivochnaya_karbonara-73186'. str.replace never raises, so the
    # original try/except around it was dead weight.
    name_id = link.split('/')[-1].replace('.html', '')
    print(name_id)

    photo = main_container.find(class_='photo').get('src')

    detailed_tags = f.extract_tags_from_detailed_tags(main_container)  # tags
    ingredients = f.extr_ingredient(main_container)  # ingredient list
    calories_info = f.extract_nutrition(main_container.find_all(class_='circle'))  # nutrition facts
    steps = f.extr_steps(main_container)  # preparation steps

    recip = {'_id': name_id,
             'recipe_name': recipe_name,
             'url': link,
             'preview_img': photo,
             'tags': detailed_tags,
             'ingredients': ingredients,
             'nutritional_value': calories_info,
             'steps': steps}

    print('Шагов - ', len(steps))

    # ib.import_json_in_mongo(recip)  # enable to push the record to MongoDB
    return recip
if __name__ == '__main__':
    # Entry point: crawl every category/group starting from the list page.
    # The guard prevents the network crawl from firing on a mere import.
    pars_group(link)
    # pars_dishs()    # debug: parse a single group
    # pars_recipie()  # debug: parse a single recipe