Добавили обработку страниц-исключений: шагов приготовления, у которых нет фото

This commit is contained in:
zein
2025-11-23 02:02:01 +03:00
parent 8c53c35dec
commit cbb56871e8
3 changed files with 82 additions and 20 deletions

View File

@@ -2,6 +2,9 @@ import requests
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
import re import re
from dns.name import empty
def try_request(link, max_retries=5): def try_request(link, max_retries=5):
retries = 0 retries = 0
while retries < max_retries: while retries < max_retries:
@@ -93,24 +96,35 @@ def extr_ingredient(main_container):
return tags return tags
def extr_steps(main_container): def extr_steps(main_container):
# На сайте есть страницы исключения по шагам готовки. Фото есть не везде, тогда ищем класс detailed_step_description_big noPhotoStep
main_container = main_container.find_all(class_='stepphotos') # Класс detailed_step_description_big noPhotoStep ищет текст через get_text(), а не через тег title
steps = [] steps = []
count = 1
for items in main_container[1:]: recipeInstructions = main_container.find(class_='instructions')
main_container = recipeInstructions.find_all(class_='stepphotos')
# Проверяем страницу исключение
if not main_container:
main_container = recipeInstructions.find_all(class_='detailed_step_description_big noPhotoStep')
for items in main_container:
img = items.get('href') img = items.get('href')
title = items.get('title') title = items.get('title')
# Если класс detailed_step_description_big noPhotoStep, то ищем через get_text. Сейчас title пустой, тк его нет на странице
if title is None:
title = items.get_text() #Теперь тайтл заполнен
print(title)
steps.append({ steps.append({
'img': img, 'img': img,
'title': title 'title': title
}) })
return steps return steps
@@ -135,6 +149,5 @@ def extr_steps(main_container):

42
import_in_BD.py Normal file
View File

@@ -0,0 +1,42 @@
from pymongo import MongoClient
def connect_to_mongo(
    uri="mongodb://localhost:27017/",
    db_name="Food",
    collection_name="Test",
):
    """Create a ``MongoClient`` and return a handle to one collection.

    Args:
        uri: MongoDB connection string passed to ``MongoClient``.
        db_name: Name of the database to select on the client.
        collection_name: Name of the collection to select in ``db_name``.

    Returns:
        The ``client[db_name][collection_name]`` collection object.

    Note:
        Every call constructs a new ``MongoClient``; callers that write many
        documents should call this once and reuse the returned handle.
    """
    client = MongoClient(uri)
    return client[db_name][collection_name]
def import_json_in_mongo(data, collection=None):
    """Insert a single document into MongoDB.

    Args:
        data: The document (a mapping) handed to ``insert_one``.
        collection: Optional collection-like object exposing ``insert_one``.
            When omitted, a collection is obtained from
            ``connect_to_mongo()`` and its client is closed after the
            insert, so repeated calls do not accumulate open clients.

    Returns:
        None.
    """
    # Only close what this function opened; a caller-supplied collection
    # stays under the caller's control so it can be reused across inserts.
    owns_client = collection is None
    if owns_client:
        collection = connect_to_mongo()
    try:
        collection.insert_one(data)
    finally:
        if owns_client:
            collection.database.client.close()

View File

@@ -4,6 +4,8 @@ import re
import function as f import function as f
import json import json
import import_in_BD as ib
link = 'https://povar.ru/list/' link = 'https://povar.ru/list/'
@@ -75,10 +77,12 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re
print(name_id) print(name_id)
photo = main_container.find(class_='photo').get('src')
recipies = {'recipes': {}} recipies = {'recipes': {}}
detailed_tags = f.extract_tags_from_detailed_tags(main_container) #Собираем теги detailed_tags = f.extract_tags_from_detailed_tags(main_container) #Собираем теги
print(detailed_tags) #print(detailed_tags)
ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
#print(ingredients) #print(ingredients)
@@ -89,23 +93,26 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re
steps = f.extr_steps(main_container) #Сборка шагов steps = f.extr_steps(main_container) #Сборка шагов
#print(steps) #print(steps)
preview_img = steps[-1]['img']
recipies['recipes'] = { '_id' : name_id, recip = {'_id' : name_id,
'recipe_name':recipe_name, 'recipe_name':recipe_name,
'url':link, 'url':link,
'preview_img':preview_img, 'preview_img':photo,
'tags':detailed_tags, 'tags':detailed_tags,
'ingredients':ingredients, 'ingredients':ingredients,
'nutritional_value':calories_info, 'nutritional_value':calories_info,
'steps':steps} 'steps':steps}
print(recipies)
print(recip)
print(len(steps))
#ib.import_json_in_mongo(recipies)
pars_group(link) #pars_group(link)
#pars_dishs() #pars_dishs()
#pars_recipie() pars_recipie(link="https://povar.ru/recipes/podjarka_k_makaronam-60879.html")