From cbb56871e893ebf07e4f82b0564c9454f9ce2b6e Mon Sep 17 00:00:00 2001 From: zein Date: Sun, 23 Nov 2025 02:02:01 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=D0=B8=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D1=83?= =?UTF-8?q?=20=D0=B8=D1=81=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D1=8F?= =?UTF-8?q?=20=D1=88=D0=B0=D0=B3=D0=BE=D0=B2=20=D0=BF=D1=80=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE=D1=82=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D1=8F,=20?= =?UTF-8?q?=D0=B3=D0=B4=D0=B5=20=D0=BD=D0=B5=D1=82=D1=83=20=D1=84=D0=BE?= =?UTF-8?q?=D1=82=D0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- function.py | 27 ++++++++++++++++++++------- import_in_BD.py | 42 ++++++++++++++++++++++++++++++++++++++++++ parser.py | 33 ++++++++++++++++++++------------- 3 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 import_in_BD.py diff --git a/function.py b/function.py index e68e43a..09d5420 100644 --- a/function.py +++ b/function.py @@ -2,6 +2,9 @@ import requests from bs4 import BeautifulSoup as bs import re +from dns.name import empty + + def try_request(link, max_retries=5): retries = 0 while retries < max_retries: @@ -93,24 +96,35 @@ def extr_ingredient(main_container): return tags - - def extr_steps(main_container): - - main_container = main_container.find_all(class_='stepphotos') + # На сайте есть страницы исключения по шагам готовки. Фото есть не везде, тогда ищем класс detailed_step_description_big noPhotoStep + # Класс detailed_step_description_big noPhotoStep ищет текст через get_text(), а не через тег title steps = [] + count = 1 - for items in main_container[1:]: + recipeInstructions = main_container.find(class_='instructions') + main_container = recipeInstructions.find_all(class_='stepphotos') + + # Проверяем страницу исключение + if not main_container: + main_container = recipeInstructions.find_all(class_='detailed_step_description_big noPhotoStep') + + + for items in main_container: img = items.get('href') title = items.get('title') + # Если класс detailed_step_description_big noPhotoStep, то ищем через get_text. Сейчас title пустой, тк его нет на странице + if title is None: + title = items.get_text() #Теперь тайтл заполнен + print(title) + steps.append({ 'img': img, 'title': title }) - return steps @@ -135,6 +149,5 @@ def extr_steps(main_container): - diff --git a/import_in_BD.py b/import_in_BD.py new file mode 100644 index 0000000..a0f0837 --- /dev/null +++ b/import_in_BD.py @@ -0,0 +1,42 @@ +from pymongo import MongoClient + +def connect_to_mongo(): + """Подключение к MongoDB""" + client = MongoClient("mongodb://localhost:27017/") + db = client["Food"] + return db["Test"] + + +def import_json_in_mongo(data): + collection = connect_to_mongo() + collection.insert_one(data) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/parser.py b/parser.py index 056587c..2dc6772 100644 --- a/parser.py +++ b/parser.py @@ -4,6 +4,8 @@ import re import function as f import json +import import_in_BD as ib + link = 'https://povar.ru/list/' @@ -75,10 +77,12 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re print(name_id) + photo = main_container.find(class_='photo').get('src') + recipies = {'recipes': {}} detailed_tags = f.extract_tags_from_detailed_tags(main_container) #Собираем теги - print(detailed_tags) + #print(detailed_tags) ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты #print(ingredients) @@ -89,23 +93,26 @@ def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/re steps = f.extr_steps(main_container) #Сборка шагов #print(steps) - preview_img = steps[-1]['img'] - recipies['recipes'] = { '_id' : name_id, - 'recipe_name':recipe_name, - 'url':link, - 'preview_img':preview_img, - 'tags':detailed_tags, - 'ingredients':ingredients, - 'nutritional_value':calories_info, - 'steps':steps} - print(recipies) + recip = {'_id' : name_id, + 'recipe_name':recipe_name, + 'url':link, + 'preview_img':photo, + 'tags':detailed_tags, + 'ingredients':ingredients, + 'nutritional_value':calories_info, + 'steps':steps} + + print(recip) + print(len(steps)) + + #ib.import_json_in_mongo(recipies) -pars_group(link) +#pars_group(link) #pars_dishs() -#pars_recipie() +pars_recipie(link="https://povar.ru/recipes/podjarka_k_makaronam-60879.html")