From 9a707a2f95de5e1193d1613fa3f1542f4e006dc7 Mon Sep 17 00:00:00 2001
From: zein
Date: Sun, 16 Nov 2025 05:29:50 +0300
Subject: [PATCH] Completed modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 function.py | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.py   | 74 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+)
 create mode 100644 function.py
 create mode 100644 parser.py

diff --git a/function.py b/function.py
new file mode 100644
index 0000000..91ae38e
--- /dev/null
+++ b/function.py
@@ -0,0 +1,94 @@
+import re
+
+import requests
+from bs4 import BeautifulSoup as bs
+
+
+def try_request(link, max_retries=5):
+    """Fetch a URL, retrying on network errors and non-200 responses."""
+    retries = 0
+    while retries < max_retries:
+        try:
+            response = requests.get(link, timeout=10)
+            if response.status_code == 200:
+                return response
+            retries += 1
+        except requests.RequestException:
+            retries += 1
+    return None
+
+
+def try_soup(response):
+    """Parse a response into soup; return False when there is nothing to parse."""
+    try:
+        return bs(response.text, 'html.parser')
+    except AttributeError:
+        print('404')
+        return False
+
+
+def extract_nutrition(calories_info):
+    """Collect the nutrition facts: calories, proteins, fats and carbs."""
+    numbers = []
+    for item in calories_info:
+        text = item.get_text()
+        match = re.search(r'\d+', text)
+        if match:
+            numbers.append(int(match.group()))
+
+    if len(numbers) != 4:
+        print(f'Nutrition facts not found: expected 4 numbers, got {len(numbers)}')
+        return None
+
+    return dict(zip(['calories', 'proteins', 'fats', 'carbs'], numbers))
+
+
+def extract_tags_from_detailed_tags(detailed_tags):
+    """Map each bold label to the list of tag links that follows it."""
+    tags = {}
+    for span_b in detailed_tags.find_all('span', class_='b'):
+        label = span_b.get_text(strip=True).rstrip(':')
+        next_span = span_b.find_next_sibling('span')
+        if next_span:
+            tag_list = [a.get_text(strip=True) for a in next_span.find_all('a')]
+        else:
+            tag_list = []
+        tags[label] = tag_list
+    return tags
+
+
+def try_extr_ingredient(span_b, class_, portions=1):
+    """Extract one ingredient field; numeric values are scaled to one portion."""
+    try:
+        item = span_b.find(class_=class_).get_text(strip=True)
+    except AttributeError:
+        return None
+    try:
+        return float(item) / portions
+    except ValueError:
+        return item
+
+
+def extr_ingredient(main_container):
+    """Collect ingredients as {name: {'unit': ..., 'amount': ...}} per portion."""
+    portions = int(main_container.find(class_='yield value').get_text(strip=True))
+
+    tags = {}
+    for span_b in main_container.find_all(class_='ingredient flex-dot-line'):
+        label = try_extr_ingredient(span_b, class_='name')
+        value_ingredient = try_extr_ingredient(span_b, 'value', portions)
+        unit_name = try_extr_ingredient(span_b, 'u-unit-name')
+        tags[label] = {'unit': unit_name, 'amount': value_ingredient}
+    return tags
+
+
+def extr_steps(main_container):
+    """Collect cooking steps as {'step_N': {'img': ..., 'title': ...}}."""
+    step_links = main_container.find_all(class_='stepphotos')
+
+    steps = {}
+    # The first match is skipped: it is not a numbered cooking step.
+    for count_step, items in enumerate(step_links[1:], start=1):
+        steps[f'step_{count_step}'] = {'img': items.get('href'),
+                                       'title': items.get('title')}
+    return steps
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..a53201d
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,74 @@
+from bs4 import BeautifulSoup as bs
+
+import function as f
+
+link = 'https://povar.ru/list/'
+
+
+def pars_group(link):
+    # Collect the dish categories.
+    response = f.try_request(link)
+    soup = bs(response.text, 'html.parser')
+
+    main_container = soup.find_all(class_='ingredientItem')
+
+    for items in main_container:
+        item = items.find_all('a')
+        title = item[0].get_text()
+
+        # Stop at the 'Салаты' (salads) section.
+        if title == 'Салаты':
+            break
+
+        print(title)
+
+        for i in item[1:]:
+            name_group = i.get_text()
+            link_group = 'https://povar.ru' + i.get('href')
+            print('-' * 5, name_group, link_group)
+
+        print('-' * 50)
+
+
+def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
+    # Collect the recipe list, walking the paginated listing until a page fails.
+    while True:
+        page += 1
+        new_link = link + str(page)
+        soup = f.try_soup(f.try_request(new_link))
+
+        if soup is False:
+            break
+
+        main_container = soup.find_all(class_='listRecipieTitle')
+
+        for items in main_container:
+            recipe_name = items.get_text()
+            recipe_link = 'https://povar.ru' + items.get('href')
+            print(recipe_name, recipe_link)
+        print('-' * 50)
+
+
+def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
+    # Collect a single recipe page.
+    response = f.try_request(link)
+    soup = bs(response.text, 'html.parser')
+
+    main_container = soup.find(class_='cont_area hrecipe')
+
+    #detailed_tags = main_container.find(class_='detailed_tags')  # collect tags
+    #detailed_tags = f.extract_tags_from_detailed_tags(detailed_tags)
+
+    #ingredients = f.extr_ingredient(main_container)  # collect ingredients
+    #print(ingredients)
+
+    #calories_info = f.extract_nutrition(main_container.find_all(class_='circle'))  # nutrition facts
+    #print(calories_info)
+
+    steps = f.extr_steps(main_container)
+    print(steps)
+
+
+#pars_group(link)
+#pars_dishs()
pars_recipie()
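
A quick offline check of the extraction helpers, a minimal sketch: the sample HTML below is invented to mirror the shape the selectors in function.py target, not copied from povar.ru:

    from bs4 import BeautifulSoup as bs

    import function as f

    # Invented sample markup mimicking the structure the selectors expect.
    sample = bs("""
    <div class="detailed_tags">
      <span class="b">Кухня:</span>
      <span><a>итальянская</a> <a>европейская</a></span>
    </div>
    <span class="circle">520 ккал</span>
    <span class="circle">15 г</span>
    <span class="circle">30 г</span>
    <span class="circle">45 г</span>
    """, 'html.parser')

    print(f.extract_tags_from_detailed_tags(sample.find(class_='detailed_tags')))
    # {'Кухня': ['итальянская', 'европейская']}
    print(f.extract_nutrition(sample.find_all(class_='circle')))
    # {'calories': 520, 'proteins': 15, 'fats': 30, 'carbs': 45}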
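
And a possible next step, sketched here rather than added to the patch: the commented-out calls in pars_recipie combined into one record per recipe. scrape_recipe is a hypothetical name, the selectors are exactly the ones already used above, and failed requests are not handled beyond what the helpers do:

    import function as f

    def scrape_recipe(link):
        # Aggregate the helpers from function.py into a single recipe dict.
        soup = f.try_soup(f.try_request(link))
        main_container = soup.find(class_='cont_area hrecipe')
        return {
            'tags': f.extract_tags_from_detailed_tags(
                main_container.find(class_='detailed_tags')),
            'ingredients': f.extr_ingredient(main_container),
            'nutrition': f.extract_nutrition(
                main_container.find_all(class_='circle')),
            'steps': f.extr_steps(main_container),
        }

    print(scrape_recipe('https://povar.ru/recipes/slivochnaya_karbonara-73186.html'))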