136 lines
2.7 KiB
Python
136 lines
2.7 KiB
Python
import requests
|
||
from bs4 import BeautifulSoup as bs
|
||
import re
|
||
|
||
def try_request(link, max_retries=5, timeout=10):
    """Fetch *link* with GET, retrying until a 200 response arrives.

    Args:
        link: URL to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The response object whose status code is 200, or ``None`` when
        every attempt either failed or returned a different status.
    """
    for _ in range(max_retries):
        try:
            response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            # Network-level failure (connection error, timeout, bad URL):
            # this attempt is spent, move on to the next one.
            continue

        if response.status_code == 200:
            return response

    return None
|
||
|
||
def try_soup(response):
    """Parse the body of *response* into a BeautifulSoup tree.

    Args:
        response: Object exposing the page markup as ``.text``. ``None``
            (for example, a failed download) is tolerated.

    Returns:
        The parsed tree built with the ``html.parser`` backend, or
        ``False`` (after printing ``404``) when the markup cannot be read
        or parsed.
    """
    try:
        markup = response.text
        return bs(markup, 'html.parser')
    except Exception:
        # Best effort: a missing response or a parser failure is reported
        # as False. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        print('404')
        return False
|
||
|
||
|
||
def extract_nutrition(calories_info):
    """Collect calories, proteins, fats and carbs from nutrition elements.

    The first run of digits in each element's text is taken as that
    element's value; elements whose text has no digits are skipped.

    Args:
        calories_info: Iterable of elements exposing ``get_text()``.

    Returns:
        A dict with the keys ``calories``, ``proteins``, ``fats`` and
        ``carbs`` mapped to ints when exactly four numbers were found.
        Otherwise a notice is printed and ``None`` is returned.
    """
    try:
        matches = [re.search(r'\d+', item.get_text()) for item in calories_info]
    except (TypeError, AttributeError):
        # Input is missing (e.g. None) or holds items that are not
        # elements; treat it the same as "nothing found".
        matches = []

    numbers = [int(match.group()) for match in matches if match]

    if len(numbers) != 4:
        print('БЖУ не найдены')
        return None

    return dict(zip(['calories', 'proteins', 'fats', 'carbs'], numbers))
|
||
|
||
def extract_tags_from_detailed_tags(main_container):
    """Map each tag-group label to the list of tag names that follow it.

    Inside the element with class ``detailed_tags``, every
    ``<span class="b">`` is read as a group label (a trailing colon is
    removed). The link texts inside the next sibling ``<span>`` become
    that group's tags.

    Args:
        main_container: Parsed element to search in.

    Returns:
        A dict ``{label: [tag, ...]}``. A label with no following span
        maps to an empty list. An empty dict is returned when the
        ``detailed_tags`` element is absent.
    """
    detailed_tags = main_container.find(class_='detailed_tags')
    if detailed_tags is None:
        # The page has no tag section; there is nothing to collect.
        return {}

    tags = {}
    for label_span in detailed_tags.find_all('span', class_='b'):
        label = label_span.get_text(strip=True).rstrip(':')
        links_span = label_span.find_next_sibling('span')
        tags[label] = (
            [a.get_text(strip=True) for a in links_span.find_all('a')]
            if links_span
            else []
        )

    return tags
|
||
|
||
|
||
def try_extr_ingredient(span_b, class_, portions=1):
    """Read one field of an ingredient row, scaling it when it is numeric.

    Args:
        span_b: Element for a single ingredient row.
        class_: CSS class of the child element holding the field.
        portions: Divisor applied to numeric values.

    Returns:
        ``float(text) / portions`` when the field text parses as a number.
        The stripped text itself when it is not numeric or when
        *portions* is zero. ``None`` when the child element is missing.
    """
    try:
        text = span_b.find(class_=class_).get_text(strip=True)
    except AttributeError:
        # ``find`` returned None: the row has no element with that class.
        return None

    try:
        return float(text) / portions
    except (ValueError, ZeroDivisionError):
        # Non-numeric text, or a zero portion count that cannot be used as
        # a divisor: hand back the raw text unchanged.
        return text
|
||
|
||
|
||
def extr_ingredient(main_container):
    """Collect the ingredients of a recipe.

    The portion count is read from the element with class
    ``yield value`` and passed to ``try_extr_ingredient`` as the divisor
    for each ingredient's ``value`` field.

    Args:
        main_container: Parsed element holding the recipe.

    Returns:
        A dict keyed by ingredient name. Each value is
        ``{'unit': ..., 'amount': ...}`` as produced by
        ``try_extr_ingredient`` for the ``u-unit-name`` and ``value``
        fields of that row.
    """
    servings_text = main_container.find(class_='yield value').get_text(strip=True)
    portions = int(servings_text)

    rows = main_container.find_all(class_='ingredient flex-dot-line')

    ingredients = {}
    for row in rows:
        name = try_extr_ingredient(row, class_='name')
        amount = try_extr_ingredient(row, 'value', portions)
        unit = try_extr_ingredient(row, 'u-unit-name')
        ingredients[name] = {'unit': unit, 'amount': amount}

    return ingredients
|
||
|
||
|
||
|
||
def extr_steps(main_container):
    """Return the image link and caption of each cooking step.

    Finds every element with class ``stepphotos``, skips the first match
    (presumably not a step -- confirm against the page markup), and reads
    the ``href`` and ``title`` attributes of the rest.

    Args:
        main_container: Parsed element holding the recipe.

    Returns:
        A list of ``{'img': href, 'title': title}`` dicts in document
        order, holding whatever ``.get`` returns for each attribute.
    """
    step_nodes = main_container.find_all(class_='stepphotos')
    return [
        {'img': node.get('href'), 'title': node.get('title')}
        for node in step_nodes[1:]
    ]
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|