"""Helpers for downloading a recipe page and extracting data from its HTML."""

import re

import requests
from bs4 import BeautifulSoup as bs

# Keys of the nutrition dict, in the order the numbers appear on the page.
_NUTRITION_KEYS = ('calories', 'proteins', 'fats', 'carbs')


def try_request(link, max_retries=5, timeout=10):
    """Fetch *link* with GET, retrying until an HTTP 200 response arrives.

    Args:
        link: URL to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt, so that a
            stalled connection cannot block the caller forever.

    Returns:
        The ``requests.Response`` of the first attempt that answered with
        status 200, or ``None`` if every attempt failed.
    """
    for _ in range(max_retries):
        try:
            response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            # Network-level failure (connection, timeout, bad URL, ...):
            # counts as one used attempt.
            continue
        if response.status_code == 200:
            return response
    return None


def try_soup(response):
    """Parse ``response.text`` with BeautifulSoup's ``html.parser``.

    Args:
        response: Object exposing the page markup as ``.text``; ``None``
            (the failure value of ``try_request``) is tolerated.

    Returns:
        The parsed ``BeautifulSoup`` tree, or ``False`` (after printing
        ``'404'``) when the response is unusable.
    """
    try:
        return bs(response.text, 'html.parser')
    except Exception:
        # Best-effort contract: any unusable response is reported the same
        # way. Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        print('404')
        return False


def extract_nutrition(calories_info):
    """Collect the nutrition facts (calories, proteins, fats, carbs).

    The first run of digits found in the text of each element is taken as
    that element's value; elements without digits are ignored.

    Args:
        calories_info: Iterable of elements exposing ``get_text()``.

    Returns:
        A dict with the keys ``calories``, ``proteins``, ``fats`` and
        ``carbs`` mapped to ints, or ``None`` (after printing a message)
        unless exactly four numbers were found.
    """
    numbers = []
    try:
        for item in calories_info:
            match = re.search(r'\d+', item.get_text())
            if match:
                numbers.append(int(match.group()))
    except (TypeError, AttributeError):
        # The input is not iterable (e.g. None) or its items are not tags:
        # treat it the same as "nothing found".
        numbers = []

    if len(numbers) != len(_NUTRITION_KEYS):
        print('БЖУ не найдены')
        return None
    return dict(zip(_NUTRITION_KEYS, numbers))


def extract_tags_from_detailed_tags(main_container):
    """Extract labelled tag lists from the ``detailed_tags`` element.

    Each ``<span class="b">`` supplies a label (text with trailing colons
    removed); the texts of the ``<a>`` elements inside its next sibling
    ``<span>`` become that label's tags.

    Args:
        main_container: Parsed element to search in.

    Returns:
        A dict mapping each label to a list of tag strings. The dict is
        empty when there is no ``detailed_tags`` element, and a label maps
        to an empty list when it has no sibling ``<span>``.
    """
    detailed_tags = main_container.find(class_='detailed_tags')
    if detailed_tags is None:
        return {}

    tags = {}
    for span_b in detailed_tags.find_all('span', class_='b'):
        label = span_b.get_text(strip=True).rstrip(':')
        next_span = span_b.find_next_sibling('span')
        if next_span is None:
            tags[label] = []
        else:
            tags[label] = [
                a.get_text(strip=True) for a in next_span.find_all('a')
            ]
    return tags


def try_extr_ingredient(span_b, class_, portions=1):
    """Read one field of an ingredient row.

    Args:
        span_b: Element of a single ingredient row.
        class_: CSS class of the child element to read.
        portions: Divisor applied to numeric values. A value of 0 disables
            scaling instead of raising ``ZeroDivisionError``.

    Returns:
        ``float(text) / portions`` when the text parses as a number, the
        stripped text itself otherwise, or ``None`` when the child element
        does not exist.
    """
    try:
        text = span_b.find(class_=class_).get_text(strip=True)
    except AttributeError:
        # find() returned None (or span_b itself is None).
        return None

    try:
        amount = float(text)
    except ValueError:
        return text
    return amount / portions if portions else amount


def _extract_portions(main_container):
    """Return the portion count from the ``yield value`` element, or 1.

    Falls back to 1 (amounts left unscaled) when the element is missing,
    its text is not an integer, or the number is not positive.
    """
    node = main_container.find(class_='yield value')
    try:
        portions = int(node.get_text(strip=True))
    except (AttributeError, ValueError):
        return 1
    return portions if portions > 0 else 1


def extr_ingredient(main_container):
    """Collect the ingredients, with numeric amounts divided by portions.

    Args:
        main_container: Parsed element containing the ingredient rows
            (class ``ingredient flex-dot-line``) and the ``yield value``
            element.

    Returns:
        A dict mapping each ingredient name to
        ``{'unit': <unit name>, 'amount': <amount>}``. Rows that share a
        name overwrite each other; a row without a name is stored under
        the key ``None``.
    """
    portions = _extract_portions(main_container)
    ingredients = {}
    for row in main_container.find_all(class_='ingredient flex-dot-line'):
        label = try_extr_ingredient(row, class_='name')
        amount = try_extr_ingredient(row, 'value', portions)
        unit_name = try_extr_ingredient(row, 'u-unit-name')
        ingredients[label] = {'unit': unit_name, 'amount': amount}
    return ingredients


def extr_steps(main_container):
    """Collect the image link and title of each ``stepphotos`` element.

    Args:
        main_container: Parsed element to search in.

    Returns:
        A list of ``{'img': href, 'title': title}`` dicts, one per matched
        element after the first; missing attributes yield ``None``.
    """
    step_nodes = main_container.find_all(class_='stepphotos')
    # The first match is deliberately skipped, as the code has always done.
    # NOTE(review): presumably it is not an individual step (e.g. a cover
    # photo) - confirm against the page markup.
    return [
        {'img': node.get('href'), 'title': node.get('title')}
        for node in step_nodes[1:]
    ]