"""Helpers for downloading a recipe page and extracting its structured data."""

import re

import requests
from bs4 import BeautifulSoup as bs
# NOTE(review): `empty` is not referenced anywhere in this module's visible
# code; it may be an accidental auto-import. Confirm before removing.
from dns.name import empty

# First run of digits in a string (the integer part of a nutrition value).
_FIRST_INTEGER = re.compile(r'\d+')

# Keys of the dict returned by extract_nutrition, in the order the values
# are expected to appear on the page.
_NUTRITION_KEYS = ('calories', 'proteins', 'fats', 'carbs')

# Site labels (Russian) mapped to the keys used in the returned tags dict.
# Labels that are not listed here are kept unchanged.
_TAG_LABELS = {
    'Назначение': 'occasion',
    'Блюдо': 'type_dish',
    'География кухни': 'cuisine',
}

# Tag group that is dropped from the result entirely.
_SKIPPED_TAG_LABEL = 'Основной ингредиент'


def try_request(link, max_retries=5, timeout=10):
    """Fetch *link* with an HTTP GET, retrying on failure.

    Args:
        link: URL to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt; forwarded
            to ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        The first response whose status code is 200, or None when every
        attempt failed (network error or non-200 status).
    """
    for _ in range(max_retries):
        try:
            response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            # Network-level failure: count it as a used attempt and retry.
            continue
        if response.status_code == 200:
            return response
    return None


def try_soup(response):
    """Parse the body of *response* with the 'html.parser' backend.

    Args:
        response: Object exposing the page markup as ``.text``.

    Returns:
        The parsed BeautifulSoup tree, or False (after printing '404') when
        parsing fails, e.g. because *response* is None.
    """
    try:
        return bs(response.text, 'html.parser')
    except Exception:
        # Deliberately best-effort: any parsing problem is reported the same
        # way, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print('404')
        return False


def extract_nutrition(calories_info):
    """Collect calories, proteins, fats and carbs from nutrition elements.

    The first run of digits in each element's text is taken as its value, so
    any fractional part is ignored.

    Args:
        calories_info: Iterable of elements exposing ``get_text()``.

    Returns:
        A dict with the keys 'calories', 'proteins', 'fats' and 'carbs'
        mapped to ints, or None (after printing a message) when the input
        cannot be read or does not yield exactly four numbers.
    """
    numbers = []
    try:
        for item in calories_info:
            match = _FIRST_INTEGER.search(item.get_text())
            if match:
                numbers.append(int(match.group()))
    except (TypeError, AttributeError):
        # calories_info is not iterable or an item has no usable text:
        # treat it the same as "nothing found".
        numbers = []

    if len(numbers) != len(_NUTRITION_KEYS):
        print('БЖУ не найдены')
        return None
    return dict(zip(_NUTRITION_KEYS, numbers))


def extract_tags_from_detailed_tags(main_container):
    """Collect the recipe's tag groups from the 'detailed_tags' element.

    Each ``<span class="b">`` supplies a group label (trailing ':' removed)
    and the ``<a>`` texts inside its next sibling ``<span>`` supply the tags.
    Known Russian labels are renamed to 'occasion', 'type_dish' and
    'cuisine'; the main-ingredient group is skipped; other labels are kept
    as they appear on the page.

    Args:
        main_container: Parsed element that contains the tag section.

    Returns:
        A dict mapping each group label to a list of tag strings. The list
        is empty when the label has no following sibling ``<span>``.

    Raises:
        AttributeError: If no element with class 'detailed_tags' is found
            (``find`` returns None in that case).
    """
    detailed_tags = main_container.find(class_='detailed_tags')
    tags = {}
    for span_b in detailed_tags.find_all('span', class_='b'):
        label = span_b.get_text(strip=True).rstrip(':')
        if label == _SKIPPED_TAG_LABEL:
            continue

        next_span = span_b.find_next_sibling('span')
        if next_span:
            tag_list = [a.get_text(strip=True) for a in next_span.find_all('a')]
        else:
            tag_list = []

        tags[_TAG_LABELS.get(label, label)] = tag_list
    return tags


def try_extr_ingredient(span_b, class_, portions=1):
    """Read one field of an ingredient row.

    Args:
        span_b: Element representing the ingredient row.
        class_: CSS class of the child element holding the field.
        portions: Divisor applied to the value when it is numeric.

    Returns:
        ``float(text) / portions`` when the text parses as a float, the
        stripped text itself when it does not, or None when the row has no
        child element with the requested class.
    """
    try:
        item = span_b.find(class_=class_).get_text(strip=True)
    except AttributeError:
        # find() returned None: the row has no such field.
        return None

    try:
        return float(item) / portions
    except ValueError:
        # Non-numeric text (names, units) is returned unchanged.
        return item


def extr_ingredient(main_container):
    """Collect the ingredients, with amounts divided by the portion count.

    Args:
        main_container: Parsed element that contains the recipe.

    Returns:
        A dict mapping each ingredient name to
        ``{'unit': <unit name>, 'amount': <amount per portion>}``. Fields
        missing from a row are None.

    Raises:
        AttributeError: If the 'yield value' element is missing.
        ValueError: If the portion count is not an integer.
        ZeroDivisionError: If the portion count is 0 and an amount is numeric.
    """
    portions = int(main_container.find(class_='yield value').get_text(strip=True))

    ingredients = {}
    for span_b in main_container.find_all(class_='ingredient flex-dot-line'):
        label = try_extr_ingredient(span_b, class_='name')
        value_ingredient = try_extr_ingredient(span_b, 'value', portions)
        unit_name = try_extr_ingredient(span_b, 'u-unit-name')
        ingredients[label] = {'unit': unit_name, 'amount': value_ingredient}
    return ingredients


def extr_steps(main_container):
    """Collect the cooking steps of a recipe.

    Steps are normally 'stepphotos' elements carrying the image link in
    'href' and the step text in 'title'. Some pages have no step photos; on
    those the steps are the 'detailed_step_description_big noPhotoStep'
    elements, which have no 'title', so the element text is used instead.

    Args:
        main_container: Parsed element that contains the recipe.

    Returns:
        A list of ``{'img': <href or None>, 'title': <step text>}`` dicts in
        page order.

    Raises:
        AttributeError: If no element with class 'instructions' is found.
    """
    instructions = main_container.find(class_='instructions')

    step_nodes = instructions.find_all(class_='stepphotos')
    if not step_nodes:
        # Exception page: no photos, fall back to the text-only step blocks.
        step_nodes = instructions.find_all(
            class_='detailed_step_description_big noPhotoStep'
        )

    steps = []
    for node in step_nodes:
        title = node.get('title')
        if title is None:
            # Text-only steps carry no 'title' attribute; use their text.
            title = node.get_text()
        steps.append({'img': node.get('href'), 'title': title})
    return steps