Финальная сборка.

Собраны все ссылки, удалены дубликаты. Осталось 85 тыс. ссылок. Парсим рецепты.
2025-11-24 03:24:08 +03:00
parent 8ff6022d8c
commit 12c4dcf33b
5 changed files with 85221 additions and 191 deletions
--- a/function.py
+++ b/function.py
@@ -1,6 +1,9 @@
+from os import times
+
 import requests
 from bs4 import BeautifulSoup as bs
 import re
+import time

 from dns.name import empty

@@ -15,8 +18,10 @@ def try_request(link, max_retries=5):
                return response
            else:
                retries += 1
+                time.sleep(2)
        except:
            retries += 1
+            time.sleep(2)

 def try_soup(response):
    try:
@@ -42,30 +47,33 @@ def extract_nutrition(calories_info):

        return dict(zip(['calories', 'proteins', 'fats', 'carbs'], numbers))
    except:
-        return print('БЖУ не найдены')
+        return None

 def extract_tags_from_detailed_tags(main_container):
+    try:

-    detailed_tags = main_container.find(class_='detailed_tags')
-    tags = {}
+        detailed_tags = main_container.find(class_='detailed_tags')
+        tags = {}

-    for span_b in detailed_tags.find_all('span', class_='b'):
-        label = span_b.get_text(strip=True).rstrip(':')
-        next_span = span_b.find_next_sibling('span')
+        for span_b in detailed_tags.find_all('span', class_='b'):
+            label = span_b.get_text(strip=True).rstrip(':')
+            next_span = span_b.find_next_sibling('span')

-        if next_span:
-            tag_list = [a.get_text(strip=True) for a in next_span.find_all('a')]
-        else:
-            tag_list = []
+            if next_span:
+                tag_list = [a.get_text(strip=True) for a in next_span.find_all('a')]
+            else:
+                tag_list = []

-        if label == 'Назначение': label = 'occasion'
-        elif label == 'Основной ингредиент': continue
-        elif label == 'Блюдо': label = 'type_dish'
-        elif label == 'География кухни': label = 'cuisine'
+            if label == 'Назначение': label = 'occasion'
+            elif label == 'Основной ингредиент': continue
+            elif label == 'Блюдо': label = 'type_dish'
+            elif label == 'География кухни': label = 'cuisine'

-        tags[label] = tag_list
+            tags[label] = tag_list

-    return tags
+        return tags
+    except Exception as e:
+        print(e)


 def try_extr_ingredient(span_b, class_, portions=1):
@@ -80,52 +88,60 @@ def try_extr_ingredient(span_b, class_, portions=1):


 def extr_ingredient(main_container):
-    #Сбор ингредиентов
-    portions = int(main_container.find(class_='yield value').get_text(strip=True))
+    try:
+        #Сбор ингредиентов
+        portions = int(main_container.find(class_='yield value').get_text(strip=True))

-    tags = {}
+        tags = {}

-    for span_b in main_container.find_all(class_='ingredient flex-dot-line'):
+        for span_b in main_container.find_all(class_='ingredient flex-dot-line'):

-        label = try_extr_ingredient(span_b, class_='name')
-        value_ingredient = try_extr_ingredient(span_b, 'value', portions)
-        unit_name = try_extr_ingredient(span_b, 'u-unit-name')
+            label = try_extr_ingredient(span_b, class_='name')
+            value_ingredient = try_extr_ingredient(span_b, 'value', portions)
+            unit_name = try_extr_ingredient(span_b, 'u-unit-name')

-        #print(label, value_ingredient, unit_name)
+            #print(label, value_ingredient, unit_name)

-        tags[label] = {'unit':unit_name, 'amount':value_ingredient}
+            tags[label] = {'unit':unit_name, 'amount':value_ingredient}

-    return tags
+        return tags
+    except Exception as e:
+        print(e)

 def extr_steps(main_container):
-    # На сайте есть страницы исключения по шагам готовки. Фото есть не везде, тогда ищем класс detailed_step_description_big noPhotoStep
-    # Класс detailed_step_description_big noPhotoStep ищет текст через get_text(), а не через тег title

-    steps = []
-    count = 1
+    try:
+        # На сайте есть страницы исключения по шагам готовки. Фото есть не везде, тогда ищем класс detailed_step_description_big noPhotoStep
+        # Класс detailed_step_description_big noPhotoStep ищет текст через get_text(), а не через тег title

-    recipeInstructions = main_container.find(class_='instructions')
+        steps = []
+        count = 1

-    main_container = recipeInstructions.find_all(class_='stepphotos')
+        recipeInstructions = main_container.find(class_='instructions')

-    # Проверяем страницу исключение
-    if not main_container:
-        main_container = recipeInstructions.find_all(class_='detailed_step_description_big noPhotoStep')
+        main_container = recipeInstructions.find_all(class_='stepphotos')
+
+        # Проверяем страницу исключение
+        if not main_container:
+            main_container = recipeInstructions.find_all(class_='detailed_step_description_big noPhotoStep')


-    for items in main_container:
-        img = items.get('href')
-        title = items.get('title')
+        for items in main_container:
+            img = items.get('href')
+            title = items.get('title')

-        # Если класс detailed_step_description_big noPhotoStep, то ищем через get_text.  Сейчас title пустой, тк его нет на странице
-        if title is None:
-            title = items.get_text() #Теперь тайтл заполнен
+            # Если класс detailed_step_description_big noPhotoStep, то ищем через get_text.  Сейчас title пустой, тк его нет на странице
+            if title is None:
+                title = items.get_text() #Теперь тайтл заполнен

-        steps.append({
-            'img': img,
-            'title': title
-        })
-    return steps
+            steps.append({
+                'img': img,
+                'title': title
+            })
+
+        return steps
+    except Exception as e:
+        return None