Готовые модули
This commit is contained in:
134
function.py
Normal file
134
function.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import re
|
||||
|
||||
def try_request(link, max_retries=5):
    """Fetch *link*, retrying on failure.

    Retries up to *max_retries* times on a non-200 status or any
    network-level error.

    Parameters:
        link: URL to fetch.
        max_retries: maximum number of attempts (default 5).

    Returns:
        The ``requests.Response`` on HTTP 200, or ``None`` when every
        attempt failed — callers must handle the None case.
    """
    retries = 0
    while retries < max_retries:
        try:
            # timeout prevents a dead server from hanging the scraper forever
            response = requests.get(link, timeout=30)
        except requests.RequestException:
            # Network failure (DNS, timeout, connection reset) — count and retry.
            retries += 1
            continue

        if response.status_code == 200:
            return response
        retries += 1

    # All attempts exhausted.
    return None
|
||||
|
||||
def try_soup(response):
    """Parse an HTTP response body into a BeautifulSoup tree.

    Parameters:
        response: a ``requests.Response`` or ``None`` (from a failed
            ``try_request``).

    Returns:
        The parsed soup, or ``False`` when *response* is unusable —
        callers test for this sentinel to stop pagination.
    """
    try:
        return bs(response.text, 'html.parser')
    except AttributeError:
        # response is None (no .text) — the request never succeeded.
        print('404')
        return False
|
||||
|
||||
|
||||
def extract_nutrition(calories_info):
    """Extract the nutrition facts (calories / proteins / fats / carbs).

    Scans each element's text for its first integer; exactly four
    numbers are expected.

    Parameters:
        calories_info: iterable of page elements exposing ``get_text()``.

    Returns:
        dict with keys 'calories', 'proteins', 'fats', 'carbs', or
        ``None`` (after printing a message) when parsing fails.
    """
    numbers = []
    try:
        for item in calories_info:
            text = item.get_text()
            match = re.search(r'\d+', text)
            if match:
                numbers.append(int(match.group()))

        if len(numbers) != 4:
            raise ValueError(f"Ожидалось 4 числа, найдено: {len(numbers)}")

        return dict(zip(['calories', 'proteins', 'fats', 'carbs'], numbers))
    except (ValueError, AttributeError):
        # ValueError: wrong number of matches; AttributeError: item has no get_text.
        print('БЖУ не найдены')
        return None
|
||||
|
||||
def extract_tags_from_detailed_tags(detailed_tags):
    """Map each bold label in the tags block to its list of tag texts.

    For every ``span.b`` the trailing ':' is stripped from its text and
    the anchors of the immediately following ``span`` sibling are
    collected; a missing sibling yields an empty list.

    Parameters:
        detailed_tags: the container element holding the tag spans.

    Returns:
        dict mapping label -> list of tag strings.
    """
    tags = {}

    for label_span in detailed_tags.find_all('span', class_='b'):
        label = label_span.get_text(strip=True).rstrip(':')
        sibling = label_span.find_next_sibling('span')

        tag_list = (
            [anchor.get_text(strip=True) for anchor in sibling.find_all('a')]
            if sibling else []
        )

        tags[label] = tag_list

    return tags
|
||||
|
||||
|
||||
def try_extr_ingredient(span_b, class_, portions=1):
    """Pull the text of the first child of *span_b* matching *class_*.

    Numeric text is converted to float and divided by *portions*
    (per-portion scaling); non-numeric text is returned unchanged.

    Returns:
        float, the raw string, or ``None`` when no matching child exists.
    """
    try:
        raw = span_b.find(class_=class_).get_text(strip=True)
    except AttributeError:
        # find() returned None — no element with that class in this row.
        return None

    try:
        return float(raw) / portions
    except ValueError:
        # Not a number (e.g. the ingredient's name) — keep the text as-is.
        return raw
|
||||
|
||||
|
||||
def extr_ingredient(main_container):
    """Collect the recipe's ingredients, amounts scaled to one portion.

    Reads the portion count from the 'yield value' element, then walks
    every 'ingredient flex-dot-line' row.

    Parameters:
        main_container: the recipe's root page element.

    Returns:
        dict mapping ingredient name -> {'unit': unit text or None,
        'amount': per-portion amount (float), raw text, or None}.
    """
    portions = int(main_container.find(class_='yield value').get_text(strip=True))

    ingredients = {}

    for row in main_container.find_all(class_='ingredient flex-dot-line'):
        name = try_extr_ingredient(row, class_='name')
        amount = try_extr_ingredient(row, 'value', portions)
        unit = try_extr_ingredient(row, 'u-unit-name')

        ingredients[name] = {'unit': unit, 'amount': amount}

    return ingredients
|
||||
|
||||
|
||||
|
||||
def extr_steps(main_container):
    """Collect the recipe's step photos with their captions.

    The first 'stepphotos' element is skipped — presumably the cover
    photo rather than a real step; confirm against the page markup.

    Parameters:
        main_container: the recipe's root page element.

    Returns:
        dict mapping 'step_N' -> {'img': href value, 'title': title value}.
    """
    photo_nodes = main_container.find_all(class_='stepphotos')

    steps = {}
    for number, node in enumerate(photo_nodes[1:], start=1):
        steps[f'step_{number}'] = {
            'img': node.get('href'),
            'title': node.get('title'),
        }

    return steps
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
91
parser.py
Normal file
91
parser.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import re
|
||||
import function as f
|
||||
|
||||
link = 'https://povar.ru/list/'  # root listing page of dish categories on povar.ru
|
||||
|
||||
|
||||
|
||||
def pars_group(link):
    """Print every dish-category and its sub-group links from the listing page.

    Iteration stops as soon as the 'Салаты' category is reached.
    Output-only helper; returns None.

    Parameters:
        link: URL of the category listing page.
    """
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')

    for block in soup.find_all(class_='ingredientItem'):
        anchors = block.find_all('a')
        # The first anchor is the category title; the rest are its sub-groups.
        title = anchors[0].get_text()

        if title == 'Салаты':
            break

        print(title)

        for anchor in anchors[1:]:
            name_group = anchor.get_text()
            link_group = 'https://povar.ru' + anchor.get('href')
            print('-'*5, name_group, link_group)

        print('-'*50)
|
||||
|
||||
|
||||
def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
    """Print recipe names and links from every page of a paginated listing.

    Walks pages ``page+1, page+2, ...`` until a request fails
    (``try_soup`` returns the ``False`` sentinel). Output-only; returns None.

    Parameters:
        link: listing URL prefix; the page number is appended to it.
        page: page counter start (first fetched page is ``page + 1``).
    """
    while True:
        page += 1
        new_link = link + str(page)
        soup = f.try_soup(f.try_request(new_link))

        # `soup == False` would invoke BeautifulSoup's element comparison;
        # identity is the correct check for the failure sentinel.
        if soup is False:
            break

        main_container = soup.find_all(class_='listRecipieTitle')

        for items in main_container:
            recipe_name = items.get_text()
            recipe_link = 'https://povar.ru' + items.get('href')

            print(recipe_name, recipe_link)
        print('-'*50)
|
||||
|
||||
|
||||
|
||||
def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    """Scrape a single recipe page and print its step list.

    The tag / ingredient / nutrition extractors are kept below,
    commented out, for when those fields are needed again.

    Parameters:
        link: URL of the recipe page.
    """
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')

    main_container = soup.find(class_='cont_area hrecipe')

    #detailed_tags = main_container.find(class_='detailed_tags') # collect tags
    #detailed_tags = f.extract_tags_from_detailed_tags(detailed_tags)

    #ingredients = f.extr_ingredient(main_container) # collect ingredients
    #print(ingredients)

    #calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) # nutrition facts
    #print(calories_info)

    recipe_steps = f.extr_steps(main_container)
    print(recipe_steps)
|
||||
|
||||
|
||||
|
||||
# Manual entry points — uncomment the one to run.
#pars_group(link)
#pars_dishs()

# Guarding the call keeps `import parser` side-effect free:
# the network request fires only when the file is run as a script.
if __name__ == '__main__':
    pars_recipie()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user