Готовые модули
This commit is contained in:
134
function.py
Normal file
134
function.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import re
|
||||
|
||||
def try_request(link, max_retries=5):
    """Fetch *link*, retrying on failure.

    Retries up to *max_retries* times on a non-200 status or any
    network-level error.

    Parameters:
        link: URL to fetch.
        max_retries: maximum number of attempts (default 5).

    Returns:
        The ``requests.Response`` on HTTP 200, or ``None`` when every
        attempt failed — callers must handle the None case.
    """
    retries = 0
    while retries < max_retries:
        try:
            # timeout prevents a dead server from hanging the scraper forever
            response = requests.get(link, timeout=30)
        except requests.RequestException:
            # Network failure (DNS, timeout, connection reset) — count and retry.
            retries += 1
            continue

        if response.status_code == 200:
            return response
        retries += 1

    # All attempts exhausted.
    return None
|
||||
|
||||
def try_soup(response):
    """Parse an HTTP response body into a BeautifulSoup tree.

    Parameters:
        response: a ``requests.Response`` or ``None`` (from a failed
            ``try_request``).

    Returns:
        The parsed soup, or ``False`` when *response* is unusable —
        callers test for this sentinel to stop pagination.
    """
    try:
        return bs(response.text, 'html.parser')
    except AttributeError:
        # response is None (no .text) — the request never succeeded.
        print('404')
        return False
|
||||
|
||||
|
||||
def extract_nutrition(calories_info):
    """Extract the nutrition facts (calories / proteins / fats / carbs).

    Scans each element's text for its first integer; exactly four
    numbers are expected.

    Parameters:
        calories_info: iterable of page elements exposing ``get_text()``.

    Returns:
        dict with keys 'calories', 'proteins', 'fats', 'carbs', or
        ``None`` (after printing a message) when parsing fails.
    """
    numbers = []
    try:
        for item in calories_info:
            text = item.get_text()
            match = re.search(r'\d+', text)
            if match:
                numbers.append(int(match.group()))

        if len(numbers) != 4:
            raise ValueError(f"Ожидалось 4 числа, найдено: {len(numbers)}")

        return dict(zip(['calories', 'proteins', 'fats', 'carbs'], numbers))
    except (ValueError, AttributeError):
        # ValueError: wrong number of matches; AttributeError: item has no get_text.
        print('БЖУ не найдены')
        return None
|
||||
|
||||
def extract_tags_from_detailed_tags(detailed_tags):
    """Map each bold label in the tags block to its list of tag texts.

    For every ``span.b`` the trailing ':' is stripped from its text and
    the anchors of the immediately following ``span`` sibling are
    collected; a missing sibling yields an empty list.

    Parameters:
        detailed_tags: the container element holding the tag spans.

    Returns:
        dict mapping label -> list of tag strings.
    """
    tags = {}

    for label_span in detailed_tags.find_all('span', class_='b'):
        label = label_span.get_text(strip=True).rstrip(':')
        sibling = label_span.find_next_sibling('span')

        tag_list = (
            [anchor.get_text(strip=True) for anchor in sibling.find_all('a')]
            if sibling else []
        )

        tags[label] = tag_list

    return tags
|
||||
|
||||
|
||||
def try_extr_ingredient(span_b, class_, portions=1):
    """Pull the text of the first child of *span_b* matching *class_*.

    Numeric text is converted to float and divided by *portions*
    (per-portion scaling); non-numeric text is returned unchanged.

    Returns:
        float, the raw string, or ``None`` when no matching child exists.
    """
    try:
        raw = span_b.find(class_=class_).get_text(strip=True)
    except AttributeError:
        # find() returned None — no element with that class in this row.
        return None

    try:
        return float(raw) / portions
    except ValueError:
        # Not a number (e.g. the ingredient's name) — keep the text as-is.
        return raw
|
||||
|
||||
|
||||
def extr_ingredient(main_container):
    """Collect the recipe's ingredients, amounts scaled to one portion.

    Reads the portion count from the 'yield value' element, then walks
    every 'ingredient flex-dot-line' row.

    Parameters:
        main_container: the recipe's root page element.

    Returns:
        dict mapping ingredient name -> {'unit': unit text or None,
        'amount': per-portion amount (float), raw text, or None}.
    """
    portions = int(main_container.find(class_='yield value').get_text(strip=True))

    ingredients = {}

    for row in main_container.find_all(class_='ingredient flex-dot-line'):
        name = try_extr_ingredient(row, class_='name')
        amount = try_extr_ingredient(row, 'value', portions)
        unit = try_extr_ingredient(row, 'u-unit-name')

        ingredients[name] = {'unit': unit, 'amount': amount}

    return ingredients
|
||||
|
||||
|
||||
|
||||
def extr_steps(main_container):
    """Collect the recipe's step photos with their captions.

    The first 'stepphotos' element is skipped — presumably the cover
    photo rather than a real step; confirm against the page markup.

    Parameters:
        main_container: the recipe's root page element.

    Returns:
        dict mapping 'step_N' -> {'img': href value, 'title': title value}.
    """
    photo_nodes = main_container.find_all(class_='stepphotos')

    steps = {}
    for number, node in enumerate(photo_nodes[1:], start=1):
        steps[f'step_{number}'] = {
            'img': node.get('href'),
            'title': node.get('title'),
        }

    return steps
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
91
parser.py
Normal file
91
parser.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import re
|
||||
import function as f
|
||||
|
||||
link = 'https://povar.ru/list/'  # root listing page of dish categories on povar.ru
|
||||
|
||||
|
||||
|
||||
def pars_group(link):
    """Print every dish-category and its sub-group links from the listing page.

    Iteration stops as soon as the 'Салаты' category is reached.
    Output-only helper; returns None.

    Parameters:
        link: URL of the category listing page.
    """
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')

    for block in soup.find_all(class_='ingredientItem'):
        anchors = block.find_all('a')
        # The first anchor is the category title; the rest are its sub-groups.
        title = anchors[0].get_text()

        if title == 'Салаты':
            break

        print(title)

        for anchor in anchors[1:]:
            name_group = anchor.get_text()
            link_group = 'https://povar.ru' + anchor.get('href')
            print('-'*5, name_group, link_group)

        print('-'*50)
|
||||
|
||||
|
||||
def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
    """Print recipe names and links from every page of a paginated listing.

    Walks pages ``page+1, page+2, ...`` until a request fails
    (``try_soup`` returns the ``False`` sentinel). Output-only; returns None.

    Parameters:
        link: listing URL prefix; the page number is appended to it.
        page: page counter start (first fetched page is ``page + 1``).
    """
    while True:
        page += 1
        new_link = link + str(page)
        soup = f.try_soup(f.try_request(new_link))

        # `soup == False` would invoke BeautifulSoup's element comparison;
        # identity is the correct check for the failure sentinel.
        if soup is False:
            break

        main_container = soup.find_all(class_='listRecipieTitle')

        for items in main_container:
            recipe_name = items.get_text()
            recipe_link = 'https://povar.ru' + items.get('href')

            print(recipe_name, recipe_link)
        print('-'*50)
|
||||
|
||||
|
||||
|
||||
def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
    """Scrape a single recipe page and print its step list.

    The tag / ingredient / nutrition extractors are kept below,
    commented out, for when those fields are needed again.

    Parameters:
        link: URL of the recipe page.
    """
    response = f.try_request(link)
    soup = bs(response.text, 'html.parser')

    main_container = soup.find(class_='cont_area hrecipe')

    #detailed_tags = main_container.find(class_='detailed_tags') # collect tags
    #detailed_tags = f.extract_tags_from_detailed_tags(detailed_tags)

    #ingredients = f.extr_ingredient(main_container) # collect ingredients
    #print(ingredients)

    #calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) # nutrition facts
    #print(calories_info)

    recipe_steps = f.extr_steps(main_container)
    print(recipe_steps)
|
||||
|
||||
|
||||
|
||||
# Manual entry points — uncomment the one to run.
#pars_group(link)
#pars_dishs()

# Guarding the call keeps `import parser` side-effect free:
# the network request fires only when the file is run as a script.
if __name__ == '__main__':
    pars_recipie()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user