Нормализация данных, приведение к общему JSON
This commit is contained in:
@@ -54,6 +54,11 @@ def extract_tags_from_detailed_tags(main_container):
|
|||||||
else:
|
else:
|
||||||
tag_list = []
|
tag_list = []
|
||||||
|
|
||||||
|
if label == 'Назначение': label = 'occasion'
|
||||||
|
elif label == 'Основной ингредиент': continue
|
||||||
|
elif label == 'Блюдо': label = 'type_dish'
|
||||||
|
elif label == 'География кухни': label = 'cuisine'
|
||||||
|
|
||||||
tags[label] = tag_list
|
tags[label] = tag_list
|
||||||
|
|
||||||
return tags
|
return tags
|
||||||
|
|||||||
35
parser.py
35
parser.py
@@ -2,6 +2,7 @@ import requests
|
|||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
import re
|
import re
|
||||||
import function as f
|
import function as f
|
||||||
|
import json
|
||||||
|
|
||||||
link = 'https://povar.ru/list/'
|
link = 'https://povar.ru/list/'
|
||||||
|
|
||||||
@@ -9,6 +10,8 @@ link = 'https://povar.ru/list/'
|
|||||||
|
|
||||||
def pars_group(link):
|
def pars_group(link):
|
||||||
#Сбор видов блюд
|
#Сбор видов блюд
|
||||||
|
|
||||||
|
|
||||||
response = f.try_request(link)
|
response = f.try_request(link)
|
||||||
soup = bs(response.text, 'html.parser')
|
soup = bs(response.text, 'html.parser')
|
||||||
|
|
||||||
@@ -29,10 +32,14 @@ def pars_group(link):
|
|||||||
link_group = 'https://povar.ru' + i.get('href')
|
link_group = 'https://povar.ru' + i.get('href')
|
||||||
print('-'*5, name_group, link_group)
|
print('-'*5, name_group, link_group)
|
||||||
|
|
||||||
|
pars_dishs(title, name_group, link_group)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print('-'*50)
|
print('-'*50)
|
||||||
|
|
||||||
|
|
||||||
def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
|
def pars_dishs(title='', name_group='', link='https://povar.ru/list/spagetti/', page=0):
|
||||||
#Сбор списка рецептов
|
#Сбор списка рецептов
|
||||||
while True:
|
while True:
|
||||||
page += 1
|
page += 1
|
||||||
@@ -48,19 +55,25 @@ def pars_dishs(link='https://povar.ru/list/spagetti/', page=0):
|
|||||||
recipe_link = 'https://povar.ru' + items.get('href')
|
recipe_link = 'https://povar.ru' + items.get('href')
|
||||||
|
|
||||||
print(recipe_name, recipe_link)
|
print(recipe_name, recipe_link)
|
||||||
|
pars_recipie(title, name_group, recipe_name, recipe_link)
|
||||||
|
|
||||||
print('-'*50)
|
print('-'*50)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
|
def pars_recipie(title=0, name_group=0, recipe_name=0 ,link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html'):
|
||||||
|
|
||||||
response = f.try_request(link)
|
response = f.try_request(link)
|
||||||
soup = bs(response.text, 'html.parser')
|
soup = bs(response.text, 'html.parser')
|
||||||
|
|
||||||
main_container = soup.find(class_='cont_area hrecipe')
|
main_container = soup.find(class_='cont_area hrecipe')
|
||||||
|
|
||||||
name = main_container.find(class_='detailed fn').get_text()
|
name_id = link.split('/')[-1]
|
||||||
|
try:
|
||||||
|
name_id = name_id.replace('.html', '')
|
||||||
|
except: pass
|
||||||
|
|
||||||
|
print(name_id)
|
||||||
|
|
||||||
recipies = {'recipes': {}}
|
recipies = {'recipes': {}}
|
||||||
|
|
||||||
@@ -68,17 +81,19 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
|
|||||||
print(detailed_tags)
|
print(detailed_tags)
|
||||||
|
|
||||||
ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
|
ingredients = f.extr_ingredient(main_container) #Собираем ингредиенты
|
||||||
print(ingredients)
|
#print(ingredients)
|
||||||
|
|
||||||
calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ
|
calories_info = f.extract_nutrition(main_container.find_all(class_='circle')) #БЖУ
|
||||||
print(calories_info)
|
#print(calories_info)
|
||||||
|
|
||||||
steps = f.extr_steps(main_container) #Сборка шагов
|
steps = f.extr_steps(main_container) #Сборка шагов
|
||||||
print(steps)
|
#print(steps)
|
||||||
|
|
||||||
preview_img = steps[-1]['img']
|
preview_img = steps[-1]['img']
|
||||||
|
|
||||||
recipies['recipes'] = { 'name':name,
|
recipies['recipes'] = { '_id' : name_id,
|
||||||
|
'recipe_name':recipe_name,
|
||||||
|
'url':link,
|
||||||
'preview_img':preview_img,
|
'preview_img':preview_img,
|
||||||
'tags':detailed_tags,
|
'tags':detailed_tags,
|
||||||
'ingredients':ingredients,
|
'ingredients':ingredients,
|
||||||
@@ -88,9 +103,9 @@ def pars_recipie(link='https://povar.ru/recipes/slivochnaya_karbonara-73186.html
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#pars_group(link)
|
pars_group(link)
|
||||||
#pars_dish()
|
#pars_dishs()
|
||||||
pars_recipie()
|
#pars_recipie()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user