|
|
|
@ -25,36 +25,42 @@ def get_blog():
|
|
|
|
|
|
|
|
|
|
blog = []
|
|
|
|
|
|
|
|
|
|
feature_list_html = soup.find_all(class_='FeaturedArticle-link')
|
|
|
|
|
for feature_html in feature_list_html:
|
|
|
|
|
image_html = feature_html.find(class_='Card-image')
|
|
|
|
|
image_url_fragment = re.findall('url\("(.*?)"\)', image_html.attrs['style'])[0]
|
|
|
|
|
for featured_article in soup.select('#featured-articles article'):
|
|
|
|
|
image_html = featured_article.find(class_='Card-image')
|
|
|
|
|
image_url_fragment = re.findall(r'url\("(.*?)"\)', image_html.attrs['style'])[0]
|
|
|
|
|
image_url = 'https:'+image_url_fragment
|
|
|
|
|
|
|
|
|
|
text_list = feature_html.find_all(class_='text-truncate-ellipsis')
|
|
|
|
|
text_list = featured_article.select('.text-truncate-ellipsis')
|
|
|
|
|
category = text_list[0].contents[0].replace(' ', '').replace(':', '').lower()
|
|
|
|
|
title = text_list[1].contents[0]
|
|
|
|
|
|
|
|
|
|
url = base_url+featured_article.find('a').attrs['href']
|
|
|
|
|
|
|
|
|
|
blog.append({
|
|
|
|
|
'image': image_url,
|
|
|
|
|
'category': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
|
|
|
|
|
'title': text_list[1].contents[0],
|
|
|
|
|
'description': '',
|
|
|
|
|
'url': base_url+feature_html.attrs['href'],
|
|
|
|
|
'category': category,
|
|
|
|
|
'title': title,
|
|
|
|
|
'description': '', # featured articles don't have a description
|
|
|
|
|
'url': url,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
article_list_html = soup.find_all(class_='ArticleListItem')
|
|
|
|
|
for article_html in article_list_html:
|
|
|
|
|
image_html = article_html.find(class_='ArticleListItem-image')
|
|
|
|
|
image_url_fragment = re.findall('url\((.*?)\)', image_html.attrs['style'])[0]
|
|
|
|
|
for recent_article in soup.select('#recent-articles article'):
|
|
|
|
|
image_html = recent_article.find(class_='ArticleListItem-image')
|
|
|
|
|
image_url_fragment = re.findall(r'url\((.*?)\)', image_html.attrs['style'])[0]
|
|
|
|
|
image_url = 'https:'+image_url_fragment
|
|
|
|
|
|
|
|
|
|
content_html = article_html.find(class_='ArticleListItem-contentGrid')
|
|
|
|
|
category = recent_article.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').text.replace(' ', '').replace(':', '').lower()
|
|
|
|
|
title = recent_article.find(class_='ArticleListItem-title').text
|
|
|
|
|
description = recent_article.find(class_='ArticleListItem-description').find(class_='h6').text
|
|
|
|
|
|
|
|
|
|
url = base_url+recent_article.find('a').attrs['href']
|
|
|
|
|
|
|
|
|
|
blog.append({
|
|
|
|
|
'image': image_url,
|
|
|
|
|
'category': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
|
|
|
|
|
'title': content_html.find(class_='ArticleListItem-title').contents[0],
|
|
|
|
|
'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0],
|
|
|
|
|
'url': base_url+article_html.find(class_='ArticleLink').attrs['href'],
|
|
|
|
|
'category': category,
|
|
|
|
|
'title': title,
|
|
|
|
|
'description': description,
|
|
|
|
|
'url': url
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
# reverse order so the oldest article is at [0]
|
|
|
|
|