@@ -16,6 +16,13 @@ def get_accesstoken_from_file(accesstoken_path):
     accesstoken_file.close()
 
     return single_accesstoken
 
 
+def extract_image_url(image_html):
+    # only recent articles use "" to escape the url, so we have to search
+    # both with quotes and without quotes
+    image_url_fragment = re.findall(r'url\("?(.*?)"?\)', image_html.attrs['style'])[0]
+    return 'https:'+image_url_fragment
+
+
+def sanitize_category(raw_category):
+    return raw_category.replace(' ', '').replace(':', '').lower()
+
+
 def get_blog():
     url = 'https://news.blizzard.com/en-us/'
     html = requests.get(url).text
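
(Reviewer note: a quick standalone check, not part of the diff, that the quote-tolerant pattern in extract_image_url accepts both inline-style formats seen on the site. The sample style strings and the image path are invented for illustration.)

    import re

    # some article cards quote the url inside url(...), some don't;
    # the optional "? on each side of the group accepts both forms
    for style in ('background-image: url("//bnetcmsus-a.akamaihd.net/cms/sample.jpg")',
                  'background-image: url(//bnetcmsus-a.akamaihd.net/cms/sample.jpg)'):
        fragment = re.findall(r'url\("?(.*?)"?\)', style)[0]
        print('https:' + fragment)  # same absolute url either way
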
@@ -26,13 +33,11 @@ def get_blog():
     blog = []
 
     for featured_article in soup.select('#featured-articles article'):
-        image_html = featured_article.find(class_='Card-image')
-        image_url_fragment = re.findall(r'url\("(.*?)"\)', image_html.attrs['style'])[0]
-        image_url = 'https:'+image_url_fragment
+        image_url = extract_image_url(featured_article.find(class_='Card-image'))
 
         text_list = featured_article.select('.text-truncate-ellipsis')
-        category = text_list[0].contents[0].replace(' ', '').replace(':', '').lower()
-        title = text_list[1].contents[0]
+        category = sanitize_category(text_list[0].text)
+        title = text_list[1].text
 
         url = base_url+featured_article.find('a').attrs['href']
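
(Reviewer note: besides the refactor, switching from .contents[0] to .text makes the scrape resilient to nested tags inside the label. A standalone sketch with made-up markup:)

    from bs4 import BeautifulSoup

    span = BeautifulSoup('<span>World of Warcraft: <b>Classic</b></span>',
                         'html.parser').span
    print(span.contents[0])  # 'World of Warcraft: ' -- first child node only
    print(span.text)         # 'World of Warcraft: Classic' -- all nested text
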
@@ -45,11 +50,9 @@ def get_blog():
         })
 
     for recent_article in soup.select('#recent-articles article'):
-        image_html = recent_article.find(class_='ArticleListItem-image')
-        image_url_fragment = re.findall(r'url\((.*?)\)', image_html.attrs['style'])[0]
-        image_url = 'https:'+image_url_fragment
+        image_url = extract_image_url(recent_article.find(class_='ArticleListItem-image'))
 
-        category = recent_article.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').text.replace(' ', '').replace(':', '').lower()
+        category = sanitize_category(recent_article.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').text)
         title = recent_article.find(class_='ArticleListItem-title').text
         description = recent_article.find(class_='ArticleListItem-description').find(class_='h6').text
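
(Reviewer note: expected behavior of the new sanitize_category helper, shown self-contained with an invented category label:)

    def sanitize_category(raw_category):
        return raw_category.replace(' ', '').replace(':', '').lower()

    print(sanitize_category('World of Warcraft: Classic'))  # worldofwarcraftclassic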