You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
2.8 KiB
Python
93 lines
2.8 KiB
Python
from os import environ
|
|
|
|
import requests
|
|
import re
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
def get_accesstoken_from_file(accesstoken_path):
    """Read a single access token from a file.

    Args:
        accesstoken_path: Path of the file holding the token.

    Returns:
        The file's entire content with leading and trailing whitespace
        (including the trailing newline) stripped.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises, which
    # the previous explicit open()/close() pair did not guarantee.
    with open(accesstoken_path, 'r') as accesstoken_file:
        return accesstoken_file.read().strip()
|
|
# Captures the target of a CSS ``url(...)`` value whether or not it is quoted.
# Written as a raw string: ``\(`` in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
_CSS_URL_PATTERN = re.compile(r'''url\(\s*["']?(.*?)["']?\s*\)''')


def _extract_style_url(element):
    """Return the first ``url(...)`` target found in *element*'s inline style.

    Raises IndexError when the style holds no ``url(...)`` value and KeyError
    when the element has no ``style`` attribute.
    """
    return _CSS_URL_PATTERN.findall(element.attrs['style'])[0]


def get_blog():
    """Scrape the Blizzard news landing page into a list of post dicts.

    Both the featured cards and the regular article list are collected, in
    that order. Every entry is a dict with the keys ``image``, ``game``,
    ``title``, ``description`` and ``url``; featured entries always get an
    empty ``description``.

    Returns:
        A list of dicts as described above (empty if the page contains none
        of the expected elements).

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
        AttributeError, IndexError, KeyError: If a matched card does not have
            the expected markup (missing image element, style, text, link).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = 'https://news.blizzard.com'
    url = 'https://news.blizzard.com/en-us/'

    # Without a timeout requests may block indefinitely on a stalled server.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    blog = []

    for feature_html in soup.find_all(class_='FeaturedArticle-link'):
        image_html = feature_html.find(class_='Card-image')
        text_list = feature_html.find_all(class_='text-truncate-ellipsis')

        blog.append({
            # urljoin resolves protocol-relative ("//host/x"), root-relative
            # and already-absolute URLs alike.
            'image': urljoin(base_url, _extract_style_url(image_html)),
            'game': text_list[0].contents[0],
            'title': text_list[1].contents[0],
            'description': '',
            'url': urljoin(base_url, feature_html.attrs['href']),
        })

    for article_html in soup.find_all(class_='ArticleListItem'):
        image_html = article_html.find(class_='ArticleListItem-image')
        content_html = article_html.find(class_='ArticleListItem-contentGrid')

        subtitle_html = content_html.find(class_='ArticleListItem-subtitle')
        description_html = content_html.find(class_='ArticleListItem-description')

        blog.append({
            'image': urljoin(base_url, _extract_style_url(image_html)),
            'game': subtitle_html.find(class_='ArticleListItem-labelInner').contents[0],
            'title': content_html.find(class_='ArticleListItem-title').contents[0],
            'description': description_html.find(class_='h6').contents[0],
            'url': urljoin(base_url, article_html.find(class_='ArticleLink').attrs['href']),
        })

    return blog
|
|
def get_body(post):
    """Build the plain-text form of a post.

    The result is the title, then the description (only when it is not the
    empty string), then the URL, each on its own line with no trailing
    newline.

    Args:
        post: Mapping with the string entries ``title``, ``description``
            and ``url``.

    Returns:
        The newline-joined text.
    """
    lines = [post['title']]

    summary = post['description']
    if summary != '':
        lines.append(summary)

    lines.append(post['url'])

    return "\n".join(lines)
|
|
def get_formatted_body(post):
    """Build the HTML form of a post.

    The markup is a link wrapping the title as ``<h5>``, an optional ``<p>``
    with the description (omitted when the description is the empty string),
    and finally the URL as text.

    All three values are HTML-escaped before being inserted, so characters
    such as ``&``, ``<`` or ``"`` in them cannot break or inject markup.
    Values without such characters produce exactly the same output as before.

    Args:
        post: Mapping with the string entries ``title``, ``description``
            and ``url``.

    Returns:
        The HTML snippet as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # escape() also replaces quotes by default, which makes the value safe
    # inside the double-quoted href attribute.
    url = escape(post['url'])
    title = escape(post['title'])

    parts = ['<a href="' + url + '">', '<h5>' + title + '</h5>', '</a>']

    if post['description'] != '':
        parts.append('<p>' + escape(post['description']) + '</p>')

    parts.append(url)

    return ''.join(parts)
|
|
|
|
|
|
# Required settings; a missing variable raises KeyError at start-up.
homeserver = environ['HOMESERVER_URL']
mxid = environ['MXID_PREFIX']
admin_room = environ['ADMIN_ROOM']

# Maps the lower-cased <GAME> part of every ACCESSTOKEN_<GAME>_FILE variable
# to the token stored in the file that variable points at.
accesstoken = {}
for key in environ:
    game = re.match('^ACCESSTOKEN_([A-Z]*)_FILE$', key)
    if game is None:
        continue
    accesstoken[game.group(1).lower()] = get_accesstoken_from_file(environ[key])

# Fetch the posts and print each one in plain-text and then HTML form.
blog = get_blog()
for post in blog:
    print(get_body(post))
    print(get_formatted_body(post))
|