diff --git a/scrape.py b/scrape.py index 5dba0f9..608d5cd 100644 --- a/scrape.py +++ b/scrape.py @@ -11,7 +11,7 @@ from nio import ClientConfig, AsyncClient, LoginResponse, InviteEvent def get_accesstoken_from_file(accesstoken_path): - accesstoken_file = open(accesstoken_path, 'r') + accesstoken_file = open(accesstoken_path, 'r', encoding='utf8') single_accesstoken = accesstoken_file.read().strip() accesstoken_file.close() @@ -22,10 +22,10 @@ def extract_image_url(image_html): image_url_fragment = re.findall(r'url\("?(.*?)"?\)', image_html.attrs['style'])[0] return 'https:'+image_url_fragment def sanitize_category(raw_category): - return raw_category.replace(' ', '').replace(':', '').lower() + return raw_category.replace(' ', '').replace(':', '').replace('.', '').lower() def get_blog(): url = 'https://news.blizzard.com/en-us/' - html = requests.get(url).text + html = requests.get(url, timeout=60).text soup = BeautifulSoup(html, 'html.parser') base_url = 'https://news.blizzard.com'