From 39dedbdc8e06ec5642ac108e2db2f0fa611d370c Mon Sep 17 00:00:00 2001 From: lub Date: Sun, 3 May 2020 20:04:17 +0200 Subject: [PATCH] filter out colon because of call of duty: modern warfare ^ --- scrape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrape.py b/scrape.py index 3fec691..3225acd 100644 --- a/scrape.py +++ b/scrape.py @@ -40,7 +40,7 @@ def get_blog(): blog.append({ 'image': image_url, - 'game': text_list[0].contents[0].replace(' ', '').lower(), + 'game': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(), 'title': text_list[1].contents[0], 'description': '', 'url': base_url+feature_html.attrs['href'], @@ -56,7 +56,7 @@ def get_blog(): blog.append({ 'image': image_url, - 'game': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').lower(), + 'game': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(), 'title': content_html.find(class_='ArticleListItem-title').contents[0], 'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0], 'url': base_url+article_html.find(class_='ArticleLink').attrs['href'],