diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ad514b3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+beautifulsoup4==4.9.0
+certifi==2020.4.5.1
+chardet==3.0.4
+idna==2.9
+requests==2.23.0
+soupsieve==2.0
+urllib3==1.25.9
diff --git a/scrape.py b/scrape.py
new file mode 100644
index 0000000..277e7cd
--- /dev/null
+++ b/scrape.py
@@ -0,0 +1,66 @@
+import requests
+import re
+from bs4 import BeautifulSoup
+
# Matches a CSS ``url(...)`` value with or without surrounding quotes and
# captures only the address itself.
_CSS_URL_RE = re.compile(r'''url\(\s*["']?(.*?)["']?\s*\)''')


def _absolute_url(base_url, reference):
    """Resolve *reference* (relative, protocol-relative or absolute) against *base_url*."""
    from urllib.parse import urljoin

    return urljoin(base_url, reference)


def _style_image_url(node, base_url):
    """Return the absolute ``url(...)`` address from *node*'s inline style, or ''."""
    if node is None:
        return ''
    match = _CSS_URL_RE.search(node.get('style', ''))
    if match is None or not match.group(1):
        return ''
    return _absolute_url(base_url, match.group(1))


def _descend(node, *class_names):
    """Follow a chain of ``find(class_=...)`` lookups, returning None if any step misses."""
    for class_name in class_names:
        if node is None:
            return None
        node = node.find(class_=class_name)
    return node


def _first_text(node):
    """Return the first child of *node* as a plain ``str``, or '' if there is none."""
    if node is None or not node.contents:
        return ''
    first = node.contents[0]
    if isinstance(first, str):
        # Convert the NavigableString to a plain str so the result does not
        # keep a reference to the whole parse tree.
        return str(first)
    # The first child is a nested tag: fall back to its text content.
    return first.get_text()


def get_blog():
    """Scrape the Blizzard news front page and return its posts.

    Downloads ``https://news.blizzard.com/en-us/``, parses it with
    BeautifulSoup and collects every featured article (elements with class
    ``FeaturedArticle-link``) followed by every list entry (elements with
    class ``ArticleListItem``).

    Returns:
        A list of dicts with the string keys ``image``, ``game``, ``title``,
        ``description`` and ``url``. Featured articles always carry an empty
        ``description``. A field whose markup cannot be found is returned as
        an empty string instead of aborting the scrape; a missing link
        resolves to the site root.

    Raises:
        requests.RequestException: if the page cannot be fetched, the
            request times out, or the server answers with an error status.
    """
    url = 'https://news.blizzard.com/en-us/'
    base_url = 'https://news.blizzard.com'

    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page into [].
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    blog = []

    for feature_html in soup.find_all(class_='FeaturedArticle-link'):
        # The first two truncated labels are the game name and the title;
        # pad with None so a card with fewer labels still yields an entry.
        labels = list(feature_html.find_all(class_='text-truncate-ellipsis'))
        labels += [None, None]

        blog.append({
            'image': _style_image_url(
                feature_html.find(class_='Card-image'), base_url),
            'game': _first_text(labels[0]),
            'title': _first_text(labels[1]),
            'description': '',
            'url': _absolute_url(base_url, feature_html.get('href', '')),
        })

    for article_html in soup.find_all(class_='ArticleListItem'):
        content_html = article_html.find(class_='ArticleListItem-contentGrid')
        link_html = article_html.find(class_='ArticleLink')
        href = link_html.get('href', '') if link_html is not None else ''

        blog.append({
            'image': _style_image_url(
                article_html.find(class_='ArticleListItem-image'), base_url),
            'game': _first_text(_descend(
                content_html,
                'ArticleListItem-subtitle',
                'ArticleListItem-labelInner')),
            'title': _first_text(_descend(
                content_html, 'ArticleListItem-title')),
            'description': _first_text(_descend(
                content_html, 'ArticleListItem-description', 'h6')),
            'url': _absolute_url(base_url, href),
        })

    return blog
def get_body(post):
    """Build the plain-text version of a post.

    Args:
        post: Mapping with the string values ``title``, ``description`` and
            ``url``.

    Returns:
        The title, description and URL, one per line. Empty fields are
        left out so that a post without a description does not produce a
        blank line between the title and the URL.

    Raises:
        KeyError: if one of the three keys is missing from *post*.
    """
    fields = (post['title'], post['description'], post['url'])
    return "\n".join(field for field in fields if field)
def get_formatted_body(post):
    """Build the HTML version of a post.

    NOTE(review): the markup inside the original string literals was lost
    (the literals were empty or broken across lines), so the tags used
    here -- a link wrapping an ``<h3>`` title, followed by a ``<p>``
    description -- are a reconstruction. Confirm against the intended
    output.

    Args:
        post: Mapping with the string values ``title``, ``description`` and
            ``url``.

    Returns:
        An HTML fragment with the title linked to the post URL and the
        description in a paragraph. All three values are HTML-escaped.

    Raises:
        KeyError: if one of the three keys is missing from *post*.
    """
    from html import escape

    # The values come from a scraped page, so they must not be able to
    # inject markup or break out of the href attribute.
    title = escape(post['title'])
    description = escape(post['description'])
    url = escape(post['url'], quote=True)

    return ''.join([
        '<a href="', url, '">',
        '<h3>', title, '</h3>',
        '</a>',
        '<p>', description, '</p>',
    ])


# Only scrape when executed as a script, so importing this module does not
# trigger a network request.
if __name__ == '__main__':
    blog = get_blog()
    for post in blog:
        print(get_body(post))
        print(get_formatted_body(post))