diff --git a/scrape.py b/scrape.py index 3f129a8..4bc1d7d 100644 --- a/scrape.py +++ b/scrape.py @@ -2,12 +2,13 @@ import requests from bs4 import BeautifulSoup from http.server import BaseHTTPRequestHandler, HTTPServer -overview_url = 'https://gruene-hohenlohe.de/kalender' -overview_html = requests.get(overview_url, timeout=60).text -overview_soup = BeautifulSoup(overview_html, 'html.parser') - - def scrape(): + output = '' + + overview_url = 'https://gruene-hohenlohe.de/kalender' + overview_html = requests.get(overview_url, timeout=60).text + overview_soup = BeautifulSoup(overview_html, 'html.parser') + for href in overview_soup.select('.media-body h2 a'): event_url = 'https://gruene-hohenlohe.de/' + href.attrs['href'] event_html = requests.get(event_url, timeout=60).text @@ -15,8 +16,6 @@ def scrape(): data = event_soup.select('.calendarize dl dd') - output = '' - # date output += data[0].text.strip() output += ' ' @@ -30,7 +29,7 @@ def scrape(): output += ', ' # place - #output += data[2].text.strip() + output += data[2].text.strip() output += ', ' # title @@ -45,7 +44,9 @@ def scrape(): except IndexError: pass - return output + output += '
' + + return output @@ -54,7 +55,13 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): def do_GET(self): self.send_response(200) self.end_headers() - self.wfile.write(scrape().encode('utf-8')) + html = templateHtml.replace('--body--', scrape()) + self.wfile.write(html.encode('utf-8')) + + + +with open('template.html', 'r') as templateFile: + templateHtml = templateFile.read() httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler) httpd.serve_forever() diff --git a/template.html b/template.html new file mode 100644 index 0000000..c506013 --- /dev/null +++ b/template.html @@ -0,0 +1,5 @@ + + + + +--body-- \ No newline at end of file