"""Serve the gruene-hohenlohe.de event calendar as a single HTML page.

On every GET request the calendar overview is scraped, each linked event
page is fetched, and one text line per event is substituted for the
``--body--`` placeholder in ``template.html``.
"""
import logging
from html import escape
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://gruene-hohenlohe.de/'
OVERVIEW_URL = 'https://gruene-hohenlohe.de/kalender'
REQUEST_TIMEOUT = 60  # seconds, applied to every outgoing request
TEMPLATE_PATH = 'template.html'
BODY_PLACEHOLDER = '--body--'

logger = logging.getLogger(__name__)

# Page template; filled in by main() before the server starts serving.
templateHtml = ''


def _fetch_soup(url):
    """Download *url* and return it parsed with BeautifulSoup's html.parser.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status (so error pages are never parsed as content).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _format_event(event_soup):
    """Return the output line for one event page, or None if it is malformed.

    The first three ``.calendarize dl dd`` elements are read as date, time
    span and place; the first ``.calendarize h1`` is the title. The first
    ``.calendarize .text p`` paragraph is appended as description when the
    page has one.
    """
    fields = event_soup.select('.calendarize dl dd')
    headings = event_soup.select('.calendarize h1')
    if len(fields) < 3 or not headings:
        return None

    # All scraped text ends up inside an HTML page, so escape it.
    date = escape(fields[0].text.strip())
    # Only the first space-separated token of the time span is shown.
    start_time = escape(fields[1].text.strip().split(' ')[0])
    place = escape(fields[2].text.strip())
    title = escape(headings[0].text.strip())

    # NOTE(review): the original appended an empty string directly before and
    # after the title; possibly placeholders for markup -- confirm whether
    # anything is supposed to wrap the title.
    line = f'{date} {start_time} Uhr, {place}, {title}'

    paragraphs = event_soup.select('.calendarize .text p')
    if paragraphs:
        line += ' ' + escape(paragraphs[0].text.strip())
    return line + '\n'


def scrape():
    """Scrape all events linked from the calendar overview.

    Returns:
        str: one newline-terminated line per event, in overview order, of
        the form ``<date> <time> Uhr, <place>, <title>[ <description>]``.
        Events whose page cannot be fetched or lacks the expected elements
        are skipped (and logged) instead of aborting the whole listing.

    Raises:
        requests.RequestException: if the overview page cannot be fetched.
    """
    overview_soup = _fetch_soup(OVERVIEW_URL)

    lines = []
    for link in overview_soup.select('.media-body h2 a'):
        href = link.get('href')
        if not href:
            continue
        # urljoin copes with relative, root-relative and absolute hrefs.
        event_url = urljoin(BASE_URL, href)

        try:
            event_soup = _fetch_soup(event_url)
        except requests.RequestException:
            logger.warning('Skipping %s: request failed', event_url,
                           exc_info=True)
            continue

        line = _format_event(event_soup)
        if line is None:
            logger.warning('Skipping %s: unexpected page structure',
                           event_url)
            continue
        lines.append(line)

    return ''.join(lines)


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Answer every GET request with the template filled with fresh events."""

    def do_GET(self):
        """Scrape the calendar and send the rendered page as UTF-8 HTML.

        Responds with 502 when the upstream calendar cannot be fetched.
        """
        # Scrape before sending any status line so that a failure can still
        # be reported with a proper error status.
        try:
            body = scrape()
        except requests.RequestException:
            logger.exception('Fetching the calendar overview failed')
            self.send_error(502, 'Could not fetch the event calendar')
            return

        payload = templateHtml.replace(BODY_PLACEHOLDER, body).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)


def main():
    """Load the page template and serve on port 8000 until interrupted."""
    global templateHtml
    # The response is sent as UTF-8, so read the template the same way.
    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as template_file:
        templateHtml = template_file.read()

    # The context manager closes the listening socket on shutdown.
    with HTTPServer(('', 8000), SimpleHTTPRequestHandler) as httpd:
        httpd.serve_forever()


if __name__ == '__main__':
    main()