import requests
from bs4 import BeautifulSoup
from time import time
from http.server import BaseHTTPRequestHandler, HTTPServer


def scrape():
    """Scrape the Grüne Hohenlohe calendar and return the events as an HTML fragment."""
    output = ''
    overview_url = 'https://gruene-hohenlohe.de/kalender'
    overview_html = requests.get(overview_url, timeout=60).text
    overview_soup = BeautifulSoup(overview_html, 'html.parser')

    # The overview page links to one detail page per event.
    for link in overview_soup.select('.media-body h2 a'):
        event_url = 'https://gruene-hohenlohe.de/' + link.attrs['href']
        event_html = requests.get(event_url, timeout=60).text
        event_soup = BeautifulSoup(event_html, 'html.parser')
        data = event_soup.select('.calendarize dl dd')

        # Date
        output += data[0].text.strip()
        output += ' '

        # Time: keep only the start time; all-day events get no "Uhr" suffix.
        # (Renamed from `time` so the imported time() function is not shadowed.)
        timespan = data[1].text.strip()
        event_time = timespan.split(' ')[0]
        output += event_time
        if event_time != 'Ganztags':
            output += ' Uhr'
        output += ', '

        # Place (not every event has one)
        if len(data) > 2:
            output += data[2].text.strip()
            output += ', '

        # Title (bold markup assumed, so the title stands out in the rendered page)
        output += '<b>'
        output += event_soup.select('.calendarize h1')[0].text.strip()
        output += '</b>'

        # Description, if the event has one
        try:
            description = event_soup.select('.calendarize .text p')[0].text.strip()
            output += ' '
            output += description
        except IndexError:
            pass

        output += '<br>\n'
    return output


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # Re-scrape at most every 30 seconds; otherwise serve the cached page.
        current_time = time()
        if cache['time'] < current_time - 30:
            html = templateHtml.replace('--body--', scrape())
            cache['output'] = html.encode('utf-8')
            cache['time'] = current_time
        self.send_response(200)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(cache['output'])


with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

cache = {
    'time': 0,
    'output': None
}

httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()
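# The script expects a template.html next to it that contains a '--body--'
# placeholder, which do_GET replaces with the scraped event list. A minimal
# template (assumed layout, not shown in the original) could look like:
#
#   <!DOCTYPE html>
#   <html lang="de">
#     <head><meta charset="utf-8"><title>Termine</title></head>
#     <body>--body--</body>
#   </html>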