"""Serve a cached text rendering of the gruene-hohenlohe.de event calendar.

On each GET the handler returns ``template.html`` with the ``--body--``
placeholder replaced by the output of :func:`scrape`. The rendered page is
kept in a module-level cache and rebuilt at most once every 30 seconds.
"""

from datetime import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer
from time import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

_BASE_URL = 'https://gruene-hohenlohe.de/'
_OVERVIEW_URL = 'https://gruene-hohenlohe.de/kalender'
_REQUEST_TIMEOUT = 60  # seconds, applied to every upstream request
_CACHE_SECONDS = 30    # minimum age before the page is scraped again

# Indexed by ``month - 1``.
_GERMAN_MONTHS = (
    'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
    'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
)

# Indexed by ``datetime.weekday()`` (Monday == 0).
_GERMAN_WEEKDAYS = (
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
)


def scrape():
    """Fetch all events from the calendar and render them as text.

    The overview page is downloaded, every link matching
    ``.media-body h2 a`` is followed, and each event page is reduced to one
    line in two listings:

    * "print":   ``<date> <time>, [<place>, ]<title>[ <description>]``
    * "digital": the same line prefixed with the German weekday name, with a
      German month-name heading inserted whenever the month differs from
      that of the previous event.

    Returns:
        str: the print listing, a newline, then the digital listing.

    Raises:
        requests.RequestException: if a page cannot be fetched.
        IndexError: if an event page lacks the date/time fields, the title,
            or a three-part dotted date.
        ValueError: if the date fields are not a valid calendar date.
    """
    print_parts = []
    digital_parts = []

    overview_html = requests.get(_OVERVIEW_URL, timeout=_REQUEST_TIMEOUT).text
    overview_soup = BeautifulSoup(overview_html, 'html.parser')

    month = None  # month of the previously processed event
    for link in overview_soup.select('.media-body h2 a'):
        # urljoin gives the same result as plain concatenation for relative
        # hrefs, but also copes with root-relative ("/x") and absolute links.
        event_url = urljoin(_BASE_URL, link.attrs['href'])
        event_html = requests.get(event_url, timeout=_REQUEST_TIMEOUT).text
        event_soup = BeautifulSoup(event_html, 'html.parser')
        data = event_soup.select('.calendarize dl dd')

        # date -- expected as "DD.MM.YYYY"
        date = data[0].text.strip()
        date_fields = date.split('.')
        year = int(date_fields[2])
        event_month = int(date_fields[1])
        day = int(date_fields[0])
        # Building the datetime first validates the fields before they are
        # used as indexes into the name tables.
        weekday = datetime(year, event_month, day).weekday()

        # month heading, emitted only when the month changes
        if event_month != month:
            month = event_month
            digital_parts.append('\n' + _GERMAN_MONTHS[month - 1] + '\n')

        # time -- first token of the time span; all-day events carry no "Uhr"
        start = data[1].text.strip().split(' ')[0]
        time_formatted = start if start == 'Ganztags' else start + ' Uhr'
        details = time_formatted + ', '

        # place (optional third field)
        if len(data) > 2:
            details += data[2].text.strip() + ', '

        # title
        # NOTE(review): the code this was reformatted from appended empty
        # string literals around the title; if those were meant to carry
        # markup, restore it here.
        # NOTE(review): scraped text is inserted into the HTML template
        # unescaped -- confirm whether html.escape() should be applied.
        details += event_soup.select('.calendarize h1')[0].text.strip()

        # description (optional)
        paragraphs = event_soup.select('.calendarize .text p')
        if paragraphs:
            details += ' ' + paragraphs[0].text.strip()

        print_parts.append(date + ' ' + details + '\n')
        digital_parts.append(
            _GERMAN_WEEKDAYS[weekday] + ', ' + date + ' ' + details + '\n'
        )

    return ''.join(print_parts) + '\n' + ''.join(digital_parts)


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Answer every GET with the cached, template-wrapped event listing."""

    def do_GET(self):
        """Send the rendered page, rebuilding it when the cache is stale."""
        current_time = time()
        if cache['time'] < current_time - _CACHE_SECONDS:
            html = templateHtml.replace('--body--', scrape())
            cache['output'] = html.encode('utf-8')
            cache['time'] = current_time

        body = cache['output']
        self.send_response(200)
        # The body is UTF-8 encoded above; say so instead of letting the
        # client guess the charset.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)


# The page is served as UTF-8, so read the template as UTF-8 rather than
# with the platform's default encoding.
with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

# 'time' is the epoch timestamp of the last scrape (0 forces a scrape on the
# first request); 'output' holds the encoded response body.
cache = {
    'time': 0,
    'output': None
}

httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()