You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

182 lines
5.3 KiB
Python

from datetime import datetime
from html import escape
from http.server import BaseHTTPRequestHandler, HTTPServer
from time import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# German month names, indexed by ``month - 1``.
_MONTH_NAMES = (
    'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
    'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
)

# German weekday names, indexed by ``datetime.weekday()`` (Monday is 0).
_WEEKDAY_NAMES = (
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
)


def _format_event(date_text, timespan, place, title, description,
                  previous_period):
    """Render one event as HTML fragments for the three output variants.

    Args:
        date_text: Event date in ``DD.MM.YYYY`` form.
        timespan: Time text; only the part before the first space is used.
        place: Location text, or ``None`` when the event page lists none.
        title: Event title.
        description: First description paragraph, or ``None`` if absent.
        previous_period: ``(year, month)`` of the preceding event, or
            ``None`` for the first event.

    Returns:
        A tuple ``(period, print_html, digital_html, markdown_html)`` where
        ``period`` is this event's ``(year, month)``.

    Raises:
        IndexError: If ``date_text`` has fewer than three dot-separated parts.
        ValueError: If those parts do not form a valid calendar date.
    """
    date_parts = date_text.split('.')
    day, month, year = (
        int(date_parts[0]), int(date_parts[1]), int(date_parts[2]),
    )
    # Building the datetime first validates day/month before the name tables
    # are indexed (a month of 0 would otherwise silently map to 'Dezember').
    weekday = _WEEKDAY_NAMES[datetime(year, month, day).weekday()]
    period = (year, month)

    start = timespan.split(' ')[0]
    time_text = start if start == 'Ganztags' else start + ' Uhr'

    # The scraped values are plain text that ends up inside HTML markup, so
    # '<', '>' and '&' must be escaped to keep the generated page intact.
    date_html = escape(date_text, quote=False)
    time_html = escape(time_text, quote=False)
    title_html = escape(title, quote=False)
    place_html = None if place is None else escape(place, quote=False)
    description_html = (
        None if description is None else escape(description, quote=False)
    )

    print_parts = [date_html, ' ', time_html, ', ']
    digital_parts = []
    markdown_parts = []

    # A month heading is emitted whenever year *or* month changes, so two
    # consecutive events in the same month of different years still get one.
    if period != previous_period:
        heading = f'{_MONTH_NAMES[month - 1]} {year}'
        digital_parts.append(f'<br><b>{heading}</b><br>')
        markdown_parts.append(f'<br><b>*{heading}*</b><br>')

    digital_parts += [weekday, ', ', date_html, ' ', time_html, ', ']
    markdown_parts += ['* ', weekday, ', ', date_html, ', ', time_html]

    if place_html is not None:
        print_parts += [place_html, ', ']
        digital_parts += [place_html, ', ']
        markdown_parts += [', ', place_html]
    markdown_parts.append(': ')

    print_parts += ['<b>', title_html, '</b>']
    digital_parts += ['<b>', title_html, '</b>']
    markdown_parts.append(title_html)

    for parts in (print_parts, digital_parts, markdown_parts):
        if description_html is not None:
            parts += [' ', description_html]
        parts.append('<br>')

    return (
        period,
        ''.join(print_parts),
        ''.join(digital_parts),
        ''.join(markdown_parts),
    )


def scrape():
    """Scrape the calendar and return the events as an HTML body string.

    Fetches the calendar overview page, follows every event link found under
    ``.media-body h2 a`` and reads date, time, optional place, title and
    optional first description paragraph from each event page.

    Returns:
        A string containing three renderings of the event list ("print",
        "digital" and "markdown"), separated by ``'<br><hr>'``.

    Raises:
        IndexError: If an event page lacks the expected date/time or title
            elements.
        ValueError: If an event date cannot be parsed.
    """
    base_url = 'https://gruene-hohenlohe.de/'
    overview_url = 'https://gruene-hohenlohe.de/kalender'
    overview_html = requests.get(overview_url, timeout=60).text
    overview_soup = BeautifulSoup(overview_html, 'html.parser')

    # One list of fragments per output variant: print, digital, markdown.
    sections = ([], [], [])
    period = None
    for link in overview_soup.select('.media-body h2 a'):
        # urljoin copes with relative, root-relative and absolute hrefs alike.
        event_url = urljoin(base_url, link.attrs['href'])
        event_html = requests.get(event_url, timeout=60).text
        event_soup = BeautifulSoup(event_html, 'html.parser')

        details = [
            node.text.strip()
            for node in event_soup.select('.calendarize dl dd')
        ]
        title = event_soup.select('.calendarize h1')[0].text.strip()
        paragraphs = event_soup.select('.calendarize .text p')

        period, *fragments = _format_event(
            details[0],
            details[1],
            details[2] if len(details) > 2 else None,
            title,
            paragraphs[0].text.strip() if paragraphs else None,
            period,
        )
        for section, fragment in zip(sections, fragments):
            section.append(fragment)

    return '<br><hr>'.join(''.join(section) for section in sections)
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the rendered calendar page for every GET request.

    The page is rebuilt with ``scrape()`` when the module-level ``cache`` is
    older than ``cache_ttl`` seconds; otherwise the cached bytes are sent.
    """

    # Seconds a rendered page stays valid before a request triggers a rebuild.
    cache_ttl = 30

    def do_GET(self):
        """Send the (possibly cached) page as a 200 response.

        The requested path is not inspected; every GET receives the same page.
        """
        current_time = time()
        if cache['time'] < current_time - self.cache_ttl:
            page = templateHtml.replace('--body--', scrape())
            cache['output'] = page.encode('utf-8')
            cache['time'] = current_time

        body = cache['output']
        self.send_response(200)
        # Declare the charset explicitly: the body is UTF-8 encoded and holds
        # non-ASCII text, which clients would otherwise have to guess.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
# Load the page template once at startup; the request handler substitutes the
# '--body--' placeholder in it. The encoding is given explicitly because the
# page is later encoded as UTF-8, and the platform default may differ.
with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

# Shared render cache used by the request handler:
#   'time'   - timestamp (seconds since the epoch) of the last render; 0
#              forces a render on the first request.
#   'output' - the rendered page as UTF-8 bytes, or None before first render.
cache = {
    'time': 0,
    'output': None,
}

# Listen on all interfaces, port 8000. The context manager closes the
# listening socket when serve_forever() exits or raises.
with HTTPServer(('', 8000), SimpleHTTPRequestHandler) as httpd:
    httpd.serve_forever()