You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

182 lines
5.3 KiB
Python

7 months ago
import requests
from bs4 import BeautifulSoup
4 weeks ago
from time import time
from datetime import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer
7 months ago
# Root of the scraped site; event links on the overview page are appended to it.
_BASE_URL = 'https://gruene-hohenlohe.de/'
_OVERVIEW_URL = _BASE_URL + 'kalender'
# Seconds to wait for each HTTP request before giving up.
_REQUEST_TIMEOUT = 60

# German month names, indexed by ``month - 1``.
_MONTH_NAMES = (
    'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
    'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
)
# German weekday names, indexed by ``datetime.weekday()`` (0 = Monday).
_WEEKDAY_NAMES = (
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
)


def _fetch_soup(url):
    """Download *url* and return its parsed HTML tree."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a calendar.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _node_text(node):
    """Return the stripped text of a parsed element, escaped for HTML output."""
    # Function-scope import: nothing else in this file needs the html module.
    from html import escape

    # The text ends up between tags only, so quotes need no escaping.
    return escape(node.text.strip(), quote=False)


def scrape():
    """Scrape the event calendar and render it as one HTML fragment.

    Every event linked from the overview page is fetched and rendered in
    three variants that are concatenated with ``<br><hr>`` separators:

    * "print":    ``date time, place, <b>title</b> description``
    * "digital":  like "print" but prefixed with the weekday and grouped
      under bold month headers
    * "markdown": a ``* `` bullet per event under ``*month*`` headers

    Place and description are omitted when the event page has none. All
    scraped text is HTML-escaped before it is inserted.

    Returns:
        str: the three rendered variants as a single HTML fragment.

    Raises:
        requests.RequestException: a page could not be downloaded or the
            server answered with an error status.
        IndexError: an event page lacks the date, time or title element.
        ValueError: the date field is not a valid ``DD.MM.YYYY`` date.
    """
    print_entries = []
    digital_entries = []
    markdown_entries = []
    last_header = None

    overview = _fetch_soup(_OVERVIEW_URL)
    for link in overview.select('.media-body h2 a'):
        event = _fetch_soup(_BASE_URL + link.attrs['href'])
        fields = [_node_text(dd) for dd in event.select('.calendarize dl dd')]

        # Date: first <dd>, formatted DD.MM.YYYY.
        date_text = fields[0]
        date_parts = date_text.split('.')
        day = int(date_parts[0])
        month = int(date_parts[1])
        year = int(date_parts[2])
        # Building the datetime first validates day and month, so the
        # month-name lookup below can never hit a wrong index.
        weekday_name = _WEEKDAY_NAMES[datetime(year, month, day).weekday()]

        # Time: second <dd>; only the start of the time span is shown and
        # all-day events ('Ganztags') get no 'Uhr' suffix.
        start = fields[1].split(' ')[0]
        time_text = start if start == 'Ganztags' else start + ' Uhr'

        # Place: optional third <dd>.
        place = fields[2] if len(fields) > 2 else None
        place_suffix = f'{place}, ' if place is not None else ''
        markdown_place = f', {place}' if place is not None else ''

        title = _node_text(event.select('.calendarize h1')[0])

        # Description: first paragraph of the text block, if there is one.
        paragraphs = event.select('.calendarize .text p')
        description = f' {_node_text(paragraphs[0])}' if paragraphs else ''

        # Start a new month section whenever month or year changes (the
        # header shows both, so a year change alone needs a new header too).
        header = (year, month)
        if header != last_header:
            last_header = header
            month_title = f'{_MONTH_NAMES[month - 1]} {year}'
            digital_entries.append(f'<br><b>{month_title}</b><br>')
            markdown_entries.append(f'<br><b>*{month_title}*</b><br>')

        print_entries.append(
            f'{date_text} {time_text}, {place_suffix}'
            f'<b>{title}</b>{description}<br>'
        )
        digital_entries.append(
            f'{weekday_name}, {date_text} {time_text}, {place_suffix}'
            f'<b>{title}</b>{description}<br>'
        )
        markdown_entries.append(
            f'* {weekday_name}, {date_text}, {time_text}{markdown_place}'
            f': {title}{description}<br>'
        )

    sections = (print_entries, digital_entries, markdown_entries)
    return '<br><hr>'.join(''.join(section) for section in sections)
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Answer every GET request with the rendered event listing.

    The page is built from the module-level ``templateHtml`` and the result
    of ``scrape()``, and kept in the module-level ``cache`` dict so that the
    calendar is not scraped again on every request.
    """

    # Seconds a rendered page is reused before the calendar is scraped again.
    CACHE_TTL_SECONDS = 30

    def do_GET(self):
        """Send the cached page, re-rendering it first when it has expired.

        If re-rendering fails, the last successfully rendered page is served
        instead; when no page has been rendered yet, the client receives a
        502 error response.
        """
        now = time()
        if cache['time'] < now - self.CACHE_TTL_SECONDS:
            try:
                page = templateHtml.replace('--body--', scrape())
            except Exception as error:
                # Request boundary: scraping can fail on network errors or
                # unexpected markup. Log it and degrade gracefully instead
                # of dropping the connection without a response.
                self.log_error('Scraping failed: %r', error)
                if cache['output'] is None:
                    self.send_error(502, 'Could not load the event calendar')
                    return
            else:
                cache['output'] = page.encode('utf-8')
                cache['time'] = now

        body = cache['output']
        self.send_response(200)
        # Declare the encoding explicitly; the body is always UTF-8 bytes.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
# Page skeleton; its '--body--' placeholder is replaced with the scraped
# listing on each render. Read explicitly as UTF-8 (the response is encoded
# as UTF-8 as well) instead of relying on the platform's default encoding.
with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

# Response cache shared by all requests:
#   'time'   - time() value of the last successful render (0 = never)
#   'output' - the rendered page as UTF-8 bytes, or None before first render
cache = {
    'time': 0,
    'output': None,
}

# Listen on all interfaces, port 8000, and serve until the process is stopped.
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()