You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
1.7 KiB
Python

7 months ago
from html import escape
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
7 months ago
def _format_event(event_soup):
    """Return the HTML line for one parsed event detail page.

    The first three ``.calendarize dl dd`` elements are read as date, time
    and place, and the first ``.calendarize h1`` as the title.  All scraped
    text is HTML-escaped before it is placed into the markup.

    Raises:
        IndexError: If the page has fewer than three ``dd`` fields or no
            ``h1`` title inside ``.calendarize``.
    """
    data = event_soup.select('.calendarize dl dd')

    date = data[0].text.strip()
    # Only the first space-separated token of the time field is shown
    # (presumably the start of a "from - to" span; verify against the site).
    start_time = data[1].text.strip().split(' ')[0]
    place = data[2].text.strip()
    title = event_soup.select('.calendarize h1')[0].text.strip()

    parts = [
        escape(date, quote=False),
        ' ',
        escape(start_time, quote=False),
        ' Uhr',
        ', ',
        escape(place, quote=False),
        ', ',
        '<b>',
        escape(title, quote=False),
        '</b>',
    ]

    # The description is optional: events without one simply omit it.
    paragraphs = event_soup.select('.calendarize .text p')
    if paragraphs:
        parts.append(' ')
        parts.append(escape(paragraphs[0].text.strip(), quote=False))

    parts.append('<br>')
    return ''.join(parts)


def scrape():
    """Build an HTML fragment listing the events of the gruene-hohenlohe.de calendar.

    Fetches the calendar overview page, follows every link matched by
    ``.media-body h2 a`` and turns each event detail page into one line of
    the form::

        <date> <time> Uhr, <place>, <b><title></b> <description><br>

    The description is left out when the detail page has none.  Scraped text
    is HTML-escaped so that characters such as ``<`` or ``&`` in an event
    cannot break or inject markup into the page.

    Returns:
        str: The concatenated event lines; an empty string when the overview
        page yields no event links.

    Raises:
        IndexError: If an event page lacks one of the expected fields.
        KeyError: If a matched link has no ``href`` attribute.

    Errors raised by ``requests.get`` (each request uses a 60 second
    timeout) are not caught here and propagate to the caller.
    """
    base_url = 'https://gruene-hohenlohe.de/'
    overview_url = 'https://gruene-hohenlohe.de/kalender'
    overview_html = requests.get(overview_url, timeout=60).text
    overview_soup = BeautifulSoup(overview_html, 'html.parser')

    lines = []
    for link in overview_soup.select('.media-body h2 a'):
        # urljoin resolves plain relative hrefs exactly like the former
        # string concatenation, but also copes with root-relative
        # ("/kalender/...") and absolute hrefs.
        event_url = urljoin(base_url, link.attrs['href'])
        event_html = requests.get(event_url, timeout=60).text
        event_soup = BeautifulSoup(event_html, 'html.parser')
        lines.append(_format_event(event_soup))

    return ''.join(lines)
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the HTML template filled with freshly scraped calendar events."""

    def do_GET(self):
        """Answer any GET request with the rendered calendar page.

        The ``--body--`` placeholder of the module-level ``templateHtml`` is
        replaced by the output of ``scrape()`` and the result is sent as
        UTF-8 encoded HTML.  The request path is not inspected.
        """
        # Render before writing the status line: if scraping raises, the
        # exception propagates before a misleading "200 OK" has been sent.
        body = templateHtml.replace('--body--', scrape()).encode('utf-8')

        self.send_response(200)
        # Declare the encoding explicitly so non-ASCII characters (umlauts)
        # are decoded correctly even if the template has no charset meta tag.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
# Load the page template once at start-up.  It is decoded as UTF-8 to match
# the encoding used for the HTTP response, independent of the system locale.
with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

# Listen on all interfaces on port 8000 and serve until the process is
# stopped; the listening socket is released when serve_forever() exits.
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
try:
    httpd.serve_forever()
finally:
    httpd.server_close()