add different output formats

main
lub 3 weeks ago
parent e40c1cbd3f
commit b3d8044157

@ -1,6 +1,6 @@
FROM python:3-slim FROM python:3-slim
WORKDIR /data WORKDIR /src
COPY requirements.txt ./ COPY requirements.txt ./

@ -1,15 +1,20 @@
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import time from time import time
from datetime import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
def scrape(): def scrape():
output = '' output = {
'print': '',
'digital': ''
}
overview_url = 'https://gruene-hohenlohe.de/kalender' overview_url = 'https://gruene-hohenlohe.de/kalender'
overview_html = requests.get(overview_url, timeout=60).text overview_html = requests.get(overview_url, timeout=60).text
overview_soup = BeautifulSoup(overview_html, 'html.parser') overview_soup = BeautifulSoup(overview_html, 'html.parser')
month = None
for href in overview_soup.select('.media-body h2 a'): for href in overview_soup.select('.media-body h2 a'):
event_url = 'https://gruene-hohenlohe.de/' + href.attrs['href'] event_url = 'https://gruene-hohenlohe.de/' + href.attrs['href']
event_html = requests.get(event_url, timeout=60).text event_html = requests.get(event_url, timeout=60).text
@ -18,38 +23,95 @@ def scrape():
data = event_soup.select('.calendarize dl dd') data = event_soup.select('.calendarize dl dd')
# date # date
output += data[0].text.strip() date = data[0].text.strip()
output += ' '
output['print'] += date
output['print'] += ' '
# day of week
date_splitted = date.split('.')
year = int(date_splitted[2])
previous_month = month
if (month := int(date_splitted[1].lstrip('0'))) != previous_month:
output['digital'] += '<br>'
match month:
case 2:
output['digital'] += 'Februar'
case 12:
output['digital'] += 'Dezember'
output['digital'] += '<br>'
day = int(date_splitted[0].lstrip('0'))
match datetime(year, month, day).weekday():
case 0:
output['digital'] += 'Montag'
case 1:
output['digital'] += 'Dienstag'
case 2:
output['digital'] += 'Mittwoch'
case 3:
output['digital'] += 'Donnerstag'
case 4:
output['digital'] += 'Freitag'
case 5:
output['digital'] += 'Samstag'
case 6:
output['digital'] += 'Sonntag'
output['digital'] += ', '
output['digital'] += date
output['digital'] += ' '
# time # time
timespan = data[1].text.strip() timespan = data[1].text.strip()
time = timespan.split(' ')[0] time = timespan.split(' ')[0]
time_formatted = time
output += time
if time != 'Ganztags': if time != 'Ganztags':
output += ' Uhr' time_formatted += ' Uhr'
output += ', '
output['print'] += time_formatted
output['print'] += ', '
output['digital'] += time_formatted
output['digital'] += ', '
# place # place
if len(data) > 2: if len(data) > 2:
output += data[2].text.strip() place = data[2].text.strip()
output += ', '
output['print'] += place
output['print'] += ', '
output['digital'] += place
output['digital'] += ', '
# title # title
output += '<b>' title = event_soup.select('.calendarize h1')[0].text.strip()
output += event_soup.select('.calendarize h1')[0].text.strip()
output += '</b>'
output['print'] += '<b>'
output['print'] += title
output['print'] += '</b>'
output['digital'] += '<b>'
output['digital'] += title
output['digital'] += '</b>'
# description
try: try:
description = event_soup.select('.calendarize .text p')[0].text.strip() description = event_soup.select('.calendarize .text p')[0].text.strip()
output += ' '
output += description output['print'] += ' '
output['print'] += description
output['digital'] += ' '
output['digital'] += description
except IndexError: except IndexError:
pass pass
output += '<br>' output['print'] += '<br>'
return output output['digital'] += '<br>'
return output['print'] + "<hr>" + output['digital']
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
def do_GET(self): def do_GET(self):
@ -69,8 +131,8 @@ with open('template.html', 'r') as templateFile:
templateHtml = templateFile.read() templateHtml = templateFile.read()
cache = { cache = {
"time": 0, 'time': 0,
"output": None 'output': None
} }
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler) httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)

Loading…
Cancel
Save