From acfdcdca59020bc832f3265b5fa73877e97f5b94 Mon Sep 17 00:00:00 2001 From: lub Date: Tue, 3 Dec 2024 18:38:18 +0100 Subject: [PATCH] add markdown format --- scrape.py | 68 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/scrape.py b/scrape.py index fdcaf34..967e499 100644 --- a/scrape.py +++ b/scrape.py @@ -7,7 +7,8 @@ from http.server import BaseHTTPRequestHandler, HTTPServer def scrape(): output = { 'print': '', - 'digital': '' + 'digital': '', + 'markdown': '' } overview_url = 'https://gruene-hohenlohe.de/kalender' @@ -28,38 +29,66 @@ def scrape(): output['print'] += date output['print'] += ' ' - # day of week + # day of week and month date_splitted = date.split('.') year = int(date_splitted[2]) previous_month = month if (month := int(date_splitted[1].lstrip('0'))) != previous_month: output['digital'] += '
' + output['markdown'] += '
' match month: + case 1: + month_formatted = 'Januar' case 2: - output['digital'] += 'Februar' + month_formatted = 'Februar' + case 3: + month_formatted = 'März' + case 4: + month_formatted = 'April' + case 5: + month_formatted = 'Mai' + case 6: + month_formatted = 'Juni' + case 7: + month_formatted = 'Juli' + case 8: + month_formatted = 'August' + case 9: + month_formatted = 'September' + case 10: + month_formatted = 'Oktober' + case 11: + month_formatted = 'November' case 12: - output['digital'] += 'Dezember' - output['digital'] += '
' + month_formatted = 'Dezember' + output['digital'] += '' + month_formatted + ' ' + str(year) + '
' + output['markdown'] += '*' + month_formatted + ' ' + str(year) + '*
' day = int(date_splitted[0].lstrip('0')) match datetime(year, month, day).weekday(): case 0: - output['digital'] += 'Montag' + weekday_formatted = 'Montag' case 1: - output['digital'] += 'Dienstag' + weekday_formatted = 'Dienstag' case 2: - output['digital'] += 'Mittwoch' + weekday_formatted = 'Mittwoch' case 3: - output['digital'] += 'Donnerstag' + weekday_formatted = 'Donnerstag' case 4: - output['digital'] += 'Freitag' + weekday_formatted = 'Freitag' case 5: - output['digital'] += 'Samstag' + weekday_formatted = 'Samstag' case 6: - output['digital'] += 'Sonntag' + weekday_formatted = 'Sonntag' + output['digital'] += weekday_formatted output['digital'] += ', ' output['digital'] += date output['digital'] += ' ' + output['markdown'] += '* ' + output['markdown'] += weekday_formatted + output['markdown'] += ', ' + output['markdown'] += date + output['markdown'] += ', ' # time timespan = data[1].text.strip() @@ -74,6 +103,8 @@ def scrape(): output['digital'] += time_formatted output['digital'] += ', ' + output['markdown'] += time_formatted + # place if len(data) > 2: place = data[2].text.strip() @@ -84,6 +115,10 @@ def scrape(): output['digital'] += place output['digital'] += ', ' + output['markdown'] += ', ' + output['markdown'] += place + output['markdown'] += ': ' + # title title = event_soup.select('.calendarize h1')[0].text.strip() @@ -95,6 +130,8 @@ def scrape(): output['digital'] += title output['digital'] += '' + output['markdown'] += title + # description try: description = event_soup.select('.calendarize .text p')[0].text.strip() @@ -104,6 +141,9 @@ def scrape(): output['digital'] += ' ' output['digital'] += description + + output['markdown'] += ' ' + output['markdown'] += description except IndexError: pass @@ -111,7 +151,9 @@ def scrape(): output['digital'] += '
' - return output['print'] + "
" + output['digital'] + output['markdown'] += '
' + + return output['print'] + '

' + output['digital'] + '

' + output['markdown'] class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): def do_GET(self):