You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			182 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			182 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
from datetime import datetime
from html import escape
from http.server import BaseHTTPRequestHandler, HTTPServer
from time import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
 | |
| 
 | |
_SITE_ROOT = 'https://gruene-hohenlohe.de/'
_OVERVIEW_URL = _SITE_ROOT + 'kalender'

# German display names, indexed by (month - 1) and by datetime.weekday().
_MONTH_NAMES = (
    'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
    'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
)
_WEEKDAY_NAMES = (
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
)


def _fetch_soup(url):
    """Download *url* and return it parsed with the 'html.parser' backend."""
    response = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx instead of parsing an error page as an event.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _html_text(raw):
    """Escape plain text so it can be embedded in HTML element content."""
    return escape(raw, quote=False)


def scrape() -> str:
    """Scrape the event calendar and render it in three text layouts.

    Every event linked from the overview page is fetched individually. The
    date, time, optional place, title and optional first description
    paragraph are read from the event page and appended to three renderings:

    * "print":    ``<date> <time>, [<place>, ]<b><title></b>[ <description>]``
    * "digital":  like "print" but prefixed with the German weekday name and
      grouped under a bold ``<Month> <Year>`` heading
    * "markdown": a ``* `` bullet per event, grouped under the same heading

    Returns:
        One HTML fragment containing the three renderings in the order
        print, digital, markdown, separated by ``<br><hr>``.

    Raises:
        requests.RequestException: if a page cannot be downloaded or the
            server answers with an HTTP error status.
        IndexError: if an event page lacks the date, time or title element.
        ValueError: if the date is not a valid ``DD.MM.YYYY`` value.
    """
    print_out = []
    digital_out = []
    markdown_out = []

    # (year, month) of the last heading written; comparing the year as well
    # keeps e.g. December 2024 followed by December 2025 under two headings.
    last_heading = None

    for link in _fetch_soup(_OVERVIEW_URL).select('.media-body h2 a'):
        # urljoin copes with relative, root-relative and absolute hrefs.
        event_soup = _fetch_soup(urljoin(_SITE_ROOT, link.attrs['href']))
        fields = event_soup.select('.calendarize dl dd')

        # date
        raw_date = fields[0].text.strip()
        date_parts = raw_date.split('.')
        day = int(date_parts[0])
        month = int(date_parts[1])
        year = int(date_parts[2])
        # Constructing the datetime first validates day and month, so the
        # month-name lookup below can never silently wrap around.
        weekday = _WEEKDAY_NAMES[datetime(year, month, day).weekday()]
        date = _html_text(raw_date)

        # month heading
        if (year, month) != last_heading:
            last_heading = (year, month)
            heading = f'{_MONTH_NAMES[month - 1]} {year}'
            digital_out.append(f'<br><b>{heading}</b><br>')
            markdown_out.append(f'<br><b>*{heading}*</b><br>')

        # time: only the first token of the timespan is shown
        start = fields[1].text.strip().split(' ')[0]
        if start != 'Ganztags':
            start += ' Uhr'
        time_label = _html_text(start)

        # place (optional third field)
        place = _html_text(fields[2].text.strip()) if len(fields) > 2 else None
        place_then_comma = '' if place is None else f'{place}, '
        comma_then_place = '' if place is None else f', {place}'

        # title
        title = _html_text(event_soup.select('.calendarize h1')[0].text.strip())

        # description (optional, first paragraph only)
        paragraphs = event_soup.select('.calendarize .text p')
        description = ''
        if paragraphs:
            description = ' ' + _html_text(paragraphs[0].text.strip())

        print_out.append(
            f'{date} {time_label}, {place_then_comma}'
            f'<b>{title}</b>{description}<br>'
        )
        digital_out.append(
            f'{weekday}, {date} {time_label}, {place_then_comma}'
            f'<b>{title}</b>{description}<br>'
        )
        markdown_out.append(
            f'* {weekday}, {date}, {time_label}{comma_then_place}: '
            f'{title}{description}<br>'
        )

    return '<br><hr>'.join(
        (''.join(print_out), ''.join(digital_out), ''.join(markdown_out))
    )
 | |
| 
 | |
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the rendered calendar page, re-scraping at most every 30 seconds.

    Uses the module-level ``cache`` dict (keys ``'time'`` and ``'output'``),
    the ``templateHtml`` string and the ``scrape()`` function.
    """

    def do_GET(self):
        """Answer a GET request with the calendar page.

        The page is rebuilt when the cached copy is older than 30 seconds.
        If rebuilding fails, the previous copy is served when there is one;
        otherwise the client receives a 502 error.
        """
        current_time = time()
        if cache['time'] < current_time - 30:
            try:
                html = templateHtml.replace('--body--', scrape())
            except Exception as error:
                # Request boundary: report the failure instead of dropping
                # the connection without any HTTP response.
                self.log_error('Scraping failed: %r', error)
                if cache['output'] is None:
                    self.send_error(502, 'Could not load the calendar')
                    return
            else:
                cache['output'] = html.encode('utf-8')
                cache['time'] = current_time

        body = cache['output']
        self.send_response(200)
        # The body is UTF-8 encoded HTML; say so, so that clients do not
        # have to guess the charset.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
 | |
| 
 | |
| 
 | |
| 
 | |
# Page skeleton; the request handler replaces its '--body--' placeholder.
# Read explicitly as UTF-8 because the response is encoded as UTF-8 too
# (assumes template.html is stored as UTF-8 -- TODO confirm).
with open('template.html', 'r', encoding='utf-8') as templateFile:
    templateHtml = templateFile.read()

# Last rendered page (bytes) and the time() value at which it was produced.
cache = {
    'time': 0,
    'output': None,
}

# Listen on all interfaces, port 8000; the context manager closes the
# listening socket when serve_forever() is interrupted.
with HTTPServer(('', 8000), SimpleHTTPRequestHandler) as httpd:
    httpd.serve_forever()
 |