|
|
@ -1,15 +1,20 @@
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from time import time
|
|
|
|
from time import time
|
|
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
|
|
|
|
|
|
|
|
|
|
# German month names, indexed by (month number - 1).
_MONTH_NAMES = (
    'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
    'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
)

# German weekday names, indexed by datetime.weekday() (Monday == 0).
_WEEKDAY_NAMES = (
    'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag',
    'Freitag', 'Samstag', 'Sonntag',
)


def scrape():
    """Scrape the event calendar and render it as two HTML listings.

    Fetches the calendar overview page, follows every event link found under
    ``.media-body h2 a`` and reads date, time, optional place, title and
    optional description from each event page.

    Two variants are produced from the same data:

    * "print": one line per event, ``<date> <time>, [<place>, ]<b>title</b>
      [description]<br>``.
    * "digital": the same line prefixed with the German weekday name, plus a
      ``<br><month name><br>`` heading whenever the month differs from the
      previous event's month.

    Returns:
        str: the print listing, ``"<hr>"``, then the digital listing.

    Raises:
        requests.RequestException: propagated from ``requests.get``.
        IndexError: if an event page lacks the date/time ``dd`` entries, the
            ``h1`` title, or the date has fewer than three dot-separated parts.
        ValueError: if the date parts are not integers or do not form a
            valid calendar date.
    """
    print_lines = []
    digital_lines = []

    overview_url = 'https://gruene-hohenlohe.de/kalender'
    overview_html = requests.get(overview_url, timeout=60).text
    overview_soup = BeautifulSoup(overview_html, 'html.parser')

    month = None
    for href in overview_soup.select('.media-body h2 a'):
        event_url = 'https://gruene-hohenlohe.de/' + href.attrs['href']
        event_html = requests.get(event_url, timeout=60).text
        # NOTE(review): this line was not visible in the reviewed excerpt;
        # reconstructed by analogy with overview_soup above - confirm.
        event_soup = BeautifulSoup(event_html, 'html.parser')

        data = event_soup.select('.calendarize dl dd')

        # date (the code reads it as day.month.year)
        date = data[0].text.strip()
        date_splitted = date.split('.')
        year = int(date_splitted[2])
        previous_month = month
        month = int(date_splitted[1])  # int() accepts leading zeros
        day = int(date_splitted[0])

        # day of week; constructing the datetime first also validates the
        # month before it is used as a table index below
        weekday_name = _WEEKDAY_NAMES[datetime(year, month, day).weekday()]

        # time: first token of the time span; all-day events carry the
        # literal 'Ganztags' and get no ' Uhr' suffix
        timespan = data[1].text.strip()
        start_time = timespan.split(' ')[0]
        time_formatted = start_time
        if start_time != 'Ganztags':
            time_formatted += ' Uhr'

        # Everything after the date is identical in both variants.
        # NOTE(review): scraped text is inserted into HTML unescaped.
        details = [time_formatted, ', ']

        # place (optional third dd entry)
        if len(data) > 2:
            details += [data[2].text.strip(), ', ']

        # title
        title = event_soup.select('.calendarize h1')[0].text.strip()
        details += ['<b>', title, '</b>']

        # description (optional): only the first paragraph is used
        paragraphs = event_soup.select('.calendarize .text p')
        if paragraphs:
            details += [' ', paragraphs[0].text.strip()]

        details.append('<br>')
        details_html = ''.join(details)

        print_lines.append(date + ' ' + details_html)

        # Month heading in the digital variant whenever the month changes
        # (always for the first event, since month starts as None).
        if month != previous_month:
            digital_lines.append('<br>' + _MONTH_NAMES[month - 1] + '<br>')
        digital_lines.append(weekday_name + ', ' + date + ' ' + details_html)

    return ''.join(print_lines) + "<hr>" + ''.join(digital_lines)
|
|
|
|
|
|
|
|
|
|
|
|
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
|
|
|
|
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
|
|
|
|
def do_GET(self):
|
|
|
|
def do_GET(self):
|
|
|
@ -69,8 +131,8 @@ with open('template.html', 'r') as templateFile:
|
|
|
|
templateHtml = templateFile.read()
|
|
|
|
templateHtml = templateFile.read()
|
|
|
|
|
|
|
|
|
|
|
|
cache = {
|
|
|
|
cache = {
|
|
|
|
"time": 0,
|
|
|
|
'time': 0,
|
|
|
|
"output": None
|
|
|
|
'output': None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
|
|
|
|
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
|
|
|
|