|
|
|
# NOTE(review): patch metadata from a mangled diff — original hunk header: @ -1,12 +1,14 @@
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
|
|
|
|
|
|
|
|
# Calendar overview page of the Greens' Hohenlohe chapter; event detail
# pages are resolved relative to this site inside scrape().
overview_url = 'https://gruene-hohenlohe.de/kalender'

# NOTE(review): the overview page is fetched exactly once, at import time —
# the process must be restarted to pick up newly published events, and an
# unreachable site makes the import itself fail.
overview_html = requests.get(overview_url, timeout=60).text
overview_soup = BeautifulSoup(overview_html, 'html.parser')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for href in overview_soup.select('.media-body h2 a'):
|
|
|
|
|
def scrape():
|
|
|
|
|
for href in overview_soup.select('.media-body h2 a'):
|
|
|
|
|
event_url = 'https://gruene-hohenlohe.de/' + href.attrs['href']
|
|
|
|
|
event_html = requests.get(event_url, timeout=60).text
|
|
|
|
|
event_soup = BeautifulSoup(event_html, 'html.parser')
|
|
|
|
# NOTE(review): patch metadata from a mangled diff — original hunk header:
# @ -28,19 +30,31 @@ for href in overview_soup.select('.media-body h2 a'):
# The pre-patch file's lines 13-27 (between this hunk and the previous one)
# are absent from this fragment; they initialized `output` and defined `data`.
|
|
|
|
|
output += ', '
|
|
|
|
|
|
|
|
|
|
# place
|
|
|
|
|
output += data[2].text.strip()
|
|
|
|
|
#output += data[2].text.strip()
|
|
|
|
|
output += ', '
|
|
|
|
|
|
|
|
|
|
# title
|
|
|
|
|
#output += '<b>'
|
|
|
|
|
#output += event_soup.select('.calendarize h1')[0].text.strip()
|
|
|
|
|
#output += '</b>'
|
|
|
|
|
output += '<b>'
|
|
|
|
|
output += event_soup.select('.calendarize h1')[0].text.strip()
|
|
|
|
|
output += '</b>'
|
|
|
|
|
|
|
|
|
|
#try:
|
|
|
|
|
try:
|
|
|
|
|
description = event_soup.select('.calendarize .text p')[0].text.strip()
|
|
|
|
|
#output += ' '
|
|
|
|
|
output += ' '
|
|
|
|
|
output += description
|
|
|
|
|
#except IndexError:
|
|
|
|
|
# pass
|
|
|
|
|
except IndexError:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
print(output)
|
|
|
|
|
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the scraped calendar as a small HTML fragment on every GET."""

    def do_GET(self):
        # Re-scrape the event detail pages on each request so the response
        # reflects current data (the overview page itself is only fetched
        # once, at import time).
        body = scrape().encode('utf-8')
        self.send_response(200)
        # The payload contains HTML markup (<b>...</b>) and German umlauts;
        # declare type and charset so clients decode and render it correctly.
        # (The original sent no Content-Type header at all.)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(body)
|
|
|
|
|
|
|
|
|
|
# Bind to all interfaces on port 8000 and block forever serving requests.
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()
|
|
|
|
|