add some rudimentary webserver
parent
e678081a77
commit
0aa90f86d6
@ -1,46 +1,60 @@
|
||||
import requests
from bs4 import BeautifulSoup
from http.server import BaseHTTPRequestHandler, HTTPServer

# Calendar overview page listing all upcoming events of the local chapter.
overview_url = 'https://gruene-hohenlohe.de/kalender'

# The overview page is fetched and parsed ONCE at import time; scrape()
# re-reads this parsed page on every request, so the event *list* is only
# as fresh as the last process restart (each event's detail page IS
# re-fetched per request).
# NOTE(review): if the list should update without a restart, move this
# fetch into scrape() — TODO confirm intended caching behavior.
overview_html = requests.get(overview_url, timeout=60).text
overview_soup = BeautifulSoup(overview_html, 'html.parser')
def scrape():
    """Scrape every event linked from the overview page.

    Returns one HTML-ish summary line per event, joined with ``<br>`` tags
    (empty string when the overview lists no events). Each line looks like::

        <date> <start-time> Uhr, , <b><title></b> <description>

    The place column is intentionally omitted (see comment below), and the
    description is optional.
    """
    event_lines = []
    for link in overview_soup.select('.media-body h2 a'):
        event_url = 'https://gruene-hohenlohe.de/' + link.attrs['href']
        event_html = requests.get(event_url, timeout=60).text
        event_soup = BeautifulSoup(event_html, 'html.parser')

        # The detail page's definition list: [0]=date, [1]=timespan, [2]=place.
        data = event_soup.select('.calendarize dl dd')

        # Build the line from parts and join once (avoids quadratic +=).
        parts = []

        # date
        parts.append(data[0].text.strip())
        parts.append(' ')

        # time: the <dd> holds something like "19:00 - 21:00"; keep the start.
        timespan = data[1].text.strip()
        start_time = timespan.split(' ')[0]
        parts.append(start_time)
        parts.append(' Uhr')
        parts.append(', ')

        # place — deliberately omitted (was: data[2].text.strip()), but the
        # second separator is kept so the line format stays stable.
        parts.append(', ')

        # title
        parts.append('<b>')
        parts.append(event_soup.select('.calendarize h1')[0].text.strip())
        parts.append('</b>')

        # description is optional; some events have no text paragraph.
        try:
            description = event_soup.select('.calendarize .text p')[0].text.strip()
            parts.append(' ')
            parts.append(description)
        except IndexError:
            pass

        line = ''.join(parts)
        print(line)
        event_lines.append(line)

    # BUG FIX: the original `return output` sat INSIDE the loop, so only the
    # first event was ever served (and an empty calendar returned None,
    # crashing the caller's .encode()). Now all events are returned.
    return '<br>\n'.join(event_lines)
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serves the scraped event list as the response to every GET request."""

    def do_GET(self):
        # Scrape BEFORE sending the status line: if scraping raises, no
        # half-written 200 response has gone out on the wire yet.
        # (Re-scrapes all event detail pages on each request — acceptable
        # for a rudimentary low-traffic server.)
        body = scrape().encode('utf-8')
        self.send_response(200)
        # Declare the content type so browsers render the <b> markup and
        # decode the UTF-8 German text (umlauts) correctly instead of
        # guessing an encoding.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(body)
# Listen on all interfaces, port 8000, and serve until interrupted.
httpd = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
try:
    httpd.serve_forever()
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop this script; exit quietly.
    pass
finally:
    # Release the listening socket so an immediate restart can rebind.
    httpd.server_close()
Loading…
Reference in New Issue