add some rudimentary webserver

main
lub 7 months ago
parent e678081a77
commit 0aa90f86d6

@ -1,46 +1,60 @@
from html import escape
from http.server import BaseHTTPRequestHandler, HTTPServer

import requests
from bs4 import BeautifulSoup
# Calendar overview page that links to every event's detail page.
overview_url = 'https://gruene-hohenlohe.de/kalender'

# Snapshot of the overview, downloaded and parsed once when the module is
# loaded; it is not refreshed afterwards.
overview_html = requests.get(url=overview_url, timeout=60).text
overview_soup = BeautifulSoup(markup=overview_html, features='html.parser')
def _format_event(event_soup):
    """Render one parsed event detail page as a single HTML line.

    The line has the form ``<date> <start> Uhr, <b><title></b> <description>``.
    The description is optional and is left out when the page has none.
    The place field (third ``dd``) is intentionally not rendered.

    Returns:
        The formatted line, or ``None`` when the page lacks the date/time
        fields or the heading, so one odd page cannot break the whole list.
    """
    fields = event_soup.select('.calendarize dl dd')
    headings = event_soup.select('.calendarize h1')
    if len(fields) < 2 or not headings:
        return None

    date = fields[0].text.strip()
    # The time field holds a span; only its first token (the start) is shown.
    start_time = fields[1].text.strip().split(' ')[0]
    title = headings[0].text.strip()

    # Scraped text is untrusted and ends up inside HTML markup, so escape it.
    line = '{} {} Uhr, <b>{}</b>'.format(
        escape(date), escape(start_time), escape(title))

    paragraphs = event_soup.select('.calendarize .text p')
    if paragraphs:
        line += ' ' + escape(paragraphs[0].text.strip())
    return line


def scrape():
    """Collect all events from the calendar and return them as HTML text.

    The overview page is downloaded on every call instead of reusing the
    module-level ``overview_soup`` snapshot, which is taken once at import
    and would go stale in a long-running server. Each linked detail page
    is then fetched and formatted.

    Returns:
        One line per event, joined with ``<br>`` and a newline. An empty
        string is returned when no event could be formatted.

    Raises:
        requests.RequestException: if a page cannot be downloaded.
    """
    overview_html = requests.get(overview_url, timeout=60).text
    overview = BeautifulSoup(overview_html, 'html.parser')

    lines = []
    for link in overview.select('.media-body h2 a'):
        event_url = 'https://gruene-hohenlohe.de/' + link.attrs['href']
        event_html = requests.get(event_url, timeout=60).text
        line = _format_event(BeautifulSoup(event_html, 'html.parser'))
        if line is not None:
            lines.append(line)
    return '<br>\n'.join(lines)
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the scraped event list as a UTF-8 HTML document."""

    def do_GET(self):
        """Answer any GET request with the output of ``scrape()``.

        The body is built before the status line is sent, so a scraping
        failure can be reported as a 502 instead of a 200 with no content.
        """
        try:
            # ``or ''`` tolerates a scrape() that returns None for "no events".
            body = (scrape() or '').encode('utf-8')
        except Exception as exc:  # request boundary: report, never crash
            self.log_error('scraping failed: %r', exc)
            self.send_error(502, 'Scraping failed')
            return

        self.send_response(200)
        # The payload contains HTML markup and non-ASCII text, so declare
        # both the media type and the encoding rather than leaving clients
        # to guess.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
# Listen on all interfaces, port 8000. The context manager closes the
# listening socket when the server stops.
with HTTPServer(('', 8000), SimpleHTTPRequestHandler) as httpd:
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop this script; exit without a
        # traceback.
        pass

Loading…
Cancel
Save