From e678081a779ac6ea55f731b2ae756614ffc57c6d Mon Sep 17 00:00:00 2001
From: lub
Date: Wed, 5 Jun 2024 16:09:37 +0200
Subject: [PATCH] init

---
Changes since the first version of this patch (scrape.py only):
 - fail on HTTP error statuses instead of parsing the error page
 - resolve event links with urljoin instead of string concatenation
 - skip and report events with missing fields instead of aborting
 - drop the commented-out code
The blob id on the scrape.py "index" line was not regenerated.

 .gitignore       |  1 +
 requirements.txt |  2 ++
 scrape.py        | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 requirements.txt
 create mode 100644 scrape.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5ceb386
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+venv
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9d981c3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+bs4
+requests
diff --git a/scrape.py b/scrape.py
new file mode 100644
index 0000000..815b06b
--- /dev/null
+++ b/scrape.py
@@ -0,0 +1,66 @@
+"""Print a one-line summary of every event in the gruene-hohenlohe.de calendar.
+
+Each line has the form ``<date> <start time> Uhr, <place>, <description>``.
+"""
+import sys
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+BASE_URL = 'https://gruene-hohenlohe.de/'
+OVERVIEW_URL = urljoin(BASE_URL, 'kalender')
+REQUEST_TIMEOUT = 60  # seconds, applied to every HTTP request
+
+
+def fetch_soup(session, url):
+    """Download *url* and return it parsed; raise on HTTP error statuses."""
+    response = session.get(url, timeout=REQUEST_TIMEOUT)
+    # Without this an error page would be parsed like a regular event page.
+    response.raise_for_status()
+    return BeautifulSoup(response.text, 'html.parser')
+
+
+def format_event(event_soup):
+    """Build the summary line for one parsed event detail page.
+
+    Raises ValueError when the page lacks the date, time or place entry.
+    """
+    fields = [dd.text.strip() for dd in event_soup.select('.calendarize dl dd')]
+    if len(fields) < 3:
+        raise ValueError(f'expected 3 detail fields, found {len(fields)}')
+    date, timespan, place = fields[:3]
+
+    # Only the first space-separated token of the time field is printed.
+    start_time = timespan.split(' ')[0]
+    summary = f'{date} {start_time} Uhr, {place}, '
+
+    # The description is optional; an event without one keeps the bare prefix.
+    description = event_soup.select_one('.calendarize .text p')
+    if description is not None:
+        summary += description.text.strip()
+    return summary
+
+
+def main():
+    """Print all events; return the number of events that had to be skipped."""
+    skipped = 0
+    with requests.Session() as session:
+        overview_soup = fetch_soup(session, OVERVIEW_URL)
+        for link in overview_soup.select('.media-body h2 a'):
+            href = link.get('href')
+            if not href:
+                continue
+            # urljoin copes with relative, root-relative and absolute hrefs.
+            event_url = urljoin(BASE_URL, href)
+            try:
+                print(format_event(fetch_soup(session, event_url)))
+            except (requests.RequestException, ValueError) as err:
+                # One broken event must not abort the rest of the listing.
+                print(f'skipping {event_url}: {err}', file=sys.stderr)
+                skipped += 1
+    return skipped
+
+
+if __name__ == '__main__':
+    sys.exit(1 if main() else 0)