add support for multiple categories

master
lub 2 years ago
parent 4a531d98f5
commit ecf32bc114

@ -35,7 +35,7 @@ docker run --rm \
-e MXID=@heeeroooooooes:example.org \
-e ACCESSTOKEN_FILE=/heroesofthestorm \
-e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \
-e CATEGORY=heroesofthestorm \
-e CATEGORY=heroesofthestorm,insideblizzard,battlenet \
gitea.lubiland.de/lub/snowstorm-matrix:latest
```
@ -52,7 +52,7 @@ Or via docker-compose/swarm:
- ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_overwatch
- MXID=@bastionrulez:example.com
- ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org
- CATEGORY=overwatch
- CATEGORY=overwatch,overwatch2
snowstorm-matrix_worldofwarcraft:
image: gitea.lubiland.de/lub/snowstorm-matrix:latest
deploy:

@ -125,6 +125,7 @@ async def main():
# use this event type to store our url cache
cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
cache = {}
while True:
# do sync first to e.g. accept an admin room invite
sync = await matrix.sync(timeout=30000, sync_filter=sync_filter)
@ -136,20 +137,22 @@ async def main():
if next_update < datetime.now():
# refresh url cache
old_cache = cache
cache = {}
for category in category_list:
cache_state = await matrix.room_get_state_event(room_id=admin_room,
event_type=cache_event_type,
state_key=category)
if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
cache = cache_state.content['url_list']
else:
print('cache is empty')
cache = []
if not hasattr(cache, category):
cache[category] = []
cache[category] += cache_state.content['url_list']
# scrape all blog posts and process them
blog = get_blog()
for post in blog:
# check if post url is in cache and matches our category
if post['url'] not in cache and post['category'] == category:
if post['category'] in category_list and hasattr(cache, post['category']) and post['url'] not in cache[post['category']]:
# post url not found in cache
# announce new post to matrix rooms
print('new post: '+post['url'])
@ -169,21 +172,24 @@ async def main():
content=content)
# add url to cache
cache += [post['url']]
cache[post['category']] += [post['url']]
else:
# no new posts found
pass
# cleanup cache and push it as room state
for category in cache.keys():
# trim the cache
while len(cache) > 100:
cache.remove(cache[0])
while len(cache[category]) > 100:
cache[category].remove(cache[category][0])
# set new cache event
if hasattr(old_cache, 'category') and old_cache[category] != cache[category]:
await matrix.room_put_state(room_id=admin_room,
event_type=cache_event_type,
state_key=category,
content={'url_list': cache})
content={'url_list': cache[category]})
# wait between 15min and 30min to randomize scraping
next_update = datetime.now() + timedelta(minutes=randrange(15, 30))
@ -202,8 +208,9 @@ print('accesstoken_file: '+environ['ACCESSTOKEN_FILE'])
admin_room = environ['ADMIN_ROOM']
print('admin_room: '+admin_room)
category = environ['CATEGORY']
print('category: '+category)
category_list = environ['CATEGORY'].split(',')
print('categories:')
print(category_list)
asyncio.run(main())

Loading…
Cancel
Save