add support for multiple categories

master
lub 2 years ago
parent 4a531d98f5
commit ecf32bc114

@ -35,7 +35,7 @@ docker run --rm \
-e MXID=@heeeroooooooes:example.org \ -e MXID=@heeeroooooooes:example.org \
-e ACCESSTOKEN_FILE=/heroesofthestorm \ -e ACCESSTOKEN_FILE=/heroesofthestorm \
-e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \ -e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \
-e CATEGORY=heroesofthestorm \ -e CATEGORY=heroesofthestorm,insideblizzard,battlenet \
gitea.lubiland.de/lub/snowstorm-matrix:latest gitea.lubiland.de/lub/snowstorm-matrix:latest
``` ```
@ -52,7 +52,7 @@ Or via docker-compose/swarm:
- ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_overwatch - ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_overwatch
- MXID=@bastionrulez:example.com - MXID=@bastionrulez:example.com
- ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org - ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org
- CATEGORY=overwatch - CATEGORY=overwatch,overwatch2
snowstorm-matrix_worldofwarcraft: snowstorm-matrix_worldofwarcraft:
image: gitea.lubiland.de/lub/snowstorm-matrix:latest image: gitea.lubiland.de/lub/snowstorm-matrix:latest
deploy: deploy:

@ -125,6 +125,7 @@ async def main():
# use this event type to store our url cache # use this event type to store our url cache
cache_event_type = 'de.lubiland.snowstorm-matrix.cache' cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
cache = {}
while True: while True:
# do sync first to e.g. accept an admin room invite # do sync first to e.g. accept an admin room invite
sync = await matrix.sync(timeout=30000, sync_filter=sync_filter) sync = await matrix.sync(timeout=30000, sync_filter=sync_filter)
@ -136,20 +137,22 @@ async def main():
if next_update < datetime.now(): if next_update < datetime.now():
# refresh url cache # refresh url cache
old_cache = cache
cache = {}
for category in category_list:
cache_state = await matrix.room_get_state_event(room_id=admin_room, cache_state = await matrix.room_get_state_event(room_id=admin_room,
event_type=cache_event_type, event_type=cache_event_type,
state_key=category) state_key=category)
if hasattr(cache_state, 'content') and 'url_list' in cache_state.content: if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
cache = cache_state.content['url_list'] if not hasattr(cache, category):
else: cache[category] = []
print('cache is empty') cache[category] += cache_state.content['url_list']
cache = []
# scrape all blog posts and process them # scrape all blog posts and process them
blog = get_blog() blog = get_blog()
for post in blog: for post in blog:
# check if post url is in cache and matches our category # check if post url is in cache and matches our category
if post['url'] not in cache and post['category'] == category: if post['category'] in category_list and hasattr(cache, post['category']) and post['url'] not in cache[post['category']]:
# post url not found in cache # post url not found in cache
# announce new post to matrix rooms # announce new post to matrix rooms
print('new post: '+post['url']) print('new post: '+post['url'])
@ -169,21 +172,24 @@ async def main():
content=content) content=content)
# add url to cache # add url to cache
cache += [post['url']] cache[post['category']] += [post['url']]
else: else:
# no new posts found # no new posts found
pass pass
# cleanup cache and push it as room state
for category in cache.keys():
# trim the cache # trim the cache
while len(cache) > 100: while len(cache[category]) > 100:
cache.remove(cache[0]) cache[category].remove(cache[category][0])
# set new cache event # set new cache event
if hasattr(old_cache, 'category') and old_cache[category] != cache[category]:
await matrix.room_put_state(room_id=admin_room, await matrix.room_put_state(room_id=admin_room,
event_type=cache_event_type, event_type=cache_event_type,
state_key=category, state_key=category,
content={'url_list': cache}) content={'url_list': cache[category]})
# wait between 15min and 30min to randomize scraping # wait between 15min and 30min to randomize scraping
next_update = datetime.now() + timedelta(minutes=randrange(15, 30)) next_update = datetime.now() + timedelta(minutes=randrange(15, 30))
@ -202,8 +208,9 @@ print('accesstoken_file: '+environ['ACCESSTOKEN_FILE'])
admin_room = environ['ADMIN_ROOM'] admin_room = environ['ADMIN_ROOM']
print('admin_room: '+admin_room) print('admin_room: '+admin_room)
category = environ['CATEGORY'] category_list = environ['CATEGORY'].split(',')
print('category: '+category) print('categories:')
print(category_list)
asyncio.run(main()) asyncio.run(main())

Loading…
Cancel
Save