@@ -125,6 +125,7 @@ async def main():
     # use this event type to store our url cache
     cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
+    cache = {}
 
     while True:
         # do sync first to e.g. accept an admin room invite
         sync = await matrix.sync(timeout=30000, sync_filter=sync_filter)
@@ -136,20 +137,22 @@ async def main():
         if next_update < datetime.now():
             # refresh url cache
-            cache_state = await matrix.room_get_state_event(room_id=admin_room,
-                                                            event_type=cache_event_type,
-                                                            state_key=category)
-            if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
-                cache = cache_state.content['url_list']
-            else:
-                print('cache is empty')
-                cache = []
+            old_cache = cache
+            cache = {}
+            for category in category_list:
+                cache_state = await matrix.room_get_state_event(room_id=admin_room,
+                                                                event_type=cache_event_type,
+                                                                state_key=category)
+                if category not in cache:
+                    cache[category] = []
+                if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
+                    cache[category] += cache_state.content['url_list']
+                else:
+                    print('cache is empty')
 
             # scrape all blog posts and process them
             blog = get_blog()
             for post in blog:
                 # check if post url is in cache and matches our category
-                if post['url'] not in cache and post['category'] == category:
+                if post['category'] in cache and post['url'] not in cache[post['category']]:
                     # post url not found in cache
                     # announce new post to matrix rooms
                     print('new post: '+post['url'])
@@ -169,21 +172,24 @@ async def main():
                                            content=content)
 
                     # add url to cache
-                    cache += [post['url']]
+                    cache[post['category']] += [post['url']]
                 else:
                     # no new posts found
                     pass
 
             # cleanup cache and push it as room state
-            # trim the cache
-            while len(cache) > 100:
-                cache.remove(cache[0])
+            for category in cache.keys():
+                # trim the cache
+                while len(cache[category]) > 100:
+                    cache[category].remove(cache[category][0])
 
-            # set new cache event
-            await matrix.room_put_state(room_id=admin_room,
-                                        event_type=cache_event_type,
-                                        state_key=category,
-                                        content={'url_list': cache})
+                # set new cache event
+                if old_cache.get(category) != cache[category]:
+                    await matrix.room_put_state(room_id=admin_room,
+                                                event_type=cache_event_type,
+                                                state_key=category,
+                                                content={'url_list': cache[category]})
 
             # wait between 15min and 30min to randomize scraping
             next_update = datetime.now() + timedelta(minutes=randrange(15, 30))
@@ -202,8 +208,9 @@ print('accesstoken_file: '+environ['ACCESSTOKEN_FILE'])
 admin_room = environ['ADMIN_ROOM']
 print('admin_room: '+admin_room)
 
-category = environ['CATEGORY']
-print('category: '+category)
+category_list = environ['CATEGORY'].split(',')
+print('categories:')
+print(category_list)
 
 asyncio.run(main())
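
For context, a minimal sketch (not part of the commit) of how one category's cache round-trips through Matrix room state. It assumes the bot talks to Matrix via matrix-nio's AsyncClient, which the room_get_state_event/room_put_state calls above suggest; the helper names are illustrative only.

from nio import AsyncClient, RoomGetStateEventResponse

CACHE_EVENT_TYPE = 'de.lubiland.snowstorm-matrix.cache'

async def read_category_cache(matrix: AsyncClient, room_id: str, category: str) -> list:
    # one state event per category: the state_key selects the category
    resp = await matrix.room_get_state_event(room_id=room_id,
                                             event_type=CACHE_EVENT_TYPE,
                                             state_key=category)
    if isinstance(resp, RoomGetStateEventResponse) and 'url_list' in resp.content:
        return resp.content['url_list']
    return []

async def write_category_cache(matrix: AsyncClient, room_id: str, category: str, url_list: list) -> None:
    # overwrite the category's state event with the trimmed list
    await matrix.room_put_state(room_id=room_id,
                                event_type=CACHE_EVENT_TYPE,
                                state_key=category,
                                content={'url_list': url_list})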
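
And a self-contained sketch of the in-memory structure the main loop now maintains: a dict keyed by category, each value a list of post URLs trimmed to the newest 100. The categories and URLs below are made-up examples; with CATEGORY=hearthstone,wow the bot would build category_list = ['hearthstone', 'wow'].

cache = {
    'hearthstone': ['https://example.invalid/hs-post-%d' % i for i in range(105)],
    'wow': ['https://example.invalid/wow-post-1'],
}

for category in cache.keys():
    # same effect as the while/remove trim in the diff: drop the oldest entries
    while len(cache[category]) > 100:
        cache[category].remove(cache[category][0])

print({category: len(url_list) for category, url_list in cache.items()})
# -> {'hearthstone': 100, 'wow': 1}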