From 05f9da94c0f014923b8da1961a6beb70c1a19ae5 Mon Sep 17 00:00:00 2001 From: lub Date: Thu, 20 Oct 2022 00:05:40 +0200 Subject: [PATCH] optimize cache logic --- scrape.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scrape.py b/scrape.py index f6f5a11..1796f88 100644 --- a/scrape.py +++ b/scrape.py @@ -125,7 +125,6 @@ async def main(): # use this event type to store our url cache cache_event_type = 'de.lubiland.snowstorm-matrix.cache' - cache = {} while True: # do sync first to e.g. accept an admin room invite sync = await matrix.sync(timeout=30000, sync_filter=sync_filter) @@ -137,22 +136,22 @@ async def main(): if next_update < datetime.now(): # refresh url cache - old_cache = cache cache = {} for category in category_list: cache_state = await matrix.room_get_state_event(room_id=admin_room, event_type=cache_event_type, state_key=category) + if not hasattr(cache, category): + cache[category] = [] if hasattr(cache_state, 'content') and 'url_list' in cache_state.content: - if not hasattr(cache, category): - cache[category] = [] cache[category] += cache_state.content['url_list'] + old_cache = cache # scrape all blog posts and process them blog = get_blog() for post in blog: # check if post url is in cache and matches our category - if post['category'] in category_list and hasattr(cache, post['category']) and post['url'] not in cache[post['category']]: + if post['category'] in category_list and post['url'] not in cache[post['category']]: # post url not found in cache # announce new post to matrix rooms print('new post: '+post['url']) @@ -185,7 +184,7 @@ async def main(): cache[category].remove(cache[category][0]) # set new cache event - if hasattr(old_cache, 'category') and old_cache[category] != cache[category]: + if old_cache[category] != cache[category]: await matrix.room_put_state(room_id=admin_room, event_type=cache_event_type, state_key=category,