diff --git a/scrape.py b/scrape.py
index 2e140b1..de9ad70 100644
--- a/scrape.py
+++ b/scrape.py
@@ -116,22 +116,6 @@ async def main():
}
}
}
- next_batch_state = await matrix[mxid].room_get_state_event(room_id=admin_room,
- event_type=event_type_prefix+'next_batch',
- state_key=mxid)
- if 'token' in next_batch_state.content:
- try:
- sync = await matrix[mxid].sync(timeout=3000,
- sync_filter=sync_filter,
- since=next_batch_state.content['token'])
- next_batch[mxid] = sync.next_batch
-
- continue
- except:
- pass
- # when there is no next_batch token or first sync threw an error,
- # then do a first sync without next_batch
- print('doing first sync for '+mxid)
sync = await matrix[mxid].sync(timeout=3000,
sync_filter=sync_filter)
next_batch[mxid] = sync.next_batch
@@ -139,51 +123,66 @@ async def main():
next_update = datetime.now()
while True:
if next_update < datetime.now():
+ # refresh url cache
+ cache_state = await matrix[next(iter(matrix))].room_get_state_event(room_id=admin_room,
+ event_type=event_type_prefix+'cache',
+ state_key='')
+ if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
+ cache = cache_state.content['url_list']
+ else:
+ cache = []
+
+ # scrape all blog posts and process them
blog = get_blog()
for post in blog:
- mxid = '@'+mxid_prefix+post['game']+':'+homeserver_name
- if mxid in matrix:
- content = {
- 'msgtype': 'm.notice',
- 'body': get_body(post),
- 'format': 'org.matrix.custom.html',
- 'formatted_body': get_formatted_body(post)
- }
- for room_id in matrix[mxid].rooms:
- await matrix[mxid].room_send(room_id=room_id,
- message_type='m.room.message',
- content=content)
+ if post['url'] not in cache:
+ # post url not found in cache
+ mxid = '@'+mxid_prefix+post['game']+':'+homeserver_name
+ if mxid in matrix:
+ # announce new post to matrix rooms
+ content = {
+ 'msgtype': 'm.notice',
+ 'body': get_body(post),
+ 'format': 'org.matrix.custom.html',
+ 'formatted_body': get_formatted_body(post)
+ }
+ for room_id in matrix[mxid].rooms:
+ await matrix[mxid].room_send(room_id=room_id,
+ message_type='m.room.message',
+ content=content)
+ else:
+ # no accesstoken for the calculated mxid
+ content = {
+ 'msgtype': 'm.notice',
+ 'body': 'No accesstoken for '+mxid+' available.',
+ 'format': 'org.matrix.custom.html',
+ 'formatted_body': ('No accesstoken
for '+
+ ''+mxid+'
available.')
+ }
+ # send the message with the first available matrix client,
+ # because we will always have at least one accesstoken
+ await matrix[next(iter(matrix))].room_send(room_id=admin_room,
+ message_type='m.room.message',
+ content=content)
+
+ # add url to cache
+ cache += [post['url']]
+ while len(cache) > len(blog):
+ cache.remove(cache[0])
+ set_state = await matrix[next(iter(matrix))].room_put_state(room_id=admin_room,
+ event_type=event_type_prefix+'cache',
+ content={'url_list': cache})
else:
- content = {
- 'msgtype': 'm.notice',
- 'body': 'No accesstoken for '+mxid+' available.',
- 'format': 'org.matrix.custom.html',
- 'formatted_body': ('No accesstoken
for '+
- ''+mxid+'
available.')
- }
- # send the message with the first available matrix client,
- # because we will always have at least one accesstoken
- await matrix[next(iter(matrix))].room_send(room_id=admin_room,
- message_type='m.room.message',
- content=content)
-
+ # no new posts found
+ pass
next_update = datetime.now() + timedelta(minutes=30)
- next_batch_state = await matrix[mxid].room_get_state_event(room_id=admin_room,
- event_type=event_type_prefix+'next_batch',
- state_key=mxid)
-
for mxid in next_batch:
sync = await matrix[mxid].sync(timeout=10000,
sync_filter=sync_filter,
since=next_batch[mxid])
next_batch[mxid] = sync.next_batch
- await matrix[mxid].room_put_state(room_id=admin_room,
- event_type=event_type_prefix+'next_batch',
- state_key=mxid,
- content={'token': next_batch[mxid]})
-
homeserver_name = environ['HOMESERVER_NAME']
homeserver_url = environ['HOMESERVER_URL']