diff --git a/README.md b/README.md
index 1bf5c67..0f4d093 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,6 @@
Matrix Room: [#snowstorm-matrix:imninja.net](https://matrix.to/#/#snowstorm-matrix:imninja.net)
This bot will send news about Blizzard games to Matrix room they got invited to.
-It will additionally alert an admin room about missing bots.
Information about which URLs already got posted is also saved into the admin room via state events.
@@ -31,10 +30,11 @@ After initial configuration you can run it manually:
```bash
docker run --rm \
-v $(pwd)/heroesofthestorm:/heroesofthestorm:ro \
- -v $(pwd)/worldofwarcraft:/worldofwarcraft:ro \
- -e ADMIN_ROOM='!iesofojasief90429ewiofj:matrix.org' \
- -e ACCESSTOKEN_HEROESOFTHESTORM_FILE=/heroesofthestorm \
- -e ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/worldofwarcraft \
+ -e HOMESERVER=https://example.org \
+ -e MXID=@heeeroooooooes:example.org \
+ -e ACCESSTOKEN_FILE=/heroesofthestorm \
+ -e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \
+ -e CATEGORY=heroesofthestorm \
snowstorm-matrix
```
@@ -45,16 +45,11 @@ Or via docker-compose/swarm:
deploy:
replicas: 1
secrets:
- - snowstorm-matrix_heroesofthestorm
- - snowstorm-matrix_insideblizzard
- - snowstorm-matrix_overwatch
- snowstorm-matrix_worldofwarcraft
environment:
- - HOMESERVER_URL=http://synapse:8008
- - HOMESERVER_NAME=matrix.org
- - ADMIN_ROOM=!jjpPluoxZoAOBQeYer:imninja.net
- - ACCESSTOKEN_HEROESOFTHESTORM_FILE=/run/secrets/snowstorm-matrix_heroesofthestorm
- - ACCESSTOKEN_INSIDEBLIZZARD_FILE=/run/secrets/snowstorm-matrix_insideblizzard
- - ACCESSTOKEN_OVERWATCH_FILE=/run/secrets/snowstorm-matrix_overwatch
- - ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
+ - HOMESERVER=http://synapse:8008
+ - ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
+ - MXID=@forthehorde:example.com
+ - ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org
+ - CATEGORY=worldofwarcraft
```
diff --git a/scrape.py b/scrape.py
index 0055df3..5f36073 100644
--- a/scrape.py
+++ b/scrape.py
@@ -15,12 +15,6 @@ def get_accesstoken_from_file(accesstoken_path):
accesstoken_file.close()
return single_accesstoken
-async def on_event(room, event):
- if hasattr(event, 'membership'):
- if event.membership == 'invite':
- # automatically join invites
- print('joining '+room.room_id)
- join = await matrix[event.source['state_key']].join(room.room_id)
def get_blog():
url = 'https://news.blizzard.com/en-us/'
html = requests.get(url).text
@@ -40,7 +34,7 @@ def get_blog():
blog.append({
'image': image_url,
- 'game': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
+ 'category': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
'title': text_list[1].contents[0],
'description': '',
'url': base_url+feature_html.attrs['href'],
@@ -56,7 +50,7 @@ def get_blog():
blog.append({
'image': image_url,
- 'game': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
+ 'category': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
'title': content_html.find(class_='ArticleListItem-title').contents[0],
'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0],
'url': base_url+article_html.find(class_='ArticleLink').attrs['href'],
@@ -86,50 +80,53 @@ def get_formatted_body(post):
return formatted_body
async def main():
- event_type_prefix = 'de.lubiland.snowstorm-matrix.'
- next_batch = {}
- for game in accesstoken:
- # initialize new client
- mxid = '@'+mxid_prefix+game+':'+homeserver_name
- config = ClientConfig(store_sync_tokens=True)
- matrix[mxid] = AsyncClient(homeserver_url,
- config=config)
+ # initialize new client
+ config = ClientConfig(store_sync_tokens=True)
+ matrix = AsyncClient(homeserver,
+ config=config)
- # login
- login_response = LoginResponse(mxid,
- 'xxx',
- accesstoken[game])
- await matrix[mxid].receive_response(login_response)
+ # login
+ login_response = LoginResponse(mxid,
+ 'xxx',
+ accesstoken)
+ await matrix.receive_response(login_response)
- matrix[mxid].add_event_callback(on_event, InviteEvent)
- # do a first sync
- sync_filter = {
- 'room': {
- 'state': {
- 'types': ['m.room.member'],
- 'lazy_load_members': True
- },
- 'timeline': {
- 'types': ['invalid']
- },
- 'ephemeral': {
- 'types': ['invalid']
- }
+ # filter out everything except m.room.member (for invites)
+ sync_filter = {
+ 'room': {
+ 'state': {
+ 'types': ['m.room.member'],
+ 'lazy_load_members': True
+ },
+ 'timeline': {
+ 'types': ['invalid']
+ },
+ 'ephemeral': {
+ 'types': ['invalid']
}
}
- sync = await matrix[mxid].sync(timeout=3000,
- sync_filter=sync_filter)
- next_batch[mxid] = sync.next_batch
+ }
+ # setting this to enforce a scrape at first loop
next_update = datetime.now()
+
+ # use this event type to store our url cache
+ cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
+
while True:
+ # long-poll sync (10 s, so the loop does not spin) to e.g. accept an admin room invite
+ sync = await matrix.sync(timeout=10000, sync_filter=sync_filter)
+ for room_id in sync.rooms.invite:
+ print('joining '+room_id)
+ await matrix.join(room_id)
+
if next_update < datetime.now():
# refresh url cache
- cache_state = await matrix[next(iter(matrix))].room_get_state_event(room_id=admin_room,
- event_type=event_type_prefix+'cache',
- state_key='')
+ cache_state = await matrix.room_get_state_event(room_id=admin_room,
+ event_type=cache_event_type,  # same type room_put_state writes
+ state_key=category)
if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
cache = cache_state.content['url_list']
else:
@@ -138,71 +135,50 @@ async def main():
# scape all blog posts and process them
blog = get_blog()
for post in blog:
- if post['url'] not in cache:
+ # check if post url is in cache and matches our category
+ if post['url'] not in cache and post['category'] == category:
# post url not found in cache
- mxid = '@'+mxid_prefix+post['game']+':'+homeserver_name
- if mxid in matrix:
- # announce new post to matrix rooms
- content = {
- 'msgtype': 'm.notice',
- 'body': get_body(post),
- 'format': 'org.matrix.custom.html',
- 'formatted_body': get_formatted_body(post)
- }
- for room_id in matrix[mxid].rooms:
- if room_id != admin_room:
- # don't send updates to the admin room
- await matrix[mxid].room_send(room_id=room_id,
- message_type='m.room.message',
- content=content)
- else:
- # no accesstoken for the calculated mxid
- content = {
- 'msgtype': 'm.notice',
- 'body': 'No accesstoken for '+mxid+' available.',
- 'format': 'org.matrix.custom.html',
- 'formatted_body': ('No accesstoken
for '+
- ''+mxid+'
available.')
- }
- # send the message with the first available matrix client,
- # because we will always have at least one accesstoken
- await matrix[next(iter(matrix))].room_send(room_id=admin_room,
- message_type='m.room.message',
- content=content)
+ # announce new post to matrix rooms
+ content = {
+ 'msgtype': 'm.notice',
+ 'body': get_body(post),
+ 'format': 'org.matrix.custom.html',
+ 'formatted_body': get_formatted_body(post)
+ }
+ for room_id in matrix.rooms:
+ # don't send updates to the admin room
+ if room_id != admin_room:
+ await matrix.room_send(room_id=room_id,
+ message_type='m.room.message',
+ content=content)
# add url to cache
cache += [post['url']]
- # check for double the post count, to have some buffer for manually purging URLs
- # otherwise the cache could reshuffle when you remove too many URLs at once
- while len(cache) > len(blog)*2:
- cache.remove(cache[0])
- set_state = await matrix[next(iter(matrix))].room_put_state(room_id=admin_room,
- event_type=event_type_prefix+'cache',
- content={'url_list': cache})
+
else:
# no new posts found
pass
+
+ # trim the cache
+ # len(blog) is usually bigger than the count of posts in our category,
+ # so with len(blog) instead of the latter we have some buffer
+ while len(cache) > len(blog):
+ cache.remove(cache[0])
+
+ # set new cache event
+ await matrix.room_put_state(room_id=admin_room,
+ event_type=cache_event_type,
+ state_key=category,
+ content={'url_list': cache})
+
next_update = datetime.now() + timedelta(minutes=15)
- for mxid in next_batch:
- sync = await matrix[mxid].sync(timeout=10000,
- sync_filter=sync_filter,
- since=next_batch[mxid])
- next_batch[mxid] = sync.next_batch
-
-homeserver_name = environ['HOMESERVER_NAME']
-homeserver_url = environ['HOMESERVER_URL']
-mxid_prefix = environ['MXID_PREFIX']
+homeserver = environ['HOMESERVER']
+mxid = environ['MXID']
+accesstoken = get_accesstoken_from_file(environ['ACCESSTOKEN_FILE'])
admin_room = environ['ADMIN_ROOM']
+category = environ['CATEGORY']
-accesstoken = {}
-for var in environ:
- if (game := re.match('^ACCESSTOKEN_([A-Z]*)_FILE$', var)) is not None:
- accesstoken[game[1].lower()] = get_accesstoken_from_file(environ[var])
-
-
-
-matrix = {}
asyncio.run(main())