refactor main() into single category bot

master
lub 5 years ago
parent 09b86f2951
commit 586dcf15de

@ -2,7 +2,6 @@
Matrix Room: [#snowstorm-matrix:imninja.net](https://matrix.to/#/#snowstorm-matrix:imninja.net) Matrix Room: [#snowstorm-matrix:imninja.net](https://matrix.to/#/#snowstorm-matrix:imninja.net)
This bot will send news about Blizzard games to the Matrix rooms it has been invited to. This bot will send news about Blizzard games to the Matrix rooms it has been invited to.
It will additionally alert an admin room about missing bots.
Information about which URLs already got posted is also saved into the admin room via state events. Information about which URLs already got posted is also saved into the admin room via state events.
@ -31,10 +30,11 @@ After initial configuration you can run it manually:
```bash ```bash
docker run --rm \ docker run --rm \
-v $(pwd)/heroesofthestorm:/heroesofthestorm:ro \ -v $(pwd)/heroesofthestorm:/heroesofthestorm:ro \
-v $(pwd)/worldofwarcraft:/worldofwarcraft:ro \ -e HOMESERVER=https://example.org \
-e ADMIN_ROOM='!iesofojasief90429ewiofj:matrix.org' \ -e MXID=@heeeroooooooes:example.org \
-e ACCESSTOKEN_HEROESOFTHESTORM_FILE=/heroesofthestorm \ -e ACCESSTOKEN_FILE=/heroesofthestorm \
-e ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/worldofwarcraft \ -e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \
-e CATEGORY=heroesofthestorm \
snowstorm-matrix snowstorm-matrix
``` ```
@ -45,16 +45,11 @@ Or via docker-compose/swarm:
deploy: deploy:
replicas: 1 replicas: 1
secrets: secrets:
- snowstorm-matrix_heroesofthestorm
- snowstorm-matrix_insideblizzard
- snowstorm-matrix_overwatch
- snowstorm-matrix_worldofwarcraft - snowstorm-matrix_worldofwarcraft
environment: environment:
- HOMESERVER_URL=http://synapse:8008 - HOMESERVER=http://synapse:8008
- HOMESERVER_NAME=matrix.org - ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
- ADMIN_ROOM=!jjpPluoxZoAOBQeYer:imninja.net - MXID=@forthehorde:example.com
- ACCESSTOKEN_HEROESOFTHESTORM_FILE=/run/secrets/snowstorm-matrix_heroesofthestorm - ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org
- ACCESSTOKEN_INSIDEBLIZZARD_FILE=/run/secrets/snowstorm-matrix_insideblizzard - CATEGORY=worldofwarcraft
- ACCESSTOKEN_OVERWATCH_FILE=/run/secrets/snowstorm-matrix_overwatch
- ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
``` ```

@ -15,12 +15,6 @@ def get_accesstoken_from_file(accesstoken_path):
accesstoken_file.close() accesstoken_file.close()
return single_accesstoken return single_accesstoken
async def on_event(room, event):
if hasattr(event, 'membership'):
if event.membership == 'invite':
# automatically join invites
print('joining '+room.room_id)
join = await matrix[event.source['state_key']].join(room.room_id)
def get_blog(): def get_blog():
url = 'https://news.blizzard.com/en-us/' url = 'https://news.blizzard.com/en-us/'
html = requests.get(url).text html = requests.get(url).text
@ -40,7 +34,7 @@ def get_blog():
blog.append({ blog.append({
'image': image_url, 'image': image_url,
'game': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(), 'category': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
'title': text_list[1].contents[0], 'title': text_list[1].contents[0],
'description': '', 'description': '',
'url': base_url+feature_html.attrs['href'], 'url': base_url+feature_html.attrs['href'],
@ -56,7 +50,7 @@ def get_blog():
blog.append({ blog.append({
'image': image_url, 'image': image_url,
'game': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(), 'category': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
'title': content_html.find(class_='ArticleListItem-title').contents[0], 'title': content_html.find(class_='ArticleListItem-title').contents[0],
'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0], 'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0],
'url': base_url+article_html.find(class_='ArticleLink').attrs['href'], 'url': base_url+article_html.find(class_='ArticleLink').attrs['href'],
@ -86,50 +80,53 @@ def get_formatted_body(post):
return formatted_body return formatted_body
async def main(): async def main():
event_type_prefix = 'de.lubiland.snowstorm-matrix.'
next_batch = {} # initialize new client
for game in accesstoken: config = ClientConfig(store_sync_tokens=True)
# initialize new client matrix = AsyncClient(homeserver,
mxid = '@'+mxid_prefix+game+':'+homeserver_name config=config)
config = ClientConfig(store_sync_tokens=True)
matrix[mxid] = AsyncClient(homeserver_url,
config=config)
# login # login
login_response = LoginResponse(mxid, login_response = LoginResponse(mxid,
'xxx', 'xxx',
accesstoken[game]) accesstoken)
await matrix[mxid].receive_response(login_response) await matrix.receive_response(login_response)
matrix[mxid].add_event_callback(on_event, InviteEvent)
# do a first sync # filter out everything except m.room.member (for invites)
sync_filter = { sync_filter = {
'room': { 'room': {
'state': { 'state': {
'types': ['m.room.member'], 'types': ['m.room.member'],
'lazy_load_members': True 'lazy_load_members': True
}, },
'timeline': { 'timeline': {
'types': ['invalid'] 'types': ['invalid']
}, },
'ephemeral': { 'ephemeral': {
'types': ['invalid'] 'types': ['invalid']
}
} }
} }
sync = await matrix[mxid].sync(timeout=3000, }
sync_filter=sync_filter)
next_batch[mxid] = sync.next_batch
# setting this to enforce a scrape at first loop
next_update = datetime.now() next_update = datetime.now()
# use this event type to store our url cache
cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
while True: while True:
# do sync first to e.g. accept an admin room invite
sync = await matrix.sync(sync_filter=sync_filter)
for room_id in sync.rooms.invite:
print('joining '+room_id)
await matrix.join(room_id)
if next_update < datetime.now(): if next_update < datetime.now():
# refresh url cache # refresh url cache
cache_state = await matrix[next(iter(matrix))].room_get_state_event(room_id=admin_room, cache_state = await matrix.room_get_state_event(room_id=admin_room,
event_type=event_type_prefix+'cache', event_type=cache_event_type+'cache',
state_key='') state_key=category)
if hasattr(cache_state, 'content') and 'url_list' in cache_state.content: if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
cache = cache_state.content['url_list'] cache = cache_state.content['url_list']
else: else:
@ -138,71 +135,50 @@ async def main():
# scrape all blog posts and process them # scrape all blog posts and process them
blog = get_blog() blog = get_blog()
for post in blog: for post in blog:
if post['url'] not in cache: # check if post url is in cache and matches our category
if post['url'] not in cache and post['category'] == category:
# post url not found in cache # post url not found in cache
mxid = '@'+mxid_prefix+post['game']+':'+homeserver_name # announce new post to matrix rooms
if mxid in matrix: content = {
# announce new post to matrix rooms 'msgtype': 'm.notice',
content = { 'body': get_body(post),
'msgtype': 'm.notice', 'format': 'org.matrix.custom.html',
'body': get_body(post), 'formatted_body': get_formatted_body(post)
'format': 'org.matrix.custom.html', }
'formatted_body': get_formatted_body(post) for room_id in matrix.rooms:
} # don't send updates to the admin room
for room_id in matrix[mxid].rooms: if room_id != admin_room:
if room_id != admin_room: await matrix.room_send(room_id=room_id,
# don't send updates to the admin room message_type='m.room.message',
await matrix[mxid].room_send(room_id=room_id, content=content)
message_type='m.room.message',
content=content)
else:
# no accesstoken for the calculated mxid
content = {
'msgtype': 'm.notice',
'body': 'No accesstoken for '+mxid+' available.',
'format': 'org.matrix.custom.html',
'formatted_body': ('No <code>accesstoken</code> for '+
'<code>'+mxid+'</code> available.')
}
# send the message with the first available matrix client,
# because we will always have at least one accesstoken
await matrix[next(iter(matrix))].room_send(room_id=admin_room,
message_type='m.room.message',
content=content)
# add url to cache # add url to cache
cache += [post['url']] cache += [post['url']]
# check for double the post count, to have some buffer for manually purging URLs
# otherwise the cache could reshuffle when you remove too many URLs at once
while len(cache) > len(blog)*2:
cache.remove(cache[0])
set_state = await matrix[next(iter(matrix))].room_put_state(room_id=admin_room,
event_type=event_type_prefix+'cache',
content={'url_list': cache})
else: else:
# no new posts found # no new posts found
pass pass
# trim the cache
# len(blog) is usually bigger than the count of posts in our category,
# so with len(blog) instead of the latter we have some buffer
while len(cache) > len(blog):
cache.remove(cache[0])
# set new cache event
await matrix.room_put_state(room_id=admin_room,
event_type=cache_event_type,
state_key=category,
content={'url_list': cache})
next_update = datetime.now() + timedelta(minutes=15) next_update = datetime.now() + timedelta(minutes=15)
for mxid in next_batch:
sync = await matrix[mxid].sync(timeout=10000,
sync_filter=sync_filter,
since=next_batch[mxid])
next_batch[mxid] = sync.next_batch
homeserver = environ['HOMESERVER']
homeserver_name = environ['HOMESERVER_NAME'] mxid = environ['MXID']
homeserver_url = environ['HOMESERVER_URL'] accesstoken = get_accesstoken_from_file(environ['ACCESSTOKEN_FILE'])
mxid_prefix = environ['MXID_PREFIX']
admin_room = environ['ADMIN_ROOM'] admin_room = environ['ADMIN_ROOM']
category = environ['CATEGORY']
accesstoken = {}
for var in environ:
if (game := re.match('^ACCESSTOKEN_([A-Z]*)_FILE$', var)) is not None:
accesstoken[game[1].lower()] = get_accesstoken_from_file(environ[var])
matrix = {}
asyncio.run(main()) asyncio.run(main())

Loading…
Cancel
Save