refactor main() into single category bot

master
lub 4 years ago
parent 09b86f2951
commit 586dcf15de

@ -2,7 +2,6 @@
Matrix Room: [#snowstorm-matrix:imninja.net](https://matrix.to/#/#snowstorm-matrix:imninja.net)
This bot will send news about Blizzard games to the Matrix rooms it got invited to.
It will additionally alert an admin room about missing bots.
Information about which URLs already got posted is also saved into the admin room via state events.
@ -31,10 +30,11 @@ After initial configuration you can run it manually:
```bash
docker run --rm \
-v $(pwd)/heroesofthestorm:/heroesofthestorm:ro \
-v $(pwd)/worldofwarcraft:/worldofwarcraft:ro \
-e ADMIN_ROOM='!iesofojasief90429ewiofj:matrix.org' \
-e ACCESSTOKEN_HEROESOFTHESTORM_FILE=/heroesofthestorm \
-e ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/worldofwarcraft \
-e HOMESERVER=https://example.org \
-e MXID=@heeeroooooooes:example.org \
-e ACCESSTOKEN_FILE=/heroesofthestorm \
-e ADMIN_ROOM='!iesofojasief90429ewiofj:example.org' \
-e CATEGORY=heroesofthestorm \
snowstorm-matrix
```
@ -45,16 +45,11 @@ Or via docker-compose/swarm:
deploy:
replicas: 1
secrets:
- snowstorm-matrix_heroesofthestorm
- snowstorm-matrix_insideblizzard
- snowstorm-matrix_overwatch
- snowstorm-matrix_worldofwarcraft
environment:
- HOMESERVER_URL=http://synapse:8008
- HOMESERVER_NAME=matrix.org
- ADMIN_ROOM=!jjpPluoxZoAOBQeYer:imninja.net
- ACCESSTOKEN_HEROESOFTHESTORM_FILE=/run/secrets/snowstorm-matrix_heroesofthestorm
- ACCESSTOKEN_INSIDEBLIZZARD_FILE=/run/secrets/snowstorm-matrix_insideblizzard
- ACCESSTOKEN_OVERWATCH_FILE=/run/secrets/snowstorm-matrix_overwatch
- ACCESSTOKEN_WORLDOFWARCRAFT_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
- HOMESERVER=http://synapse:8008
- ACCESSTOKEN_FILE=/run/secrets/snowstorm-matrix_worldofwarcraft
- MXID=@forthehorde:example.com
- ADMIN_ROOM=!jjpPluoxZoAOBQeYer:example.org
- CATEGORY=worldofwarcraft
```

@ -15,12 +15,6 @@ def get_accesstoken_from_file(accesstoken_path):
accesstoken_file.close()
return single_accesstoken
async def on_event(room, event):
    """Join a room automatically when an invite event for it arrives.

    Args:
        room: Room the event belongs to; only its ``room_id`` is read.
        event: Incoming event. Events that have no ``membership``
            attribute, or whose membership is not ``'invite'``, are ignored.
    """
    if getattr(event, 'membership', None) != 'invite':
        return
    # automatically join invites
    print('joining '+room.room_id)
    # The invite's state_key picks the client out of the module-level
    # `matrix` mapping (presumably keyed by the invited bot's MXID).
    # The join response is not inspected, so it is not bound to a name.
    await matrix[event.source['state_key']].join(room.room_id)
def get_blog():
url = 'https://news.blizzard.com/en-us/'
html = requests.get(url).text
@ -40,7 +34,7 @@ def get_blog():
blog.append({
'image': image_url,
'game': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
'category': text_list[0].contents[0].replace(' ', '').replace(':', '').lower(),
'title': text_list[1].contents[0],
'description': '',
'url': base_url+feature_html.attrs['href'],
@ -56,7 +50,7 @@ def get_blog():
blog.append({
'image': image_url,
'game': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
'category': content_html.find(class_='ArticleListItem-subtitle').find(class_='ArticleListItem-labelInner').contents[0].replace(' ', '').replace(':', '').lower(),
'title': content_html.find(class_='ArticleListItem-title').contents[0],
'description': content_html.find(class_='ArticleListItem-description').find(class_='h6').contents[0],
'url': base_url+article_html.find(class_='ArticleLink').attrs['href'],
@ -86,50 +80,53 @@ def get_formatted_body(post):
return formatted_body
async def main():
event_type_prefix = 'de.lubiland.snowstorm-matrix.'
next_batch = {}
for game in accesstoken:
# initialize new client
mxid = '@'+mxid_prefix+game+':'+homeserver_name
config = ClientConfig(store_sync_tokens=True)
matrix[mxid] = AsyncClient(homeserver_url,
config=config)
# initialize new client
config = ClientConfig(store_sync_tokens=True)
matrix = AsyncClient(homeserver,
config=config)
# login
login_response = LoginResponse(mxid,
'xxx',
accesstoken[game])
await matrix[mxid].receive_response(login_response)
# login
login_response = LoginResponse(mxid,
'xxx',
accesstoken)
await matrix.receive_response(login_response)
matrix[mxid].add_event_callback(on_event, InviteEvent)
# do a first sync
sync_filter = {
'room': {
'state': {
'types': ['m.room.member'],
'lazy_load_members': True
},
'timeline': {
'types': ['invalid']
},
'ephemeral': {
'types': ['invalid']
}
# filter out everything except m.room.member (for invites)
sync_filter = {
'room': {
'state': {
'types': ['m.room.member'],
'lazy_load_members': True
},
'timeline': {
'types': ['invalid']
},
'ephemeral': {
'types': ['invalid']
}
}
sync = await matrix[mxid].sync(timeout=3000,
sync_filter=sync_filter)
next_batch[mxid] = sync.next_batch
}
# setting this to enforce a scrape at first loop
next_update = datetime.now()
# use this event type to store our url cache
cache_event_type = 'de.lubiland.snowstorm-matrix.cache'
while True:
# do sync first to e.g. accept an admin room invite
sync = await matrix.sync(sync_filter=sync_filter)
for room_id in sync.rooms.invite:
print('joining '+room_id)
await matrix.join(room_id)
if next_update < datetime.now():
# refresh url cache
cache_state = await matrix[next(iter(matrix))].room_get_state_event(room_id=admin_room,
event_type=event_type_prefix+'cache',
state_key='')
cache_state = await matrix.room_get_state_event(room_id=admin_room,
event_type=cache_event_type+'cache',
state_key=category)
if hasattr(cache_state, 'content') and 'url_list' in cache_state.content:
cache = cache_state.content['url_list']
else:
@ -138,71 +135,50 @@ async def main():
# scrape all blog posts and process them
blog = get_blog()
for post in blog:
if post['url'] not in cache:
# check if post url is in cache and matches our category
if post['url'] not in cache and post['category'] == category:
# post url not found in cache
mxid = '@'+mxid_prefix+post['game']+':'+homeserver_name
if mxid in matrix:
# announce new post to matrix rooms
content = {
'msgtype': 'm.notice',
'body': get_body(post),
'format': 'org.matrix.custom.html',
'formatted_body': get_formatted_body(post)
}
for room_id in matrix[mxid].rooms:
if room_id != admin_room:
# don't send updates to the admin room
await matrix[mxid].room_send(room_id=room_id,
message_type='m.room.message',
content=content)
else:
# no accesstoken for the calculated mxid
content = {
'msgtype': 'm.notice',
'body': 'No accesstoken for '+mxid+' available.',
'format': 'org.matrix.custom.html',
'formatted_body': ('No <code>accesstoken</code> for '+
'<code>'+mxid+'</code> available.')
}
# send the message with the first available matrix client,
# because we will always have at least one accesstoken
await matrix[next(iter(matrix))].room_send(room_id=admin_room,
message_type='m.room.message',
content=content)
# announce new post to matrix rooms
content = {
'msgtype': 'm.notice',
'body': get_body(post),
'format': 'org.matrix.custom.html',
'formatted_body': get_formatted_body(post)
}
for room_id in matrix.rooms:
# don't send updates to the admin room
if room_id != admin_room:
await matrix.room_send(room_id=room_id,
message_type='m.room.message',
content=content)
# add url to cache
cache += [post['url']]
# check for double the post count, to have some buffer for manually purging URLs
# otherwise the cache could reshuffle when you remove too many URLs at once
while len(cache) > len(blog)*2:
cache.remove(cache[0])
set_state = await matrix[next(iter(matrix))].room_put_state(room_id=admin_room,
event_type=event_type_prefix+'cache',
content={'url_list': cache})
else:
# no new posts found
pass
# trim the cache
# len(blog) is usually bigger than the count of posts in our category,
# so with len(blog) instead of the latter we have some buffer
while len(cache) > len(blog):
cache.remove(cache[0])
# set new cache event
await matrix.room_put_state(room_id=admin_room,
event_type=cache_event_type,
state_key=category,
content={'url_list': cache})
next_update = datetime.now() + timedelta(minutes=15)
for mxid in next_batch:
sync = await matrix[mxid].sync(timeout=10000,
sync_filter=sync_filter,
since=next_batch[mxid])
next_batch[mxid] = sync.next_batch
homeserver_name = environ['HOMESERVER_NAME']
homeserver_url = environ['HOMESERVER_URL']
mxid_prefix = environ['MXID_PREFIX']
homeserver = environ['HOMESERVER']
mxid = environ['MXID']
accesstoken = get_accesstoken_from_file(environ['ACCESSTOKEN_FILE'])
admin_room = environ['ADMIN_ROOM']
category = environ['CATEGORY']
accesstoken = {}
for var in environ:
if (game := re.match('^ACCESSTOKEN_([A-Z]*)_FILE$', var)) is not None:
accesstoken[game[1].lower()] = get_accesstoken_from_file(environ[var])
matrix = {}
asyncio.run(main())

Loading…
Cancel
Save