From b9ec64f6a42b866682a5efba9a64525655ad73d8 Mon Sep 17 00:00:00 2001 From: Rob Siemborski Date: Sat, 18 Apr 2020 14:41:06 -0400 Subject: [PATCH 1/4] Use aiohttp instead of requests to download files from slack. Still uses the basic zulip client for uploads. --- __init__.py | 9 +++++++-- requirements.txt | 1 + slack_reformat.py | 27 +++++++++++++++------------ slack_reformat_test.py | 18 +++++++++--------- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/__init__.py b/__init__.py index 0515cec..e4edc0b 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3 +1,4 @@ +import aiohttp import asyncio from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer import datetime @@ -248,8 +249,12 @@ async def receive_slack_msg(**payload): # Assumes that both markdown and plaintext need a newline together. needs_leading_newline = \ (len(msg) > 0 or len(formatted_attachments['markdown']) > 0) - formatted_files = slack_reformat.format_files_from_slack( - files, needs_leading_newline, SLACK_TOKEN, self.zulip_client) + + # TODO: We might not want a new client session for every message. + async with aiohttp.ClientSession() as session: + formatted_files = await slack_reformat.format_files_from_slack( + files, needs_leading_newline, + session, SLACK_TOKEN, self.zulip_client) zulip_message_text = \ msg + formatted_attachments['markdown'] + formatted_files['markdown'] diff --git a/requirements.txt b/requirements.txt index 54121d2..304a9f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +aiohttp==3.6.2 redis==3.2.1 requests==2.22.0 slackclient==2.5.0 diff --git a/slack_reformat.py b/slack_reformat.py index d020b3c..6d807c7 100644 --- a/slack_reformat.py +++ b/slack_reformat.py @@ -5,7 +5,6 @@ import logging import re import sys -import requests import traceback from io import BytesIO @@ -153,9 +152,9 @@ async def replace_markdown_link(m): _SLACK_LINK_MATCH, replace_markdown_link) - -def format_files_from_slack(files, needs_leading_newline, - slack_bearer_token=None, zulip_client=None): +async def format_files_from_slack(files, needs_leading_newline, + aiohttp_session=None, + slack_bearer_token=None, zulip_client=None): '''Given a list of files from the slack API, return both a markdown and plaintext string representation of those files. @@ -182,15 +181,19 @@ def format_files_from_slack(files, needs_leading_newline, if 'name' in file and file['name']: rendered_markdown_name = file['name'] - if slack_bearer_token and zulip_client and 'url_private' in file and file['url_private']: + if (aiohttp_session and slack_bearer_token and zulip_client + and 'url_private' in file and file['url_private']): file_private_url = file['url_private'] - r = requests.get(file_private_url, - headers={"Authorization": f"Bearer {slack_bearer_token}"}) - if r.status_code == 200: - if file_private_url != r.url: - # we were redirected! - _LOGGER.info( - f'Apparent slack redirect from {file_private_url} to {r.url} when bridging file. Skipping.') + r = await aiohttp_session.get(file_private_url, + headers={"Authorization": f"Bearer {slack_bearer_token}"}) + if r.status == 200: + uploadable_file = BytesIO(await r.content.read()) + uploadable_file.name = file['name'] + + # Note: Like other zulip methods, this will block and isn't async. + response = zulip_client.upload_file(uploadable_file) + if 'uri' in response and response['uri']: + rendered_markdown_name = f"[{file['name']}]({response['uri']})" else: uploadable_file = BytesIO(r.content) uploadable_file.name = file['name'] diff --git a/slack_reformat_test.py b/slack_reformat_test.py index 07825ee..4b6a952 100644 --- a/slack_reformat_test.py +++ b/slack_reformat_test.py @@ -199,17 +199,17 @@ def test_format_files_from_slack(self): # path for files. # None case - output = slack_reformat.format_files_from_slack(None, False) + output = do_await(slack_reformat.format_files_from_slack(None, False)) self.assertEqual(output['plaintext'], '') self.assertEqual(output['markdown'], '') # None case, leading newline - output = slack_reformat.format_files_from_slack(None, True) + output = do_await(slack_reformat.format_files_from_slack(None, True)) self.assertEqual(output['plaintext'], '') self.assertEqual(output['markdown'], '') # Base case - output = slack_reformat.format_files_from_slack([], False) + output = do_await(slack_reformat.format_files_from_slack([], False)) self.assertEqual(output['plaintext'], '') self.assertEqual(output['markdown'], '') @@ -236,17 +236,17 @@ def test_format_files_from_slack(self): "url_private": "https://files.slack.com/files-pri/T0000000-F0000000/filename.jpg" # ... and many omitted fields } - output = slack_reformat.format_files_from_slack([test_file], True) + output = do_await(slack_reformat.format_files_from_slack([test_file], True)) self.assertEqual(output['plaintext'], '\n(Bridged Message included file: filename.jpg)') self.assertEqual(output['markdown'], '\n*(Bridged Message included file: filename.jpg)*') # Same test, no leading newline. - output = slack_reformat.format_files_from_slack([test_file], False) + output = do_await(slack_reformat.format_files_from_slack([test_file], False)) self.assertEqual(output['plaintext'], '(Bridged Message included file: filename.jpg)') self.assertEqual(output['markdown'], '*(Bridged Message included file: filename.jpg)*') # Multiple files. - output = slack_reformat.format_files_from_slack([test_file, test_file], False) + output = do_await(slack_reformat.format_files_from_slack([test_file, test_file], False)) self.assertEqual(output['plaintext'], '(Bridged Message included file: filename.jpg)\n(Bridged Message included file: filename.jpg)') self.assertEqual(output['markdown'], @@ -254,13 +254,13 @@ def test_format_files_from_slack(self): # If we have a title that matches the filename, it should not be displayed. test_file['title'] = test_filename - output = slack_reformat.format_files_from_slack([test_file], True) + output = do_await(slack_reformat.format_files_from_slack([test_file], True)) self.assertEqual(output['plaintext'], '\n(Bridged Message included file: filename.jpg)') self.assertEqual(output['markdown'], '\n*(Bridged Message included file: filename.jpg)*') # Add a distinct title to the above: test_file['title'] = 'File Title' - output = slack_reformat.format_files_from_slack([test_file], True) + output = do_await(slack_reformat.format_files_from_slack([test_file], True)) self.assertEqual(output['plaintext'], '\n(Bridged Message included file: filename.jpg: \'File Title\')') self.assertEqual(output['markdown'], '\n*(Bridged Message included file: filename.jpg: \'File Title\')*') @@ -269,7 +269,7 @@ def test_format_files_from_slack(self): "id": "U0000000", "mode": "tombstone", } - output = slack_reformat.format_files_from_slack([test_file], False) + output = do_await(slack_reformat.format_files_from_slack([test_file], False)) self.assertEqual(output['plaintext'], '') self.assertEqual(output['markdown'], '') From 9aab76e95122a449ee99404c085e60900e575f2e Mon Sep 17 00:00:00 2001 From: Rob Siemborski Date: Sat, 18 Apr 2020 16:54:38 -0400 Subject: [PATCH 2/4] Use aiohttp for the upload of bridged files to zulip too. --- __init__.py | 3 ++- slack_reformat.py | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/__init__.py b/__init__.py index e4edc0b..6daac2c 100644 --- a/__init__.py +++ b/__init__.py @@ -254,7 +254,8 @@ async def receive_slack_msg(**payload): async with aiohttp.ClientSession() as session: formatted_files = await slack_reformat.format_files_from_slack( files, needs_leading_newline, - session, SLACK_TOKEN, self.zulip_client) + session, SLACK_TOKEN, + ZULIP_URL, aiohttp.BasicAuth(ZULIP_BOT_EMAIL, ZULIP_API_KEY)) zulip_message_text = \ msg + formatted_attachments['markdown'] + formatted_files['markdown'] diff --git a/slack_reformat.py b/slack_reformat.py index 6d807c7..026d897 100644 --- a/slack_reformat.py +++ b/slack_reformat.py @@ -154,12 +154,15 @@ async def replace_markdown_link(m): async def format_files_from_slack(files, needs_leading_newline, aiohttp_session=None, - slack_bearer_token=None, zulip_client=None): + slack_bearer_token=None, + zulip_url=None, + aiohttp_zulip_basic_auth=None): '''Given a list of files from the slack API, return both a markdown and plaintext string representation of those files. - Assuming a bearer token and zulip client are provided, the files are mirrored to zulip - and those links are included in the markdown result. + Assuming a aiohttp Client Session, slack bearer token, root zulip URL, and an + aiohttp.BasicAuth for zulip are provided, the files are mirrored to zulip and + those links are included in the markdown result (but not the plaintext result). This method only uses the passed in message text to determine how to format its output caller must append as appropriate.''' @@ -181,7 +184,8 @@ async def format_files_from_slack(files, needs_leading_newline, if 'name' in file and file['name']: rendered_markdown_name = file['name'] - if (aiohttp_session and slack_bearer_token and zulip_client + if (aiohttp_session and slack_bearer_token + and zulip_url and aiohttp_zulip_basic_auth and 'url_private' in file and file['url_private']): file_private_url = file['url_private'] r = await aiohttp_session.get(file_private_url, @@ -190,19 +194,26 @@ async def format_files_from_slack(files, needs_leading_newline, uploadable_file = BytesIO(await r.content.read()) uploadable_file.name = file['name'] - # Note: Like other zulip methods, this will block and isn't async. - response = zulip_client.upload_file(uploadable_file) - if 'uri' in response and response['uri']: + file_dict = {'file': uploadable_file} + + # Because we want to use async io for this potentially long running request, + # we can't use the zulip client library. Instead, REST/OpenAPI it is. + upload_response = await aiohttp_session.post( + f'{zulip_url}/api/v1/user_uploads', + data=file_dict, + auth=aiohttp_zulip_basic_auth) + + response = {} + if upload_response.status == 200: + response = await upload_response.json() + else: + _LOGGER.info( + f"Upload to Zulip Failed for {file['name']}. Code {upload_response.status}.") + + if upload_response.status == 200 and 'uri' in response and response['uri']: rendered_markdown_name = f"[{file['name']}]({response['uri']})" else: - uploadable_file = BytesIO(r.content) - uploadable_file.name = file['name'] - - response = zulip_client.upload_file(uploadable_file) - if 'uri' in response and response['uri']: - rendered_markdown_name = f"[{file['name']}]({response['uri']})" - else: - _LOGGER.info('Got bad response when uploading to zulip: {}'.format(response)) + _LOGGER.info(f"Got bad response uploading to zulip for {file['name']}.. Body: {await upload_response.text()}") else: _LOGGER.info(f"Got code {r.status_code} when fetching {file_private_url} from slack.") From fc56cbe26383b6401c2b27ce457258682ccea5af Mon Sep 17 00:00:00 2001 From: Rob Siemborski Date: Sat, 18 Apr 2020 17:16:11 -0400 Subject: [PATCH 3/4] Remove obolete commented out code. --- __init__.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/__init__.py b/__init__.py index 6daac2c..dc53c38 100644 --- a/__init__.py +++ b/__init__.py @@ -229,18 +229,6 @@ async def receive_slack_msg(**payload): if 'files' in data: files = data['files'] - # TODO: When real support for 'files' is implemented, - # it should probably be in the format_attachments_for_zulip - # call. - - # if 'files' in data: - # for file in data['files']: - # web_client.files_sharedPublicURL(id=file['id']) - # if msg == '': - # msg = file['permalink_public'] - # else: - # msg += '\n' + file['permalink_public'] - formatted_attachments = \ await slack_reformat.format_attachments_from_slack( msg, attachments, From 862006b4858a77871b22aa8dec0577cc829826a6 Mon Sep 17 00:00:00 2001 From: Rob Siemborski Date: Mon, 20 Apr 2020 18:24:42 -0400 Subject: [PATCH 4/4] Detect if slack redirected us when bridging a file, async version. This likely indicates some failure, such as authentication. We do not retry, but that would be a nice addition as long as we don't block other handling. --- slack_reformat.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/slack_reformat.py b/slack_reformat.py index 026d897..b5d3df2 100644 --- a/slack_reformat.py +++ b/slack_reformat.py @@ -191,29 +191,34 @@ async def format_files_from_slack(files, needs_leading_newline, r = await aiohttp_session.get(file_private_url, headers={"Authorization": f"Bearer {slack_bearer_token}"}) if r.status == 200: - uploadable_file = BytesIO(await r.content.read()) - uploadable_file.name = file['name'] - - file_dict = {'file': uploadable_file} - - # Because we want to use async io for this potentially long running request, - # we can't use the zulip client library. Instead, REST/OpenAPI it is. - upload_response = await aiohttp_session.post( - f'{zulip_url}/api/v1/user_uploads', - data=file_dict, - auth=aiohttp_zulip_basic_auth) - - response = {} - if upload_response.status == 200: - response = await upload_response.json() - else: + if str(r.url) != file_private_url: + # we were redirected! _LOGGER.info( - f"Upload to Zulip Failed for {file['name']}. Code {upload_response.status}.") - - if upload_response.status == 200 and 'uri' in response and response['uri']: - rendered_markdown_name = f"[{file['name']}]({response['uri']})" + f'Apparent slack redirect from {file_private_url} to {str(r.url)} when bridging file. Skipping.') else: - _LOGGER.info(f"Got bad response uploading to zulip for {file['name']}.. Body: {await upload_response.text()}") + uploadable_file = BytesIO(await r.content.read()) + uploadable_file.name = file['name'] + + file_dict = {'file': uploadable_file} + + # Because we want to use async io for this potentially long running request, + # we can't use the zulip client library. Instead, REST/OpenAPI it is. + upload_response = await aiohttp_session.post( + f'{zulip_url}/api/v1/user_uploads', + data=file_dict, + auth=aiohttp_zulip_basic_auth) + + response = {} + if upload_response.status == 200: + response = await upload_response.json() + else: + _LOGGER.info( + f"Upload to Zulip Failed for {file['name']}. Code {upload_response.status}.") + + if upload_response.status == 200 and 'uri' in response and response['uri']: + rendered_markdown_name = f"[{file['name']}]({response['uri']})" + else: + _LOGGER.info(f"Got bad response uploading to zulip for {file['name']}.. Body: {await upload_response.text()}") else: _LOGGER.info(f"Got code {r.status_code} when fetching {file_private_url} from slack.")