diff --git a/tubular/amplitude_api.py b/tubular/amplitude_api.py deleted file mode 100644 index 5b31fb1d..00000000 --- a/tubular/amplitude_api.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Amplitude API class that is used to delete user from Amplitude. -""" -import logging -import requests -import json -import backoff -import os - -logger = logging.getLogger(__name__) -MAX_ATTEMPTS = int(os.environ.get("RETRY_MAX_ATTEMPTS", 5)) - - -class AmplitudeException(Exception): - """ - AmplitudeException will be raised there is fatal error and is not recoverable. - """ - pass - - -class AmplitudeRecoverableException(AmplitudeException): - """ - AmplitudeRecoverableException will be raised when request can be retryable. - """ - pass - - -class AmplitudeApi: - """ - Amplitude API is used to handle communication with Amplitude Api's. - """ - - def __init__(self, amplitude_api_key, amplitude_secret_key): - self.amplitude_api_key = amplitude_api_key - self.amplitude_secret_key = amplitude_secret_key - self.base_url = "https://amplitude.com/" - self.delete_user_path = "api/2/deletions/users" - - def auth(self): - """ - Returns auth credentials for Amplitude authorization. - - Returns: - Tuple: Returns authorization tuple. - """ - return (self.amplitude_api_key, self.amplitude_secret_key) - - - @backoff.on_exception( - backoff.expo, - AmplitudeRecoverableException, - max_tries = MAX_ATTEMPTS, - ) - def delete_user(self, user): - """ - This function send an API request to delete user from Amplitude. It then parse the response and - try again if it is recoverable. - - Returns: - None - - Args: - user (dict): raw data of user to delete. - - Raises: - AmplitudeException: if the error from amplitude is unrecoverable/unretryable. - AmplitudeRecoverableException: if the error from amplitude is recoverable/retryable. - """ - response = requests.post( - self.base_url + self.delete_user_path, - headers = {"Content-Type": "application/json"}, - json = { - "user_ids": [user["user"]["id"]], - 'ignore_invalid_id': 'true', # When true, the job ignores users that don't exist in the project. - "requester": "user-retirement-pipeline", - }, - auth = self.auth() - ) - - if response.status_code == 200: - logger.info("Amplitude user deletion succeeded") - return - - # We have some sort of error. Parse it, log it, and retry as needed. - error_msg = "Amplitude user deletion failed due to {reason}".format(reason=response.reason) - logger.error(error_msg) - # Status 429 is returned when there are too many requests and can be resolved in retrying sending - # request. - if response.status_code == 429 or 500 <= response.status_code < 600: - raise AmplitudeRecoverableException(error_msg) - else: - raise AmplitudeException(error_msg) diff --git a/tubular/braze_api.py b/tubular/braze_api.py deleted file mode 100644 index 247ceccc..00000000 --- a/tubular/braze_api.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -Helper API classes for calling Braze APIs. -""" -import logging -import os - -import backoff -import requests - -LOG = logging.getLogger(__name__) -MAX_ATTEMPTS = int(os.environ.get('RETRY_BRAZE_MAX_ATTEMPTS', 5)) - - -class BrazeException(Exception): - pass - - -class BrazeRecoverableException(BrazeException): - pass - - -class BrazeApi: - """ - Braze API client used to make calls to Braze - """ - - def __init__(self, braze_api_key, braze_instance): - self.api_key = braze_api_key - - # https://www.braze.com/docs/api/basics/#endpoints - self.base_url = 'https://rest.{instance}.braze.com'.format(instance=braze_instance) - - def auth_headers(self): - """Returns authorization headers suitable for passing to the requests library""" - return { - 'Authorization': 'Bearer ' + self.api_key, - } - - @staticmethod - def get_error_message(response): - """Returns a string suitable for logging""" - try: - json = response.json() - except ValueError: - json = {} - - # https://www.braze.com/docs/api/errors - message = json.get('message') - - return message or response.reason - - def process_response(self, response, action): - """Log response status and raise an error as needed""" - if response.ok: - LOG.info('Braze {action} succeeded'.format(action=action)) - return - - # We have some sort of error. Parse it, log it, and retry as needed. - error_msg = 'Braze {action} failed due to {msg}'.format(action=action, msg=self.get_error_message(response)) - LOG.error(error_msg) - - if response.status_code == 429 or 500 <= response.status_code < 600: - raise BrazeRecoverableException(error_msg) - else: - raise BrazeException(error_msg) - - @backoff.on_exception( - backoff.expo, - BrazeRecoverableException, - max_tries=MAX_ATTEMPTS, - ) - def delete_user(self, learner): - """ - Delete a learner from Braze. - """ - # https://www.braze.com/docs/help/gdpr_compliance/#the-right-to-erasure - # https://www.braze.com/docs/api/endpoints/user_data/post_user_delete - response = requests.post( - self.base_url + '/users/delete', - headers=self.auth_headers(), - json={ - 'external_ids': [learner['user']['id']], # Braze external ids are LMS user ids - }, - ) - self.process_response(response, 'user deletion') diff --git a/tubular/hubspot_api.py b/tubular/hubspot_api.py deleted file mode 100644 index c196a8b9..00000000 --- a/tubular/hubspot_api.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Helper API classes for calling Hubspot APIs. -""" -import os -import logging - -import backoff -import requests - -from tubular.tubular_email import send_email - -LOG = logging.getLogger(__name__) -MAX_ATTEMPTS = int(os.environ.get('RETRY_HUBSPOT_MAX_ATTEMPTS', 5)) - -GET_VID_FROM_EMAIL_URL_TEMPLATE = "https://api.hubapi.com/contacts/v1/contact/email/{email}/profile" -DELETE_USER_FROM_VID_TEMPLATE = "https://api.hubapi.com/contacts/v1/contact/vid/{vid}" - - -class HubspotException(Exception): - pass - - -class HubspotAPI: - """ - Hubspot API client used to make calls to Hubspot - """ - - def __init__( - self, - hubspot_api_key, - aws_region, - from_address, - alert_email - ): - self.api_key = hubspot_api_key - self.aws_region = aws_region - self.from_address = from_address - self.alert_email = alert_email - - @backoff.on_exception( - backoff.expo, - HubspotException, - max_tries=MAX_ATTEMPTS - ) - def delete_user(self, learner): - """ - Delete a learner from hubspot using their email address. - """ - email = learner.get('original_email', None) - if not email: - raise TypeError('Expected an email address for user to delete, but received None.') - - user_vid = self.get_user_vid(email) - if user_vid: - self.delete_user_by_vid(user_vid) - - def delete_user_by_vid(self, vid): - """ - Delete a learner from hubspot using their Hubspot `vid` (unique identifier) - """ - headers = { - 'content-type': 'application/json', - 'authorization': f'Bearer {self.api_key}' - } - - req = requests.delete(DELETE_USER_FROM_VID_TEMPLATE.format( - vid=vid - ), headers=headers) - error_msg = "" - if req.status_code == 200: - LOG.info("User successfully deleted from Hubspot") - self.send_marketing_alert(vid) - elif req.status_code == 401: - error_msg = "Hubspot user deletion failed due to authorized API call" - elif req.status_code == 404: - error_msg = "Hubspot user deletion failed because vid doesn't match user" - elif req.status_code == 500: - error_msg = "Hubspot user deletion failed due to server-side (Hubspot) issues" - else: - error_msg = "Hubspot user deletion failed due to unknown reasons" - - if error_msg: - LOG.error(error_msg) - raise HubspotException(error_msg) - - def get_user_vid(self, email): - """ - Get a user's `vid` from Hubspot. `vid` is the terminology that hubspot uses for a user ids - """ - headers = { - 'content-type': 'application/json', - 'authorization': f'Bearer {self.api_key}' - } - - req = requests.get(GET_VID_FROM_EMAIL_URL_TEMPLATE.format( - email=email - ), headers=headers) - if req.status_code == 200: - req_data = req.json() - return req_data.get('vid') - elif req.status_code == 404: - LOG.info("No action taken because no user was found in Hubspot.") - return - else: - error_msg = "Error attempted to get user_vid from Hubspot. Error: {}".format( - req.text - ) - LOG.error(error_msg) - raise HubspotException(error_msg) - - def send_marketing_alert(self, vid): - """ - Notify marketing with user's Hubspot `vid` upon successful deletion. - """ - subject = "Alert: Hubspot Deletion" - body = "Learner with the VID \"{}\" has been deleted from Hubspot.".format(vid) - send_email( - self.aws_region, - self.from_address, - [self.alert_email], - subject, - body - ) diff --git a/tubular/scripts/get_learners_to_retire.py b/tubular/scripts/get_learners_to_retire.py deleted file mode 100755 index ef36008b..00000000 --- a/tubular/scripts/get_learners_to_retire.py +++ /dev/null @@ -1,105 +0,0 @@ -#! /usr/bin/env python3 - -""" -Command-line script to retrieve list of learners that have requested to be retired. -The script calls the appropriate LMS endpoint to get this list of learners. -""" - -from os import path -import io -import sys -import logging -import click -import yaml - -# Add top-level module path to sys.path before importing tubular code. -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - -from tubular.edx_api import LmsApi # pylint: disable=wrong-import-position -from tubular.jenkins import export_learner_job_properties # pylint: disable=wrong-import-position - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) -LOG = logging.getLogger(__name__) - - -@click.command("get_learners_to_retire") -@click.option( - '--config_file', - help='File in which YAML config exists that overrides all other params.' -) -@click.option( - '--cool_off_days', - help='Number of days a learner should be in the retirement queue before being actually retired.', - default=7 -) -@click.option( - '--output_dir', - help="Directory in which to write the Jenkins properties files.", - default='./jenkins_props' -) -@click.option( - '--user_count_error_threshold', - help="If more users than this number are returned we will error out instead of retiring. This is a failsafe" - "against attacks that somehow manage to add users to the retirement queue.", - default=300 -) -@click.option( - '--max_user_batch_size', - help="This setting will only get at most X number of users. If this number is lower than the user_count_error_threshold" - "setting then it will not error.", - default=200 -) -def get_learners_to_retire(config_file, - cool_off_days, - output_dir, - user_count_error_threshold, - max_user_batch_size): - """ - Retrieves a JWT token as the retirement service user, then calls the LMS - endpoint to retrieve the list of learners awaiting retirement. - """ - if not config_file: - click.echo('A config file is required.') - sys.exit(-1) - - with io.open(config_file, 'r') as config: - config_yaml = yaml.safe_load(config) - - user_count_error_threshold = int(user_count_error_threshold) - cool_off_days = int(cool_off_days) - - client_id = config_yaml['client_id'] - client_secret = config_yaml['client_secret'] - lms_base_url = config_yaml['base_urls']['lms'] - retirement_pipeline = config_yaml['retirement_pipeline'] - end_states = [state[1] for state in retirement_pipeline] - states_to_request = ['PENDING'] + end_states - - api = LmsApi(lms_base_url, lms_base_url, client_id, client_secret) - - # Retrieve the learners to retire and export them to separate Jenkins property files. - learners_to_retire = api.learners_to_retire(states_to_request, cool_off_days, max_user_batch_size) - if max_user_batch_size: - learners_to_retire = learners_to_retire[:max_user_batch_size] - learners_to_retire_cnt = len(learners_to_retire) - - if learners_to_retire_cnt > user_count_error_threshold: - click.echo( - 'Too many learners to retire! Expected {} or fewer, got {}!'.format( - user_count_error_threshold, - learners_to_retire_cnt - ) - ) - sys.exit(-1) - - export_learner_job_properties( - learners_to_retire, - output_dir - ) - - -if __name__ == "__main__": - # pylint: disable=unexpected-keyword-arg, no-value-for-parameter - # If using env vars to provide params, prefix them with "RETIREMENT_", e.g. RETIREMENT_CLIENT_ID - get_learners_to_retire(auto_envvar_prefix='RETIREMENT') - diff --git a/tubular/scripts/helpers.py b/tubular/scripts/helpers.py index 06e0c4b1..cf720ae1 100644 --- a/tubular/scripts/helpers.py +++ b/tubular/scripts/helpers.py @@ -23,11 +23,6 @@ from tubular.edx_api import CredentialsApi, DemographicsApi, EcommerceApi, LicenseManagerApi, \ LmsApi # pylint: disable=wrong-import-position -from tubular.braze_api import BrazeApi # pylint: disable=wrong-import-position -from tubular.segment_api import SegmentApi # pylint: disable=wrong-import-position -from tubular.salesforce_api import SalesforceApi # pylint: disable=wrong-import-position -from tubular.hubspot_api import HubspotAPI # pylint: disable=wrong-import-position -from tubular.amplitude_api import AmplitudeApi # pylint: disable=wrong-import-position def _log(kind, message): @@ -151,35 +146,15 @@ def _setup_all_apis_or_exit(fail_func, fail_code, config): lms_base_url = config['base_urls']['lms'] ecommerce_base_url = config['base_urls'].get('ecommerce', None) credentials_base_url = config['base_urls'].get('credentials', None) - segment_base_url = config['base_urls'].get('segment', None) demographics_base_url = config['base_urls'].get('demographics', None) license_manager_base_url = config['base_urls'].get('license_manager', None) client_id = config['client_id'] client_secret = config['client_secret'] - braze_api_key = config.get('braze_api_key', None) - braze_instance = config.get('braze_instance', None) - amplitude_api_key = config.get('amplitude_api_key', None) - amplitude_secret_key = config.get('amplitude_secret_key', None) - salesforce_user = config.get('salesforce_user', None) - salesforce_password = config.get('salesforce_password', None) - salesforce_token = config.get('salesforce_token', None) - salesforce_domain = config.get('salesforce_domain', None) - salesforce_assignee = config.get('salesforce_assignee', None) - segment_auth_token = config.get('segment_auth_token', None) - segment_workspace_slug = config.get('segment_workspace_slug', None) - hubspot_api_key = config.get('hubspot_api_key', None) - hubspot_aws_region = config.get('hubspot_aws_region', None) - hubspot_from_address = config.get('hubspot_from_address', None) - hubspot_alert_email = config.get('hubspot_alert_email', None) for state in config['retirement_pipeline']: for service, service_url in ( - ('BRAZE', braze_api_key), - ('AMPLITUDE', amplitude_api_key), ('ECOMMERCE', ecommerce_base_url), ('CREDENTIALS', credentials_base_url), - ('SEGMENT', segment_base_url), - ('HUBSPOT', hubspot_api_key), ('DEMOGRAPHICS', demographics_base_url) ): if state[2] == service and service_url is None: @@ -187,35 +162,6 @@ def _setup_all_apis_or_exit(fail_func, fail_code, config): config['LMS'] = LmsApi(lms_base_url, lms_base_url, client_id, client_secret) - if braze_api_key: - config['BRAZE'] = BrazeApi( - braze_api_key, - braze_instance, - ) - - if amplitude_api_key and amplitude_secret_key: - config['AMPLITUDE'] = AmplitudeApi( - amplitude_api_key, - amplitude_secret_key, - ) - - if salesforce_user and salesforce_password and salesforce_token: - config['SALESFORCE'] = SalesforceApi( - salesforce_user, - salesforce_password, - salesforce_token, - salesforce_domain, - salesforce_assignee - ) - - if hubspot_api_key: - config['HUBSPOT'] = HubspotAPI( - hubspot_api_key, - hubspot_aws_region, - hubspot_from_address, - hubspot_alert_email - ) - if ecommerce_base_url: config['ECOMMERCE'] = EcommerceApi(lms_base_url, ecommerce_base_url, client_id, client_secret) @@ -233,11 +179,5 @@ def _setup_all_apis_or_exit(fail_func, fail_code, config): client_secret, ) - if segment_base_url: - config['SEGMENT'] = SegmentApi( - segment_base_url, - segment_auth_token, - segment_workspace_slug - ) except Exception as exc: # pylint: disable=broad-except fail_func(fail_code, 'Unexpected error occurred!', exc) diff --git a/tubular/scripts/replace_usernames.py b/tubular/scripts/replace_usernames.py deleted file mode 100644 index b3913fda..00000000 --- a/tubular/scripts/replace_usernames.py +++ /dev/null @@ -1,147 +0,0 @@ -#! /usr/bin/env python3 - -""" -Command-line script to replace the usernames for all passed in learners. -Accepts a list of current usernames and their preferred new username. This -script will call LMS first which generates a unique username if the passed in -new username is not unique. It then calls all other services to replace the -username in their DBs. - -""" - -from os import path -import csv -import io -import sys -import logging -import click -import yaml - -# Add top-level module path to sys.path before importing tubular code. -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - -from tubular.edx_api import CredentialsApi, DiscoveryApi, EcommerceApi, LmsApi # pylint: disable=wrong-import-position - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) -LOG = logging.getLogger(__name__) - - -def write_responses(writer, replacements, status): - for replacement in replacements: - original_username = list(replacement.keys())[0] - new_username = list(replacement.values())[0] - writer.writerow([original_username, new_username, status]) - - -@click.command("replace_usernames") -@click.option( - '--config_file', - help='File in which YAML config exists that overrides all other params.' -) -@click.option( - '--username_replacement_csv', - help='File in which YAML config exists that overrides all other params.' -) -def replace_usernames(config_file, username_replacement_csv): - """ - Retrieves a JWT token as the retirement service user, then calls the LMS - endpoint to retrieve the list of learners awaiting retirement. - - Config file example: - ``` - client_id: xxx - client_secret: xxx - base_urls: - lms: http://localhost:18000 - ecommerce: http://localhost:18130 - discovery: http://localhost:18381 - credentials: http://localhost:18150 - ``` - - Username file example: - ``` - current_un_1,desired_un_1 - current_un_2,desired_un_2, - current_un_3,desired_un_3 - ``` - """ - if not config_file: - click.echo('A config file is required.') - sys.exit(-1) - - if not username_replacement_csv: - click.echo('A username replacement CSV file is required') - sys.exit(-1) - - with io.open(config_file, 'r') as config: - config_yaml = yaml.safe_load(config) - - with io.open(username_replacement_csv, 'r') as replacement_file: - csv_reader = csv.reader(replacement_file) - lms_username_mappings = [ - {current_username: desired_username} - for (current_username, desired_username) - in csv_reader - ] - - client_id = config_yaml['client_id'] - client_secret = config_yaml['client_secret'] - lms_base_url = config_yaml['base_urls']['lms'] - ecommerce_base_url = config_yaml['base_urls']['ecommerce'] - discovery_base_url = config_yaml['base_urls']['discovery'] - credentials_base_url = config_yaml['base_urls']['credentials'] - - # Note that though partially_failed sounds better than completely_failed, - # it's actually worse since the user is not consistant across DBs. - # Partially failed username replacements will need to be triaged so the - # user isn't in a broken state - successful_replacements = [] - partially_failed_replacements = [] - fully_failed_replacements = [] - - lms_api = LmsApi(lms_base_url, lms_base_url, client_id, client_secret) - ecommerce_api = EcommerceApi(lms_base_url, ecommerce_base_url, client_id, client_secret) - discovery_api = DiscoveryApi(lms_base_url, discovery_base_url, client_id, client_secret) - credentials_api = CredentialsApi(lms_base_url, credentials_base_url, client_id, client_secret) - - # Call LMS with current and desired usernames - response = lms_api.replace_lms_usernames(lms_username_mappings) - fully_failed_replacements += response['failed_replacements'] - in_progress_replacements = response['successful_replacements'] - - # Step through each services endpoints with the list returned from LMS. - # The LMS list has already verified usernames and made any duplicate - # usernames unique (e.g. 'matt' => 'mattf56a'). We pass successful - # replacements onto the next service and store all failed replacments. - replacement_methods = [ - ecommerce_api.replace_usernames, - discovery_api.replace_usernames, - credentials_api.replace_usernames, - lms_api.replace_forums_usernames, - ] - # Iterate through the endpoints above and if the APIs return any failures - # capture these in partially_failed_replacements. Only successfuly - # replacements will continue to be passed to the next service. - for replacement_method in replacement_methods: - response = replacement_method(in_progress_replacements) - partially_failed_replacements += response['failed_replacements'] - in_progress_replacements = response['successful_replacements'] - - successful_replacements = in_progress_replacements - - with open('username_replacement_results.csv', 'w', newline='') as output_file: - csv_writer = csv.writer(output_file) - # Write header - csv_writer.writerow(['Original Username', 'New Username', 'Status']) - write_responses(csv_writer, successful_replacements, "SUCCESS") - write_responses(csv_writer, partially_failed_replacements, "PARTIALLY FAILED") - write_responses(csv_writer, fully_failed_replacements, "FAILED") - - if partially_failed_replacements or fully_failed_replacements: - sys.exit(-1) - - -if __name__ == "__main__": - # pylint: disable=unexpected-keyword-arg, no-value-for-parameter - # If using env vars to provide params, prefix them with "RETIREMENT_", e.g. RETIREMENT_CLIENT_ID - replace_usernames(auto_envvar_prefix='USERNAME_REPLACEMENT') diff --git a/tubular/scripts/retirement_archive_and_cleanup.py b/tubular/scripts/retirement_archive_and_cleanup.py deleted file mode 100644 index 03ca93d4..00000000 --- a/tubular/scripts/retirement_archive_and_cleanup.py +++ /dev/null @@ -1,334 +0,0 @@ -#! /usr/bin/env python3 -""" -Command-line script to bulk archive and cleanup retired learners from LMS -""" - - -import datetime -import gzip -import json -import logging -import sys -import time -from functools import partial -from os import path - -import backoff -import boto3 -import click -from botocore.exceptions import BotoCoreError, ClientError -from six import text_type - -# Add top-level module path to sys.path before importing tubular code. -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - -# pylint: disable=wrong-import-position -from tubular.scripts.helpers import ( - _config_or_exit, _fail, _fail_exception, _log, _setup_lms_api_or_exit -) - - -SCRIPT_SHORTNAME = 'Archive and Cleanup' - -# Return codes for various fail cases -ERR_NO_CONFIG = -1 -ERR_BAD_CONFIG = -2 -ERR_FETCHING = -3 -ERR_ARCHIVING = -4 -ERR_DELETING = -5 -ERR_SETUP_FAILED = -5 -ERR_BAD_CLI_PARAM = -6 - -LOG = partial(_log, SCRIPT_SHORTNAME) -FAIL = partial(_fail, SCRIPT_SHORTNAME) -FAIL_EXCEPTION = partial(_fail_exception, SCRIPT_SHORTNAME) -CONFIG_OR_EXIT = partial(_config_or_exit, FAIL_EXCEPTION, ERR_BAD_CONFIG) -SETUP_LMS_OR_EXIT = partial(_setup_lms_api_or_exit, FAIL, ERR_SETUP_FAILED) - -DELAY = 10 - - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) -logging.getLogger('boto').setLevel(logging.INFO) - - -def _fetch_learners_to_archive_or_exit(config, start_date, end_date, initial_state): - """ - Makes the call to fetch learners to be cleaned up, returns the list of learners or exits. - """ - LOG('Fetching users in state {} created from {} to {}'.format(initial_state, start_date, end_date)) - try: - learners = config['LMS'].get_learners_by_date_and_status(initial_state, start_date, end_date) - LOG('Successfully fetched {} learners'.format(str(len(learners)))) - return learners - except Exception as exc: # pylint: disable=broad-except - FAIL_EXCEPTION(ERR_FETCHING, 'Unexpected error occurred fetching users to update!', exc) - - -def _batch_learners(learners=None, batch_size=None): - """ - To avoid potentially overwheling the LMS with a large number of user retirements to - delete, create a list of smaller batches of users to iterate over. This has the - added benefit of reducing the amount of user retirement archive requests that can - get into a bad state should this script experience an error. - - Args: - learners (list): List of learners to portion into smaller batches (lists) - batch_size (int): The number of learners to portion into each batch. If this - parameter is not supplied, this function will return one batch containing - all of the learners supplied to it. - """ - if batch_size: - return [ - learners[i:i+batch_size] for i, _ in list(enumerate(learners))[::batch_size] - ] - else: - return [learners] - - -def _on_s3_backoff(details): - """ - Callback that is called when backoff... backs off - """ - LOG("Backing off {wait:0.1f} seconds after {tries} tries calling function {target}".format(**details)) - - -@backoff.on_exception( - backoff.expo, - ( - ClientError, - BotoCoreError - ), - on_backoff=lambda details: _on_s3_backoff(details), # pylint: disable=unnecessary-lambda, - max_time=120, # 2 minutes -) -def _upload_to_s3(config, filename, dry_run=False): - """ - Upload the archive file to S3 - """ - try: - datestr = datetime.datetime.now().strftime('%Y/%m/') - s3 = boto3.resource('s3') - bucket_name = config['s3_archive']['bucket_name'] - # Dry runs of this script should only generate the retirement archive file, not push it to s3. - bucket = s3.Bucket(bucket_name) - key = 'raw/' + datestr + filename - if dry_run: - LOG('Dry run. Skipping the step to upload data to {}'.format(key)) - return - else: - bucket.upload_file(filename, key) - LOG('Successfully uploaded retirement data to {}'.format(key)) - except Exception as exc: - LOG(text_type(exc)) - raise - - -def _format_datetime_for_athena(timestamp): - """ - Takes a JSON serialized timestamp string and returns a format of it that is queryable as a datetime in Athena - """ - return timestamp.replace('T', ' ').rstrip('Z') - - -def _archive_retirements_or_exit(config, learners, dry_run=False): - """ - Creates an archive file with all of the retirements and uploads it to S3 - - The format of learners from LMS should be a list of these: - { - 'id': 46, # This is the UserRetirementStatus ID! - 'user': - { - 'id': 5213599, # THIS is the LMS User ID - 'username': 'retired__user_88ad587896920805c26041a2e75c767c75471ee9', - 'email': 'retired__user_d08919da55a0e03c032425567e4a33e860488a96@retired.invalid', - 'profile': - { - 'id': 2842382, - 'name': '' - } - }, - 'current_state': - { - 'id': 41, - 'state_name': 'COMPLETE', - 'state_execution_order': 13 - }, - 'last_state': { - 'id': 1, - 'state_name': 'PENDING', - 'state_execution_order': 1 - }, - 'created': '2018-10-18T20:35:52.349757Z', # This is the UserRetirementStatus creation date - 'modified': '2018-10-18T20:35:52.350050Z', # This is the UserRetirementStatus last touched date - 'original_username': 'retirement_test', - 'original_email': 'orig@foo.invalid', - 'original_name': 'Retirement Test', - 'retired_username': 'retired__user_88ad587896920805c26041a2e75c767c75471ee9', - 'retired_email': 'retired__user_d08919da55a0e03c032425567e4a33e860488a96@retired.invalid' - } - """ - LOG('Archiving retirements for {} learners to {}'.format(len(learners), config['s3_archive']['bucket_name'])) - try: - now = _get_utc_now() - filename = 'retirement_archive_{}.json.gz'.format(now.strftime('%Y_%d_%m_%H_%M_%S')) - LOG('Creating retirement archive file {}'.format(filename)) - - # The file format is one JSON object per line with the newline as a separator. This allows for - # easy queries via AWS Athena if we need to confirm learner deletion. - with gzip.open(filename, 'wt') as out: - for learner in learners: - user = { - 'user_id': learner['user']['id'], - 'original_username': learner['original_username'], - 'original_email': learner['original_email'], - 'original_name': learner['original_name'], - 'retired_username': learner['retired_username'], - 'retired_email': learner['retired_email'], - 'retirement_request_date': _format_datetime_for_athena(learner['created']), - 'last_modified_date': _format_datetime_for_athena(learner['modified']), - } - json.dump(user, out) - out.write("\n") - if dry_run: - LOG('Dry run. Logging the contents of {} for debugging'.format(filename)) - with gzip.open(filename, 'r') as archive_file: - for line in archive_file.readlines(): - LOG(line) - _upload_to_s3(config, filename, dry_run) - except Exception as exc: # pylint: disable=broad-except - FAIL_EXCEPTION(ERR_ARCHIVING, 'Unexpected error occurred archiving retirements!', exc) - - -def _cleanup_retirements_or_exit(config, learners): - """ - Bulk deletes the retirements for this run - """ - LOG('Cleaning up retirements for {} learners'.format(len(learners))) - try: - usernames = [l['original_username'] for l in learners] - config['LMS'].bulk_cleanup_retirements(usernames) - except Exception as exc: # pylint: disable=broad-except - FAIL_EXCEPTION(ERR_DELETING, 'Unexpected error occurred deleting retirements!', exc) - -def _get_utc_now(): - """ - Helper function only used to make unit test mocking/patching easier. - """ - return datetime.datetime.utcnow() - - -@click.command("archive_and_cleanup") -@click.option( - '--config_file', - help='YAML file that contains retirement-related configuration for this environment.' -) -@click.option( - '--cool_off_days', - help='Number of days a retirement should exist before being archived and deleted.', - type=int, - default=37 # 7 days before retirement, 30 after -) -@click.option( - '--dry_run', - help=''' - Should this script be run in a dry-run mode, in which generated retirement - archive files are not pushed to s3 and retirements are not cleaned up in the LMS - ''', - type=bool, - default=False -) -@click.option( - '--start_date', - help=''' - Start of window used to select user retirements for archival. Only user retirements - added to the retirement queue after this date will be processed. - ''', - type=click.DateTime(formats=['%Y-%m-%d']) -) -@click.option( - '--end_date', - help=''' - End of window used to select user retirments for archival. Only user retirments - added to the retirement queue before this date will be processed. In the case that - this date is more recent than the value specified in the `cool_off_days` parameter, - an error will be thrown. If this parameter is not used, the script will default to - using an end_date based upon the `cool_off_days` parameter. - ''', - type=click.DateTime(formats=['%Y-%m-%d']) -) -@click.option( - '--batch_size', - help='Number of user retirements to process', - type=int -) -def archive_and_cleanup(config_file, cool_off_days, dry_run, start_date, end_date, batch_size): - """ - Cleans up UserRetirementStatus rows in LMS by: - 1- Getting all rows currently in COMPLETE that were created --cool_off_days ago or more, - unless a specific timeframe is specified - 2- Archiving them to S3 in an Athena-queryable format - 3- Deleting them from LMS (by username) - """ - try: - LOG('Starting bulk update script: Config: {}'.format(config_file)) - - if not config_file: - FAIL(ERR_NO_CONFIG, 'No config file passed in.') - - config = CONFIG_OR_EXIT(config_file) - SETUP_LMS_OR_EXIT(config) - - if not start_date: - # This date is just a bogus "earliest possible value" since the call requires one - start_date = datetime.datetime.strptime('2018-01-01', '%Y-%m-%d') - if end_date: - if end_date > _get_utc_now() - datetime.timedelta(days=cool_off_days): - FAIL(ERR_BAD_CLI_PARAM, 'End date cannot occur within the cool_off_days period') - else: - # Set an end_date of `cool_off_days` days before the time that this script is run - end_date = _get_utc_now() - datetime.timedelta(days=cool_off_days) - - if start_date >= end_date: - FAIL(ERR_BAD_CLI_PARAM, 'Conflicting start and end dates passed on CLI') - - - LOG( - 'Fetching retirements for learners that have a COMPLETE status and were created ' - 'between {} and {}.'.format( - start_date, end_date - ) - ) - learners = _fetch_learners_to_archive_or_exit( - config, start_date, end_date, 'COMPLETE' - ) - - learners_to_process = _batch_learners(learners, batch_size) - num_batches = len(learners_to_process) - - if learners_to_process: - for index, batch in enumerate(learners_to_process): - LOG( - 'Processing batch {} out of {} of user retirement requests'.format( - str(index + 1), str(num_batches) - ) - ) - _archive_retirements_or_exit(config, batch, dry_run) - - if dry_run: - LOG('This is a dry-run. Exiting before any retirements are cleaned up') - else: - _cleanup_retirements_or_exit(config, batch) - LOG('Archive and cleanup complete for batch #{}'.format(str(index + 1))) - time.sleep(DELAY) - else: - LOG('No learners found!') - except Exception as exc: - LOG(text_type(exc)) - raise - - -if __name__ == '__main__': - # pylint: disable=unexpected-keyword-arg, no-value-for-parameter - archive_and_cleanup(auto_envvar_prefix='RETIREMENT') diff --git a/tubular/scripts/retirement_bulk_status_update.py b/tubular/scripts/retirement_bulk_status_update.py deleted file mode 100755 index a6b41676..00000000 --- a/tubular/scripts/retirement_bulk_status_update.py +++ /dev/null @@ -1,155 +0,0 @@ -#! /usr/bin/env python3 -""" -Command-line script to bulk update retirement states in LMS -""" - - -from datetime import datetime -from functools import partial -from os import path -import logging -import sys - -import click -from six import text_type - -# Add top-level module path to sys.path before importing tubular code. -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - -# pylint: disable=wrong-import-position -from tubular.scripts.helpers import ( - _config_or_exit, - _fail, - _fail_exception, - _log, - _setup_lms_api_or_exit -) - - -SCRIPT_SHORTNAME = 'Bulk Status' - -# Return codes for various fail cases -ERR_NO_CONFIG = -1 -ERR_BAD_CONFIG = -2 -ERR_FETCHING = -3 -ERR_UPDATING = -4 -ERR_SETUP_FAILED = -5 - -LOG = partial(_log, SCRIPT_SHORTNAME) -FAIL = partial(_fail, SCRIPT_SHORTNAME) -FAIL_EXCEPTION = partial(_fail_exception, SCRIPT_SHORTNAME) -CONFIG_OR_EXIT = partial(_config_or_exit, FAIL_EXCEPTION, ERR_BAD_CONFIG) -SETUP_LMS_OR_EXIT = partial(_setup_lms_api_or_exit, FAIL, ERR_SETUP_FAILED) - - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - - -def validate_dates(_, __, value): - """ - Click input validator for date options. - - Validates string format - - Transforms the string into a datetime.Date object - - Validates the date is less than or equal to today - - Returns the Date, or raises a click.BadParameter - """ - try: - date = datetime.strptime(value, '%Y-%m-%d').date() - if date > datetime.now().date(): - raise ValueError() - return date - except ValueError: - raise click.BadParameter('Dates need to be in the format of YYYY-MM-DD and today or earlier.') - - -def _fetch_learners_to_update_or_exit(config, start_date, end_date, initial_state): - """ - Makes the call to fetch learners to be bulk updated, returns the list of learners - or exits. - """ - LOG('Fetching users in state {} created from {} to {}'.format(initial_state, start_date, end_date)) - try: - return config['LMS'].get_learners_by_date_and_status(initial_state, start_date, end_date) - except Exception as exc: # pylint: disable=broad-except - FAIL_EXCEPTION(ERR_FETCHING, 'Unexpected error occurred fetching users to update!', exc) - - -def _update_learners_or_exit(config, learners, new_state=None, rewind_state=False): - """ - Iterates the list of learners, setting each to the new state. On any error - it will exit the script. If rewind_state is set to True then the learner - will be reset to their previous state. - """ - if (not new_state and not rewind_state) or (rewind_state and new_state): - FAIL(ERR_BAD_CONFIG, "You must specify either the boolean rewind_state or a new state to set learners to.") - LOG('Updating {} learners to {}'.format(len(learners), new_state)) - try: - for learner in learners: - if rewind_state: - new_state = learner['last_state']['state_name'] - config['LMS'].update_learner_retirement_state( - learner['original_username'], - new_state, - 'Force updated via retirement_bulk_status_update Tubular script', - force=True - ) - except Exception as exc: # pylint: disable=broad-except - FAIL_EXCEPTION(ERR_UPDATING, 'Unexpected error occurred updating users!', exc) - - -@click.command("update_statuses") -@click.option( - '--config_file', - help='YAML file that contains retirement-related configuration for this environment.' -) -@click.option( - '--initial_state', - help='Find learners in this retirement state. Use the state name ex: PENDING, COMPLETE' -) -@click.option( - '--new_state', - help='Set any found learners to this new state. Use the state name ex: PENDING, COMPLETE', - default=None -) -@click.option( - '--start_date', - callback=validate_dates, - help='(YYYY-MM-DD) Earliest creation date for retirements to act on.' -) -@click.option( - '--end_date', - callback=validate_dates, - help='(YYYY-MM-DD) Latest creation date for retirements to act on.' -) -@click.option( - '--rewind-state', - help='Rewinds to the last_state for learners. Useful for resetting ERRORED users', - default=False, - is_flag=True -) -def update_statuses(config_file, initial_state, new_state, start_date, end_date, rewind_state): - """ - Bulk-updates user retirement statuses which are in the specified state -and- retirement was - requested between a start date and end date. - """ - try: - LOG('Starting bulk update script: Config: {}'.format(config_file)) - - if not config_file: - FAIL(ERR_NO_CONFIG, 'No config file passed in.') - - config = CONFIG_OR_EXIT(config_file) - SETUP_LMS_OR_EXIT(config) - - learners = _fetch_learners_to_update_or_exit(config, start_date, end_date, initial_state) - _update_learners_or_exit(config, learners, new_state, rewind_state) - - LOG('Bulk update complete') - except Exception as exc: - print(text_type(exc)) - raise - - -if __name__ == '__main__': - # pylint: disable=unexpected-keyword-arg, no-value-for-parameter - update_statuses(auto_envvar_prefix='RETIREMENT') diff --git a/tubular/tests/mixins.py b/tubular/tests/mixins.py deleted file mode 100644 index 49aba2c4..00000000 --- a/tubular/tests/mixins.py +++ /dev/null @@ -1,23 +0,0 @@ -from urllib.parse import urljoin - -import responses - -from tubular import edx_api - -FAKE_ACCESS_TOKEN = 'THIS_IS_A_JWT' -CONTENT_TYPE = 'application/json' - - -class OAuth2Mixin: - @staticmethod - def mock_access_token_response(status=200): - """ - Mock POST requests to retrieve an access token for this site's service user. - """ - responses.add( - responses.POST, - urljoin('http://localhost:18000/', edx_api.OAUTH_ACCESS_TOKEN_URL), - status=status, - json={'access_token': FAKE_ACCESS_TOKEN, 'expires_in': 60}, - content_type=CONTENT_TYPE - )