diff --git a/.github/workflows/tag_and_publish.yml b/.github/workflows/publish_dev.yml similarity index 67% rename from .github/workflows/tag_and_publish.yml rename to .github/workflows/publish_dev.yml index c71355d..bed4e37 100644 --- a/.github/workflows/tag_and_publish.yml +++ b/.github/workflows/publish_dev.yml @@ -1,20 +1,19 @@ -name: Tag and publish +name: Publish dev on: push: branches: - - main + - dev + jobs: - tag: - uses: AllenNeuralDynamics/aind-github-actions/.github/workflows/tag.yml@main - secrets: - SERVICE_TOKEN: ${{ secrets.SERVICE_TOKEN }} publish: runs-on: ubuntu-latest - needs: tag steps: - uses: actions/checkout@v3 - - name: Pull latest changes - run: git pull origin main + - name: Compute new docker image tag + run: | + echo "sha_short=$(git rev-parse --short "$GITHUB_SHA")" >> "$GITHUB_ENV" + echo "branch=$(echo ${GITHUB_REF_NAME})" >> "$GITHUB_ENV" + echo "docker_tag=$(echo ${GITHUB_REF_NAME})-$(git rev-parse --short "$GITHUB_SHA")" >> "$GITHUB_ENV" - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v2 @@ -31,5 +30,5 @@ jobs: context: . push: true tags: | - ghcr.io/allenneuraldynamics/aind-data-transfer-service:${{ needs.tag.outputs.new_version }} - ghcr.io/allenneuraldynamics/aind-data-transfer-service:latest + ghcr.io/allenneuraldynamics/aind-data-transfer-service:${{ env.docker_tag }} + ghcr.io/allenneuraldynamics/aind-data-transfer-service:dev diff --git a/.github/workflows/publish_main.yml b/.github/workflows/publish_main.yml new file mode 100644 index 0000000..a12722e --- /dev/null +++ b/.github/workflows/publish_main.yml @@ -0,0 +1,43 @@ +name: Tag and publish main +on: + push: + branches: + - main + +jobs: + tag_and_publish: + name: Parse version + runs-on: ubuntu-latest + outputs: + pkg_version: ${{ steps.output_version.outputs.pkg_version }} + steps: + - uses: actions/checkout@v3 + - name: Get version from file + run: | + pkg_name=$(grep -P 'version = \{attr = .*\}' pyproject.toml | grep -oP '\w+.__version__') + init_file="./src/${pkg_name//.__version__}/__init__.py" + pkg_version=$(grep -Po '[0-9]+\.[0-9]+\.[0-9]+' "$init_file") + echo "docker_tag=$pkg_version" >> "$GITHUB_ENV" + - name: Create git tag + run: | + git tag "v${{ env.docker_tag }}" + - name: Push git tag + run: git push origin "v${{ env.docker_tag }}" + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Login to Github Packages + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build image and push to GitHub Container Registry + uses: docker/build-push-action@v3 + with: + # relative path to the place where source code with Dockerfile is located + context: . 
+ push: true + tags: | + ghcr.io/allenneuraldynamics/aind-data-transfer-service:${{ env.docker_tag }} + ghcr.io/allenneuraldynamics/aind-data-transfer-service:latest diff --git a/.github/workflows/test_and_lint.yml b/.github/workflows/run_dev_tests.yml similarity index 94% rename from .github/workflows/test_and_lint.yml rename to .github/workflows/run_dev_tests.yml index 51a8319..b91f152 100644 --- a/.github/workflows/test_and_lint.yml +++ b/.github/workflows/run_dev_tests.yml @@ -1,9 +1,9 @@ -name: Lint and run tests +name: Run checks in dev on: pull_request: branches: - - main + - dev jobs: ci: diff --git a/.github/workflows/run_main_tests.yml b/.github/workflows/run_main_tests.yml new file mode 100644 index 0000000..0e32080 --- /dev/null +++ b/.github/workflows/run_main_tests.yml @@ -0,0 +1,43 @@ +name: Run checks in main and release + +on: + pull_request: + branches: + - '*release*' + - main + +jobs: + ci: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.9', '3.10', '3.11' ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install -e .[dev] + - name: Run linter checks + run: flake8 . && interrogate --verbose . + - name: Run tests and coverage + run: coverage run -m unittest discover && coverage report + verify_version: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Check version incremented + run: | + pkg_name=$(grep -P 'version = \{attr = .*\}' pyproject.toml | grep -oP '\w+.__version__') + init_file="./src/${pkg_name//.__version__}/__init__.py" + pkg_version=$(grep -Po '[0-9]+\.[0-9]+\.[0-9]+' "$init_file") + latest_tag=$(git ls-remote --tags --refs --sort="v:refname" | tail -n1 | sed 's/.*\///') + echo "Checking pkg_version v$pkg_version and latest_tag $latest_tag" + if [ "$latest_tag" == "v$pkg_version" ] + then + exit 1 + fi + echo "Versions are different" diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..be755ca --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,14 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.9" + +python: + install: + - method: pip + path: . + extra_requirements: + - dev + - docs diff --git a/README.md b/README.md index 163116e..daa07cd 100644 --- a/README.md +++ b/README.md @@ -6,194 +6,4 @@ This service can be used to upload data stored in a VAST drive. It uses FastAPI to upload a job submission csv file that will be used to trigger a data transfer job in an on-prem HPC. Based on the information provided in the file, the data upload process fetches the appropriate metadata and starts the upload process. -## Metadata Sources - -The associated metadata files get pulled from different sources. - -- subject from LabTracks -- procedures from NSB Sharepoint, TARS -- instrument/rig from SLIMS - - -## Usage - -There are two options for uploading data: a python API or a browser UI service. - -### Browser UI -You can go to http://aind-data-transfer-service to submit a `.csv` or `.xlsx` file with the necessary parameters needed to launch a data upload job. Click on **Job Submit Template** to download a template which you may use as a reference. - -What each column means in the job submission template: - -- **project_name**: Project name. 
A full list can be downloaded at [Project Names](http://aind-metadata-service/project_names) -- **process_capsule_id**: Optional Code Ocean capsule or pipeline to run when data is uploaded -- **input_data_mount**: Optional data mount when running a custom pipeline -- **platform**: For a list of platforms click [here](https://github.com/AllenNeuralDynamics/aind-data-schema/blob/main/src/aind_data_schema/models/platforms.py). -- **acq_datetime**: The time that the data was acquired -- **subject_id**: The unique id of the subject -- **modality0**: For a list of modalities, click [here](https://github.com/AllenNeuralDynamics/aind-data-schema/blob/main/src/aind_data_schema/models/modalities.py). -- **modality0.source**: The source (path to file) of **modality0** in VAST drive -- **metadata_dir**: An optional folder for pre-compiled metadata json files - -Modify the job template as needed and click on **Browse** to upload the file. A rendered table with a message **Successfully validated jobs from file** appears to indicate a valid file. If there are errors in the job submit file, a message that says **Error validating jobs from file** appears. - -To launch a data upload job, click on `Submit`. A message that says **Successfuly submitted jobs** should appear. - -After submission, click on `Job Status` to see the status of the data upload job process. - -### Python API -It's also possible to submit a job via a python api. Here is an example script that can be used. - -Assuming that the data on a shared drive is organized as: -``` -/shared_drive/vr_foraging/690165/20240219T112517 - - Behavior - - Behavior videos - - Configs -``` -then a job request can be submitted as: -```python -from aind_data_transfer_service.configs.job_configs import ModalityConfigs, BasicUploadJobConfigs -from pathlib import PurePosixPath -import json -import requests - -from aind_data_transfer_models.core import ModalityConfigs, BasicUploadJobConfigs, SubmitJobRequest -from aind_data_schema_models.modalities import Modality -from aind_data_schema_models.platforms import Platform -from datetime import datetime - -source_dir = PurePosixPath("/shared_drive/vr_foraging/690165/20240219T112517") - -s3_bucket = "private" -subject_id = "690165" -acq_datetime = datetime(2024, 2, 19, 11, 25, 17) -platform = Platform.BEHAVIOR - - -behavior_config = ModalityConfigs(modality=Modality.BEHAVIOR, source=(source_dir / "Behavior")) -behavior_videos_config = ModalityConfigs(modality=Modality.BEHAVIOR_VIDEOS, source=(source_dir / "Behavior videos")) -metadata_dir = source_dir / "Config" # This is an optional folder of pre-compiled metadata json files -project_name="Ephys Platform" - -upload_job_configs = BasicUploadJobConfigs( - project_name=project_name, - s3_bucket = s3_bucket, - platform = platform, - subject_id = subject_id, - acq_datetime=acq_datetime, - modalities = [behavior_config, behavior_videos_config], - metadata_dir = metadata_dir -) - -# Add more to the list if needed -upload_jobs=[upload_job_configs] - -# Optional email address and notification types if desired -user_email = "my_email_address" -email_notification_types = ["fail"] -submit_request = SubmitJobRequest( - upload_jobs=upload_jobs, - user_email=user_email, - email_notification_types=email_notification_types, -) - -post_request_content = json.loads(submit_request.model_dump_json(round_trip=True)) -submit_job_response = requests.post(url="http://aind-data-transfer-service/api/v1/submit_jobs", json=post_request_content) -print(submit_job_response.status_code) 
-print(submit_job_response.json()) -``` - -## Installation -To use the software, in the root directory, run -```bash -pip install -e . -``` - -To develop the code, run -```bash -pip install -e .[dev] -``` - -## Local Development -Run uvicorn: -```bash -export AIND_METADATA_SERVICE_PROJECT_NAMES_URL='http://aind-metadata-service-dev/project_names' -export AIND_AIRFLOW_SERVICE_URL='http://localhost:8080/api/v1/dags/run_list_of_jobs/dagRuns' -export AIND_AIRFLOW_SERVICE_JOBS_URL='http://localhost:8080/api/v1/dags/transform_and_upload/dagRuns' -export AIND_AIRFLOW_SERVICE_PASSWORD='*****' -export AIND_AIRFLOW_SERVICE_USER='user' -uvicorn aind_data_transfer_service.server:app --host 0.0.0.0 --port 5000 -``` -You can now access `http://localhost:5000`. - -## Contributing - -### Linters and testing - -There are several libraries used to run linters, check documentation, and run tests. - -- Please test your changes using the **coverage** library, which will run the tests and log a coverage report: - -```bash -coverage run -m unittest discover && coverage report -``` - -- Use **interrogate** to check that modules, methods, etc. have been documented thoroughly: - -```bash -interrogate . -``` - -- Use **flake8** to check that code is up to standards (no unused imports, etc.): -```bash -flake8 . -``` - -- Use **black** to automatically format the code into PEP standards: -```bash -black . -``` - -- Use **isort** to automatically sort import statements: -```bash -isort . -``` - -### Pull requests - -For internal members, please create a branch. For external members, please fork the repository and open a pull request from the fork. We'll primarily use [Angular](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit) style for commit messages. Roughly, they should follow the pattern: -```text -(): -``` - -where scope (optional) describes the packages affected by the code changes and type (mandatory) is one of: - -- **build**: Changes that affect build tools or external dependencies (example scopes: pyproject.toml, setup.py) -- **ci**: Changes to our CI configuration files and scripts (examples: .github/workflows/ci.yml) -- **docs**: Documentation only changes -- **feat**: A new feature -- **fix**: A bugfix -- **perf**: A code change that improves performance -- **refactor**: A code change that neither fixes a bug nor adds a feature -- **test**: Adding missing tests or correcting existing tests - -### Semantic Release - -The table below, from [semantic release](https://github.com/semantic-release/semantic-release), shows which commit message gets you which release type when `semantic-release` runs (using the default configuration): - -| Commit message | Release type | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------- | -| `fix(pencil): stop graphite breaking when too much pressure applied` | ~~Patch~~ Fix Release, Default release | -| `feat(pencil): add 'graphiteWidth' option` | ~~Minor~~ Feature Release | -| `perf(pencil): remove graphiteWidth option`

`BREAKING CHANGE: The graphiteWidth option has been removed.`<br>`The default graphite width of 10mm is always used for performance reasons.` | ~~Major~~ Breaking Release<br>
(Note that the `BREAKING CHANGE: ` token must be in the footer of the commit) | - -### Documentation -To generate the rst files source files for documentation, run -```bash -sphinx-apidoc -o doc_template/source/ src -``` -Then to create the documentation HTML files, run -```bash -sphinx-build -b html doc_template/source/ doc_template/build/html -``` -More info on sphinx installation can be found [here](https://www.sphinx-doc.org/en/master/usage/installation.html). +More information can be found at [http://aind-data-transfer-service.readthedocs.io](readthedocs). diff --git a/doc_template/Makefile b/docs/Makefile similarity index 100% rename from doc_template/Makefile rename to docs/Makefile diff --git a/docs/diagrams/system_container.png b/docs/diagrams/system_container.png new file mode 100644 index 0000000..e891772 Binary files /dev/null and b/docs/diagrams/system_container.png differ diff --git a/docs/diagrams/system_container.puml b/docs/diagrams/system_container.puml new file mode 100644 index 0000000..bda393d --- /dev/null +++ b/docs/diagrams/system_container.puml @@ -0,0 +1,26 @@ +@startuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml +' uncomment the following line and comment the first to use locally +' !include C4_Container.puml + +' LAYOUT_TOP_DOWN() +' LAYOUT_AS_SKETCH() +LAYOUT_WITH_LEGEND() + +title Container diagram for AIND Data Transfer Service + +Person(user, "User", "A scientist or engineer that wants to upload data to the cloud.") + +System_Boundary(c1, "AIND Data Transfer Service") { + Container(app, "API Application", "FastAPI, Docker Container", "Validates and submits request to aind-airflow-service. Runs in K8s cluster managed by Central IT.") +} + +System_Ext(aind_airflow_service, "AIND Airflow Service", "Receives job requests, does additional validation checks, submits and monitors jobs.") +System_Ext(slurm, "Slurm", "High performance computing cluster that runs data transformation and data upload jobs.") + +Rel(user, app, "Uses", "HTTP, REST") + +Rel_Back(user, aind_airflow_service, "Sends e-mails to", "SMTP") +Rel(app, aind_airflow_service, "Uses", "REST API") +Rel(aind_airflow_service, slurm, "Uses", "REST API") +@enduml diff --git a/docs/diagrams/system_context.png b/docs/diagrams/system_context.png new file mode 100644 index 0000000..040c2cf Binary files /dev/null and b/docs/diagrams/system_context.png differ diff --git a/docs/diagrams/system_context.puml b/docs/diagrams/system_context.puml new file mode 100644 index 0000000..fa512ca --- /dev/null +++ b/docs/diagrams/system_context.puml @@ -0,0 +1,19 @@ +@startuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Context.puml +' uncomment the following line and comment the first to use locally +' !include C4_Context.puml + +LAYOUT_WITH_LEGEND() + +title System Context diagram for AIND Data Transfer Service + +Person(user, "User", "A scientist or engineer that wants to upload data to the cloud.") +System(transfer_service, "AIND Data Transfer Service", "Allows people to send job requests to compress (or transform) and upload raw data assets.") +System_Ext(aind_airflow_service, "AIND Airflow Service", "Receives job requests, does additional validation checks, submits and monitors jobs.") +System_Ext(slurm, "Slurm", "High performance computing cluster that runs data transformation and data upload jobs.") + +Rel(user, transfer_service, "Uses", "web portal or REST API") +Rel_Back(user, aind_airflow_service, "Sends e-mails to", 
"SMTP") +Rel(transfer_service, aind_airflow_service, "Uses", "REST API") +Rel(aind_airflow_service, slurm, "Uses", "REST API") +@enduml diff --git a/docs/examples/example1.csv b/docs/examples/example1.csv new file mode 100644 index 0000000..d62bab1 --- /dev/null +++ b/docs/examples/example1.csv @@ -0,0 +1,4 @@ +project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime +Ephys Platform, , ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10 +Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +Behavior Platform, , BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM diff --git a/doc_template/make.bat b/docs/make.bat similarity index 100% rename from doc_template/make.bat rename to docs/make.bat diff --git a/docs/source/Contributing.rst b/docs/source/Contributing.rst new file mode 100644 index 0000000..919e367 --- /dev/null +++ b/docs/source/Contributing.rst @@ -0,0 +1,242 @@ +Contributor Guidelines +====================== + +This document will go through best practices for contributing to this +project. We welcome and appreciate contributions or ideas for +improvement. + +- `Bug Reports and Feature + Requests <#bug-reports-and-feature-requests>`__ +- `Local Installation for + Development <#local-installation-for-development>`__ +- `Branches and Pull Requests <#branches-and-pull-requests>`__ +- `Release Cycles <#release-cycles>`__ + +Bug Reports and Feature Requests +-------------------------------- + +Before creating a pull request, we ask contributors to please open a bug +report or feature request first: +`issues `__ + +We will do our best to monitor and maintain the backlog of issues. + +Local Installation for Development +---------------------------------- + +For development, + +- For new features or non-urgent bug fixes, create a branch off of + ``dev`` +- For an urgent hotfix to our production environment, create a branch + off of ``main`` + +Consult the `Branches and Pull Requests <#branches-and-pull-requests>`__ +and `Release Cycles <#release-cycles>`__ for more details. + +Running a local server +~~~~~~~~~~~~~~~~~~~~~~ + +From the root directory, run: + +.. code:: bash + + pip install -e .[dev] + +to install the relevant code for development. + +We will work on setting up a local dev server to mock airflow responses. +In the meantime, it’s assumed that you are able to connect to our +development backend. Please reach out to us if you need an airflow +account. + +To run uvicorn locally: + +.. code:: bash + + export AIND_METADATA_SERVICE_PROJECT_NAMES_URL='http://aind-metadata-service-dev/project_names' + export AIND_AIRFLOW_SERVICE_URL='http://aind-airflow-service-dev:8080/api/v1/dags/run_list_of_jobs/dagRuns' + export AIND_AIRFLOW_SERVICE_JOBS_URL='http://aind-airflow-service-dev:8080/api/v1/dags/transform_and_upload/dagRuns' + export AIND_AIRFLOW_SERVICE_PASSWORD='*****' + export AIND_AIRFLOW_SERVICE_USER='user' + uvicorn aind_data_transfer_service.server:app --host 0.0.0.0 --port 5000 --reload + +You can now access aind-data-transfer-service at +``http://localhost:5000``. 
+ +Branches and Pull Requests +-------------------------- + +Branch naming conventions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Name your branch using the following format: +``--`` + +where: + +```` is one of: - **build**: Changes that affect the build system +or external dependencies (e.g., pyproject.toml, setup.py) - **ci**: +Changes to our CI configuration files and scripts (examples: +.github/workflows/ci.yml) - **docs**: Changes to our documentation - +**feat**: A new feature - **fix**: A bug fix - **perf**: A code change +that improves performance - **refactor**: A code change that neither +fixes a bug nor adds a feature, but will make the codebase easier to +maintain - **test**: Adding missing tests or correcting existing tests - +**hotfix**: An urgent bug fix to our production code + +```` references the GitHub issue this branch will close + +```` is a brief description that shouldn’t be more than 3 +words. + +Some examples: + +- ``feat-12-adds-email-field`` +- ``fix-27-corrects-endpoint`` +- ``test-43-updates-server-test`` + +We ask that a separate issue and branch are created if code is added +outside the scope of the reference issue. + +Commit Messages +~~~~~~~~~~~~~~~ + +Please format your commit messages as ``: `` where +```` is from the list above and the short summary is one or two +sentences. + +Testing and Docstrings +~~~~~~~~~~~~~~~~~~~~~~ + +We strive for complete code coverage and docstrings, and we also run +code format checks. + +- To run the code format check: + +.. code:: bash + + flake8 . + +- There are some helpful libraries that will automatically format the + code and import statements: + +.. code:: bash + + black . + +and + +.. code:: bash + + isort . + +Strings that exceed the maximum line length may still need to be +formatted manually. + +- To run the docstring coverage check and report: + +.. code:: bash + + interrogate -v . + +This project uses NumPy’s docstring format: `Numpy docstring +standards `__ + +Many IDEs can be configured to automatically format docstrings in the +NumPy convention. + +- To run the unit test coverage check and report: + +.. code:: bash + + coverage run -m unittest discover && coverage report + +- To view a more detailed html version of the report, run: + +.. code:: bash + + coverage run -m unittest discover && coverage report + coverage html + +and then open ``htmlcov/index.html`` in a browser. + +Pull Requests +~~~~~~~~~~~~~ + +Pull requests and reviews are required before merging code into this +project. You may open a ``Draft`` pull request and ask for a preliminary +review on code that is currently a work-in-progress. + +Before requesting a review on a finalized pull request, please verify +that the automated checks have passed first. + +Release Cycles +-------------------------- + +For this project, we have adopted the `Git +Flow `__ system. We will +strive to release new features and bug fixes on a two week cycle. 
The +rough workflow is: + +Hotfixes +~~~~~~~~ + +- A ``hotfix`` branch is created off of ``main`` +- A Pull Request into is ``main`` is opened, reviewed, and merged into + ``main`` +- A new ``tag`` with a patch bump is created, and a new ``release`` is + deployed +- The ``main`` branch is merged into all other branches + +Feature branches and bug fixes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- A new branch is created off of ``dev`` +- A Pull Request into ``dev`` is opened, reviewed, and merged + +Release branch +~~~~~~~~~~~~~~ + +- A new branch ``release-v{new_tag}`` is created +- Documentation updates and bug fixes are created off of the + ``release-v{new_tag}`` branch. +- Commits added to the ``release-v{new_tag}`` are also merged into + ``dev`` +- Once ready for release, a Pull Request from ``release-v{new_tag}`` + into ``main`` is opened for final review +- A new tag will automatically be generated +- Once merged, a new GitHub Release is created manually + +Pre-release checklist +~~~~~~~~~~~~~~~~~~~~~ + +- ☐ Increment ``__version__`` in + ``aind_data_transfer_service/__init__.py`` file +- ☐ Run linters, unit tests, and integration tests +- ☐ Verify code is deployed and tested in test environment +- ☐ Update examples +- ☐ Update documentation + + - Run: + + .. code:: bash + + sphinx-apidoc -o docs/source/ src + sphinx-build -b html docs/source/ docs/build/html + +- ☐ Update and build UML diagrams + + - To build UML diagrams locally using a docker container: + + .. code:: bash + + docker pull plantuml/plantuml-server + docker run -d -p 8080:8080 plantuml/plantuml-server:jetty + +Post-release checklist +~~~~~~~~~~~~~~~~~~~~~~ + +- ☐ Merge ``main`` into ``dev`` and feature branches +- ☐ Edit release notes if needed +- ☐ Post announcement diff --git a/docs/source/UserGuide.rst b/docs/source/UserGuide.rst new file mode 100644 index 0000000..a19a772 --- /dev/null +++ b/docs/source/UserGuide.rst @@ -0,0 +1,194 @@ +User Guide +========== + +Thank you for using ``aind-data-transfer-service``! This guide is +intended for scientists and engineers in AIND that wish to upload data +from our shared network drives (e.g., VAST) to the cloud. + +Prerequisites +------------- + +- It’s assumed that raw data is already stored and organized on a + shared network drive such as VAST. +- The raw data should be organized by modality. + + - Example 1: + + .. code:: bash + + - /allen/aind/scratch/working_group/session_123456_2024-06-19 + - /ecephys_data + - /behavior_data + - /behavior_videos + - /aind_metadata + + - Example 2: + + .. code:: bash + + - /allen/aind/scratch/ecephys_data/session_123456_2024-06-19 + - /allen/aind/scratch/behavior_data/session_123456_2024-06-19 + - /allen/aind/scratch/behavior_videos/session_123456_2024-06-19 + - /allen/aind/scratch/aind_metadata/session_123456_2024-06-19 + +- The different modalities should not be nested + +Using the web portal +-------------------- + +Access to the web portal is available only through the VPN. The web +portal can accessed at +`http://aind-data-transfer-service/ `__ + +- Download the excel template file by clicking the + ``Job Submit Template`` link. + +- If there are compatibility issues with the excel template, you can + try saving it as a csv file and modifying the csv file + +- Create one row per data acquisition session + +- Required fields + + - project_name: A list of project names can be seen by clicking the + ``Project Names`` link + - subject_id: The LabTracks ID of the mouse + - acq_datetime: The date and time the data was acquired. 
Should be + in ISO format, for example, 2024-05-27T16:07:59 + - platform: Standardized way of collecting and processing data + (chosen from drop down menu) + - **modalities**: Two columns must be added per modality. A + **modality** (chosen from drop down menu) and a Posix style path + to the data source. For example, + + - modality0 (e.g., ecephys) + - modaltity0.source (e.g., + /allen/aind/scratch/working_group/session_123456_2024-06-19/ecephys_data) + - modality1 (e.g, behavior) + - modality1.source (e.g., + /allen/aind/scratch/working_group/session_123456_2024-06-19/behavior_data) + - modality2 (e.g, behavior_videos) + - modality2.source (e.g., + /allen/aind/scratch/working_group/session_123456_2024-06-19/behavior_videos) + +- Optional fields + + - metadata_dir: If metadata files are pre-compiled and saved to a + directory, you can add the Posix style path to the directory under + this column + - process_capsule_id: If you wish to trigger a custom Code Ocean + Capsule or pipeline, you can add the capsule_id here + - input_data_mount: If you wish to trigger a custom Code Ocean + Pipeline that has been configured with a specific data mount, you + can add that here + - s3_bucket: As default, data will be uploaded to a private bucket + in S3 managed by AIND. Please reach out to the Scientific + Computing department if you wish to upload to a different bucket. + - metadata_dir_force: We will automatically pull subject and + procedures data for a mouse. By setting this ``True``, we will + overwrite any data in the ``metadata_dir`` folder with data + acquired automatically from our service + - force_cloud_sync: We run a check to verify whether there is + already a data asset with this name saved in our S3 bucket. If + this field is set to ``True``, we will sync the data to the + bucket/folder even if it already exists + +Using the REST API +------------------ + +Jobs can also be submitted via a REST API at the endpoint +``http://aind-data-transfer-service/api/v1/submit_jobs`` + +.. 
code-block:: python + + from aind_data_transfer_service.configs.job_configs import ModalityConfigs, BasicUploadJobConfigs + from pathlib import PurePosixPath + import json + import requests + + from aind_data_transfer_models.core import ModalityConfigs, BasicUploadJobConfigs, SubmitJobRequest + from aind_data_schema_models.modalities import Modality + from aind_data_schema_models.platforms import Platform + from datetime import datetime + + source_dir = PurePosixPath("/shared_drive/vr_foraging/690165/20240219T112517") + + s3_bucket = "private" + subject_id = "690165" + acq_datetime = datetime(2024, 2, 19, 11, 25, 17) + platform = Platform.BEHAVIOR + + behavior_config = ModalityConfigs(modality=Modality.BEHAVIOR, source=(source_dir / "Behavior")) + behavior_videos_config = ModalityConfigs(modality=Modality.BEHAVIOR_VIDEOS, source=(source_dir / "Behavior videos")) + metadata_dir = source_dir / "Config" # This is an optional folder of pre-compiled metadata json files + project_name = "Ephys Platform" + + upload_job_configs = BasicUploadJobConfigs( + project_name=project_name, + s3_bucket=s3_bucket, + platform=platform, + subject_id=subject_id, + acq_datetime=acq_datetime, + modalities=[behavior_config, behavior_videos_config], + metadata_dir=metadata_dir + ) + + # Add more to the list if needed + upload_jobs = [upload_job_configs] + + # Optional email address and notification types if desired + user_email = "my_email_address" + email_notification_types = ["fail"] + submit_request = SubmitJobRequest( + upload_jobs=upload_jobs, + user_email=user_email, + email_notification_types=email_notification_types, + ) + + post_request_content = json.loads(submit_request.model_dump_json(round_trip=True, exclude_none=True)) + # Uncomment the following to submit the request + # submit_job_response = requests.post(url="http://aind-data-transfer-service/api/v1/submit_jobs", json=post_request_content) + # print(submit_job_response.status_code) + # print(submit_job_response.json()) + +Adding a notifications email address +------------------------------------ + +- NOTE: This is currently optional, but may be required in the future + +You can optionally add your email address to receive email notifications +about the jobs you’ve submitted. The notification types are: + +- BEGIN: When a job starts +- END: When a job is finished +- RETRY: When a job step had an issue and was automatically retried +- FAIL: When a job has failed completely +- ALL: To receive a notification if any one of the previous events has + triggered + +Custom Slurm settings +--------------------- + +``aind-data-transfer-service`` is a small service that forwards requests +to run a compression and upload pipeline. The major computation work is +performed on our Slurm cluster. + +We have provided default settings that work in most cases. However, for +very large jobs, such as processing more than a TB of data, you may need +to customize the Slurm settings to avoid timeouts or out-of-memory +errors. + +Please reach out to Scientific Computing if you think you may need to +customize the Slurm settings. 
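Before relying on the status page described in the next section, it can
help to confirm that a submission request was accepted. The snippet
below is a minimal sketch that builds on the REST API example above; it
assumes the ``post_request_content`` payload has already been built as
shown there.

.. code-block:: python

    import requests

    submit_job_response = requests.post(
        url="http://aind-data-transfer-service/api/v1/submit_jobs",
        json=post_request_content,
    )
    # Raise an error for any 4xx/5xx response so failed submissions are not missed
    submit_job_response.raise_for_status()
    print(submit_job_response.json())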
+ +Viewing the status of submitted jobs +------------------------------------ + +The status of submitted jobs can be viewed at: +http://aind-data-transfer-service/jobs + +Reporting bugs or making feature requests +----------------------------------------- + +Please report any bugs or feature requests here: +`issues `__ diff --git a/doc_template/source/_static/dark-logo.svg b/docs/source/_static/dark-logo.svg similarity index 100% rename from doc_template/source/_static/dark-logo.svg rename to docs/source/_static/dark-logo.svg diff --git a/doc_template/source/_static/favicon.ico b/docs/source/_static/favicon.ico similarity index 100% rename from doc_template/source/_static/favicon.ico rename to docs/source/_static/favicon.ico diff --git a/doc_template/source/_static/light-logo.svg b/docs/source/_static/light-logo.svg similarity index 100% rename from doc_template/source/_static/light-logo.svg rename to docs/source/_static/light-logo.svg diff --git a/docs/source/aind_data_transfer_service.configs.rst b/docs/source/aind_data_transfer_service.configs.rst new file mode 100644 index 0000000..b30b75e --- /dev/null +++ b/docs/source/aind_data_transfer_service.configs.rst @@ -0,0 +1,37 @@ +aind\_data\_transfer\_service.configs package +============================================= + +Submodules +---------- + +aind\_data\_transfer\_service.configs.csv\_handler module +--------------------------------------------------------- + +.. automodule:: aind_data_transfer_service.configs.csv_handler + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_transfer\_service.configs.job\_configs module +--------------------------------------------------------- + +.. automodule:: aind_data_transfer_service.configs.job_configs + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_transfer\_service.configs.job\_upload\_template module +------------------------------------------------------------------ + +.. automodule:: aind_data_transfer_service.configs.job_upload_template + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: aind_data_transfer_service.configs + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/aind_data_transfer_service.hpc.rst b/docs/source/aind_data_transfer_service.hpc.rst new file mode 100644 index 0000000..a2091f1 --- /dev/null +++ b/docs/source/aind_data_transfer_service.hpc.rst @@ -0,0 +1,29 @@ +aind\_data\_transfer\_service.hpc package +========================================= + +Submodules +---------- + +aind\_data\_transfer\_service.hpc.client module +----------------------------------------------- + +.. automodule:: aind_data_transfer_service.hpc.client + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_transfer\_service.hpc.models module +----------------------------------------------- + +.. automodule:: aind_data_transfer_service.hpc.models + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: aind_data_transfer_service.hpc + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/aind_data_transfer_service.rst b/docs/source/aind_data_transfer_service.rst new file mode 100644 index 0000000..c07d6ae --- /dev/null +++ b/docs/source/aind_data_transfer_service.rst @@ -0,0 +1,38 @@ +aind\_data\_transfer\_service package +===================================== + +Subpackages +----------- + +.. 
toctree:: + :maxdepth: 4 + + aind_data_transfer_service.configs + aind_data_transfer_service.hpc + +Submodules +---------- + +aind\_data\_transfer\_service.models module +------------------------------------------- + +.. automodule:: aind_data_transfer_service.models + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_transfer\_service.server module +------------------------------------------- + +.. automodule:: aind_data_transfer_service.server + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: aind_data_transfer_service + :members: + :undoc-members: + :show-inheritance: diff --git a/doc_template/source/conf.py b/docs/source/conf.py similarity index 100% rename from doc_template/source/conf.py rename to docs/source/conf.py diff --git a/doc_template/source/index.rst b/docs/source/index.rst similarity index 94% rename from doc_template/source/index.rst rename to docs/source/index.rst index 07adcad..d4bfdfb 100644 --- a/doc_template/source/index.rst +++ b/docs/source/index.rst @@ -11,6 +11,8 @@ Welcome to this repository's documentation! :maxdepth: 2 :caption: Contents: + UserGuide + Contributing modules diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..7b6d782 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +src +=== + +.. toctree:: + :maxdepth: 4 + + aind_data_transfer_service diff --git a/pyproject.toml b/pyproject.toml index fb12d09..fcb1b2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,10 @@ dev = [ 'coverage', 'flake8', 'interrogate', - 'isort', + 'isort' +] + +docs = [ 'Sphinx', 'furo' ] diff --git a/src/aind_data_transfer_service/__init__.py b/src/aind_data_transfer_service/__init__.py index c1b75e0..a7f8953 100644 --- a/src/aind_data_transfer_service/__init__.py +++ b/src/aind_data_transfer_service/__init__.py @@ -1,7 +1,7 @@ """Init package""" import os -__version__ = "0.14.1" +__version__ = "1.0.0" # Global constants OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "open")