diff --git a/.github/workflows/autogenerate_requirements.yaml b/.github/workflows/autogenerate_requirements.yaml deleted file mode 100644 index f56e483..0000000 --- a/.github/workflows/autogenerate_requirements.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: Autogenerate Requirements - -on: - pull_request: - branches: [dev, main] - paths: - - 'pyproject.toml' - - 'tools/autogenerate_requirements.py' - - '.github/workflows/autogenerate_requirements.yaml' - -jobs: - update-requirements: - runs-on: ubuntu-latest - - steps: - - name: Check out repository - uses: actions/checkout@v3 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.10' - - - name: Run autogeneration script - run: | - pip install toml - python tools/autogenerate_requirements.py - - - name: Commit files - run: | - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - git diff --quiet requirements.txt || { - git add requirements.txt - git commit -m "[Automated] Updated requirements.txt" - } - - - name: Push changes - uses: ad-m/github-push-action@master - with: - branch: ${{ github.head_ref }} diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..0b1aaf7 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,120 @@ +name: Tests + +on: + pull_request: + branches: [ dev, main ] + paths: + - 'datadreamer/**/**.py' + - 'tests/**/**.py' + - .github/workflows/tests.yaml + +jobs: + run_tests: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + version: ['3.10', '3.11'] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.version }} + cache: pip + + - name: Install dependencies [Ubuntu] + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y pandoc + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + + - name: Install dependencies [Windows] + if: matrix.os == 'windows-latest' + run: | + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + + - name: Install dependencies [macOS] + if: matrix.os == 'macOS-latest' + run: | + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + + - name: Run tests with coverage [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + + - name: Run tests [Windows, macOS] + if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' + run: pytest tests --junit-xml pytest.xml + + - name: Generate coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: coverage-badge -o media/coverage_badge.svg -f + + - name: Generate coverage report [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Commit coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + + - name: Push changes [Ubuntu] + if: 
matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: ad-m/github-push-action@master + with: + branch: ${{ github.head_ref }} + + - name: Upload Test Results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) + path: pytest.xml + retention-days: 10 + if-no-files-found: error + + publish-test-results: + name: "Publish Tests Results" + needs: run_tests + runs-on: ubuntu-latest + permissions: + checks: write + pull-requests: write + if: always() + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + with: + files: "artifacts/**/*.xml" \ No newline at end of file diff --git a/src/datadreamer/__init__.py b/datadreamer/__init__.py similarity index 100% rename from src/datadreamer/__init__.py rename to datadreamer/__init__.py diff --git a/src/datadreamer/dataset_annotation/__init__.py b/datadreamer/dataset_annotation/__init__.py similarity index 100% rename from src/datadreamer/dataset_annotation/__init__.py rename to datadreamer/dataset_annotation/__init__.py diff --git a/src/datadreamer/dataset_annotation/image_annotator.py b/datadreamer/dataset_annotation/image_annotator.py similarity index 100% rename from src/datadreamer/dataset_annotation/image_annotator.py rename to datadreamer/dataset_annotation/image_annotator.py diff --git a/src/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py similarity index 100% rename from src/datadreamer/dataset_annotation/owlv2_annotator.py rename to datadreamer/dataset_annotation/owlv2_annotator.py diff --git a/src/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py similarity index 100% rename from src/datadreamer/dataset_annotation/utils.py rename to datadreamer/dataset_annotation/utils.py diff --git a/src/datadreamer/image_generation/__init__.py b/datadreamer/image_generation/__init__.py similarity index 100% rename from src/datadreamer/image_generation/__init__.py rename to datadreamer/image_generation/__init__.py diff --git a/src/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py similarity index 100% rename from src/datadreamer/image_generation/clip_image_tester.py rename to datadreamer/image_generation/clip_image_tester.py diff --git a/src/datadreamer/image_generation/image_generator.py b/datadreamer/image_generation/image_generator.py similarity index 100% rename from src/datadreamer/image_generation/image_generator.py rename to datadreamer/image_generation/image_generator.py diff --git a/src/datadreamer/image_generation/sdxl_image_generator.py b/datadreamer/image_generation/sdxl_image_generator.py similarity index 100% rename from src/datadreamer/image_generation/sdxl_image_generator.py rename to datadreamer/image_generation/sdxl_image_generator.py diff --git a/src/datadreamer/image_generation/sdxl_turbo_image_generator.py b/datadreamer/image_generation/sdxl_turbo_image_generator.py similarity index 100% rename from src/datadreamer/image_generation/sdxl_turbo_image_generator.py rename to datadreamer/image_generation/sdxl_turbo_image_generator.py diff --git a/src/datadreamer/pipelines/__init__.py b/datadreamer/pipelines/__init__.py similarity index 100% rename from src/datadreamer/pipelines/__init__.py rename to 
datadreamer/pipelines/__init__.py diff --git a/src/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py similarity index 100% rename from src/datadreamer/pipelines/generate_dataset_from_scratch.py rename to datadreamer/pipelines/generate_dataset_from_scratch.py diff --git a/src/datadreamer/prompt_generation/__init__.py b/datadreamer/prompt_generation/__init__.py similarity index 100% rename from src/datadreamer/prompt_generation/__init__.py rename to datadreamer/prompt_generation/__init__.py diff --git a/src/datadreamer/prompt_generation/lm_prompt_generator.py b/datadreamer/prompt_generation/lm_prompt_generator.py similarity index 100% rename from src/datadreamer/prompt_generation/lm_prompt_generator.py rename to datadreamer/prompt_generation/lm_prompt_generator.py diff --git a/src/datadreamer/prompt_generation/prompt_generator.py b/datadreamer/prompt_generation/prompt_generator.py similarity index 100% rename from src/datadreamer/prompt_generation/prompt_generator.py rename to datadreamer/prompt_generation/prompt_generator.py diff --git a/src/datadreamer/prompt_generation/simple_prompt_generator.py b/datadreamer/prompt_generation/simple_prompt_generator.py similarity index 100% rename from src/datadreamer/prompt_generation/simple_prompt_generator.py rename to datadreamer/prompt_generation/simple_prompt_generator.py diff --git a/src/datadreamer/prompt_generation/synonym_generator.py b/datadreamer/prompt_generation/synonym_generator.py similarity index 100% rename from src/datadreamer/prompt_generation/synonym_generator.py rename to datadreamer/prompt_generation/synonym_generator.py diff --git a/src/datadreamer/utils/__init__.py b/datadreamer/utils/__init__.py similarity index 100% rename from src/datadreamer/utils/__init__.py rename to datadreamer/utils/__init__.py diff --git a/src/datadreamer/utils/convert_dataset_to_yolo.py b/datadreamer/utils/convert_dataset_to_yolo.py similarity index 100% rename from src/datadreamer/utils/convert_dataset_to_yolo.py rename to datadreamer/utils/convert_dataset_to_yolo.py diff --git a/src/datadreamer/utils/nms.py b/datadreamer/utils/nms.py similarity index 100% rename from src/datadreamer/utils/nms.py rename to datadreamer/utils/nms.py diff --git a/examples/image_annotation_example.py b/examples/image_annotation_example.py index 27d9909..c1ce649 100644 --- a/examples/image_annotation_example.py +++ b/examples/image_annotation_example.py @@ -1,9 +1,10 @@ import matplotlib.patches as patches import matplotlib.pyplot as plt import numpy as np -from datadreamer.dataset_annotation import OWLv2Annotator from PIL import Image +from datadreamer.dataset_annotation import OWLv2Annotator + # Initialize the OWLv2Annotator annotator = OWLv2Annotator( seed=42, diff --git a/examples/image_generation_example.py b/examples/image_generation_example.py index b2170c4..011602a 100644 --- a/examples/image_generation_example.py +++ b/examples/image_generation_example.py @@ -1,4 +1,5 @@ import matplotlib.pyplot as plt + from datadreamer.image_generation import ( StableDiffusionTurboImageGenerator, ) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg new file mode 100644 index 0000000..636889b --- /dev/null +++ b/media/coverage_badge.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + coverage + coverage + 44% + 44% + + diff --git a/pyproject.toml b/pyproject.toml index f7e2ffe..fc346bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ requires-python = ">=3.8" license = { file = "LICENSE" 
} maintainers = [{ name = "Luxonis", email = "support@luxonis.com"}] keywords = ["computer vision", "AI", "machine learning", "generative models"] +dynamic = ["dependencies", "optional-dependencies"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 3 - Alpha", @@ -21,26 +22,10 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Processing", "Topic :: Scientific/Engineering :: Image Recognition", ] -dependencies = [ - "torch>=2.0.0", - "torchvision>=0.16.0", - "transformers>=4.0.0", - "diffusers>=0.24.0", - "compel>=2.0.0", - "tqdm>=4.0.0", - "Pillow>=9.0.0", - "numpy>=1.22.0", - "matplotlib>=3.6.0", - "opencv-python>=4.7.0", - "accelerate>=0.25.0", - "scipy>=1.10.0", -] -[project.optional-dependencies] -dev = [ - "datadreamer", - "pre-commit>=3.2.1", - "toml>=0.10.2", -] + +[tool.setuptools.dynamic] +dependencies = { file = ["requirements.txt"] } +optional-dependencies = { dev = { file = ["requirements-dev.txt"] } } [project.urls] Homepage = "https://github.com/luxonis/datadreamer" @@ -49,7 +34,7 @@ Homepage = "https://github.com/luxonis/datadreamer" datadreamer = "datadreamer.pipelines.generate_dataset_from_scratch:main" [tool.setuptools.packages.find] -where = ["src"] +where = ["."] [tool.ruff] target-version = "py38" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..4bb28ea --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pre-commit>=3.2.1 +toml>=0.10.2 diff --git a/requirements.txt b/requirements.txt index 0e871e8..1ad0442 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,3 @@ matplotlib>=3.6.0 opencv-python>=4.7.0 accelerate>=0.25.0 scipy>=1.10.0 - -# dev -pre-commit>=3.2.1 -toml>=0.10.2 diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py new file mode 100644 index 0000000..56f10b3 --- /dev/null +++ b/tests/integration/test_pipeline.py @@ -0,0 +1,546 @@ +import os +import subprocess + +import psutil +import pytest +import torch + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_detection_pipeline(cmd: str, target_folder: str): + # Run the command + result = subprocess.run(cmd, shell=True) + assert result.returncode == 0, "Command failed to run" + # Check that the target folder is a folder + assert os.path.isdir(target_folder), "Directory not created" + files = [ + "annotations.json", + "generation_args.json", + "image_0.jpg", + "prompts.json", + ] + # Check that all the files were created + for file in files: + assert os.path.isfile(os.path.join(target_folder, file)), f"{file} not created" + # Check that the "bboxes_visualization" folder was created + assert os.path.isdir( + os.path.join(target_folder, "bboxes_visualization") + ), "bboxes_visualization directory not created" + + +def _check_wrong_argument_choice(cmd: str): + with pytest.raises(subprocess.CalledProcessError): + subprocess.check_call(cmd, shell=True) + + +def _check_wrong_value(cmd: str): + with pytest.raises(ValueError): + try: + subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + raise ValueError(e.output.decode()) from e + + +# ========================================================= +# ARGUMENTS CHECKS +# ========================================================= +def test_invalid_task_value(): + # Define the cmd + cmd = "datadreamer --task invalid_task" + 
_check_wrong_argument_choice(cmd) + + +def test_invalid_prompts_number_type(): + # Define the cmd + cmd = "datadreamer --prompts_number value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_num_objects_range_type(): + # Define the cmd + cmd = "datadreamer --num_objects_range value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_conf_threshold_range_type(): + # Define the cmd + cmd = "datadreamer --conf_threshold value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_tester_patience_type(): + # Define the cmd + cmd = "datadreamer --image_tester_patience value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_seed_type(): + # Define the cmd + cmd = "datadreamer --seed value --device cpu" + _check_wrong_argument_choice(cmd) + + +def test_invalid_prompt_generator(): + # Define the cmd + cmd = "datadreamer --prompt_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_generator(): + # Define the cmd + cmd = "datadreamer --image_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_device(): + # Define the cmd + cmd = "datadreamer --device invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_empty_class_names(): + # Define the cmd + cmd = "datadreamer --class_names []" + _check_wrong_value(cmd) + + +def test_invalid_class_names(): + # Define the cmd + cmd = "datadreamer --class_names [2, -1]" + _check_wrong_value(cmd) + + +def test_invalid_prompts_number(): + # Define the cmd + cmd = "datadreamer --prompts_number -1" + _check_wrong_value(cmd) + + +def test_negative_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold -1" + _check_wrong_value(cmd) + + +def test_big_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold 10" + _check_wrong_value(cmd) + + +def test_invalid_image_tester_patience(): + # Define the cmd + cmd = "datadreamer --image_tester_patience -1" + _check_wrong_value(cmd) + + +def test_invalid_seed(): + # Define the cmd + cmd = "datadreamer --seed -1 --device cpu" + _check_wrong_value(cmd) + + +def test_invalid_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1" + _check_wrong_value(cmd) + + +def test_many_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1 2 3" + _check_wrong_value(cmd) + + +def test_desc_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 3 1" + _check_wrong_value(cmd) + + +def test_negative_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range -3 1" + _check_wrong_value(cmd) + + +# ========================================================= +# DETECTION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + 
_check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# DETECTION - LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", +) +def test_cuda_lm_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + 
_check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", +) +def test_cuda_lm_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# CLASSIFICATION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + 
# Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# CLASSIFICATION - LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_turbo_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, 55GB of HDD and CUDA support", +) +def test_cuda_lm_sdxl_turbo_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", +) +def test_cuda_lm_sdxl_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 
" + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py new file mode 100644 index 0000000..0926d85 --- /dev/null +++ b/tests/unittests/test_annotators.py @@ -0,0 +1,49 @@ +import psutil +import pytest +import requests +import torch +from PIL import Image + +from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator + +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_owlv2_annotator(device: str): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = OWLv2Annotator(device=device) + final_boxes, final_scores, final_labels = annotator.annotate(im, ["bus", "people"]) + # Assert that the boxes, scores and labels are tensors + assert type(final_boxes) == torch.Tensor + assert type(final_scores) == torch.Tensor + assert type(final_labels) == torch.Tensor + # Get the number of objects detected + num_objects = final_boxes.shape[0] + # Check that the boxes has correct shape + assert final_boxes.shape == (num_objects, 4) + # Check that the scores has correct shape + assert final_scores.shape == (num_objects,) + # Check that the labels has correct shape + assert final_labels.shape == (num_objects,) + # Check that the scores are not zero + assert torch.all(final_scores > 0) + # Check that the labels are bigger or equal to zero + assert torch.all(final_labels >= 0) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 15, + reason="Test requires GPU and 15GB of HDD", +) +def test_cuda_owlv2_annotator(): + _check_owlv2_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 15, + reason="Test requires at least 15GB of HDD", +) +def test_cou_owlv2_annotator(): + _check_owlv2_annotator("cpu") diff --git a/tests/unittests/test_image_generation.py b/tests/unittests/test_image_generation.py new file mode 100644 index 0000000..05e6a9d --- /dev/null +++ b/tests/unittests/test_image_generation.py @@ -0,0 +1,102 @@ +from typing import Type, Union + +import psutil +import pytest +import requests +import torch +from PIL import Image + +from datadreamer.image_generation.clip_image_tester import ClipImageTester +from datadreamer.image_generation.sdxl_image_generator import ( + StableDiffusionImageGenerator, +) +from datadreamer.image_generation.sdxl_turbo_image_generator import ( + StableDiffusionTurboImageGenerator, +) + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_clip_image_tester(device: str): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + tester = ClipImageTester(device=device) + passed, probs, num_passed = tester.test_image(im, ["bus"]) + # Check that the image passed the test + assert passed is True + # Check that the number of objects passed is correct + assert num_passed == 1 + # Check that the probability has correct shape + assert probs.shape == (1, 1) + # Check that the probability is not zero + assert probs[0, 0] > 0 + # Release the tester + tester.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or 
+    reason="Test requires GPU and 15GB of HDD",
+)
+def test_cuda_clip_image_tester():
+    _check_clip_image_tester("cuda")
+
+
+@pytest.mark.skipif(
+    total_disk_space < 15,
+    reason="Test requires at least 15GB of HDD",
+)
+def test_cpu_clip_image_tester():
+    _check_clip_image_tester("cpu")
+
+
+def _check_image_generator(
+    image_generator_class: Type[
+        Union[StableDiffusionImageGenerator, StableDiffusionTurboImageGenerator]
+    ],
+    device: str,
+):
+    image_generator = image_generator_class(device=device)
+    # Generate images and check each of them
+    for generated_image in image_generator.generate_images(
+        ["A photo of a cat, dog"], [["cat", "dog"]]
+    ):
+        assert generated_image is not None
+        assert isinstance(generated_image, Image.Image)
+    # Release the generator
+    image_generator.release(empty_cuda_cache=True if device != "cpu" else False)
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25,
+    reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD",
+)
+def test_cuda_sdxl_image_generator():
+    _check_image_generator(StableDiffusionImageGenerator, "cuda")
+
+
+@pytest.mark.skipif(
+    total_memory < 16 or total_disk_space < 25,
+    reason="Test requires at least 16GB of RAM and 25GB of HDD",
+)
+def test_cpu_sdxl_image_generator():
+    _check_image_generator(StableDiffusionImageGenerator, "cpu")
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25,
+    reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD",
+)
+def test_cuda_sdxl_turbo_image_generator():
+    _check_image_generator(StableDiffusionTurboImageGenerator, "cuda")
+
+
+@pytest.mark.skipif(
+    total_memory < 16 or total_disk_space < 25,
+    reason="Test requires at least 16GB of RAM and 25GB of HDD",
+)
+def test_cpu_sdxl_turbo_image_generator():
+    _check_image_generator(StableDiffusionTurboImageGenerator, "cpu")
diff --git a/tests/unittests/test_prompt_generation.py b/tests/unittests/test_prompt_generation.py
new file mode 100644
index 0000000..9e3423c
--- /dev/null
+++ b/tests/unittests/test_prompt_generation.py
@@ -0,0 +1,107 @@
+import psutil
+import pytest
+import torch
+
+from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator
+from datadreamer.prompt_generation.simple_prompt_generator import SimplePromptGenerator
+from datadreamer.prompt_generation.synonym_generator import SynonymGenerator
+
+# Get the total memory in GB
+total_memory = psutil.virtual_memory().total / (1024**3)
+# Get the total disk space in GB
+total_disk_space = psutil.disk_usage("/").total / (1024**3)
+
+
+def test_simple_prompt_generator():
+    class_names = ["dog", "cat", "bird", "tree", "car", "person", "house", "flower"]
+    prompt_generator = SimplePromptGenerator(class_names, prompts_number=10)
+    prompts = prompt_generator.generate_prompts()
+    # Check that some prompts were generated
+    assert len(prompts) > 0
+    # Iterate through the prompts
+    for selected_objects, prompt_text in prompts:
+        # Selected objects aren't empty
+        assert len(selected_objects) > 0
+        # The number of selected objects is within the range
+        assert (
+            prompt_generator.num_objects_range[0]
+            <= len(selected_objects)
+            <= prompt_generator.num_objects_range[1]
+        )
+        # Check the generated text
+        assert prompt_text == f"A photo of a {', a '.join(selected_objects)}"
+
+
+def _check_lm_prompt_generator(device: str):
+    object_names = ["aeroplane", "bicycle", "bird", "boat"]
+    prompt_generator = LMPromptGenerator(
+        class_names=object_names, prompts_number=2, device=device
+    )
+    prompts = prompt_generator.generate_prompts()
+    # Check that some prompts were generated
+    assert len(prompts) > 0
+    # Iterate through the prompts
+    for selected_objects, prompt_text in prompts:
+        # Selected objects aren't empty
+        assert len(selected_objects) > 0
+        # The number of selected objects is within the range
+        assert (
+            prompt_generator.num_objects_range[0]
+            <= len(selected_objects)
+            <= prompt_generator.num_objects_range[1]
+        )
+        # Check the generated text
+        assert len(prompt_text) > 0 and any(
+            [x in prompt_text for x in selected_objects]
+        )
+    prompt_generator.release(empty_cuda_cache=True if device != "cpu" else False)
+
+
+@pytest.mark.skipif(
+    total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35,
+    reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support",
+)
+def test_cuda_lm_prompt_generator():
+    _check_lm_prompt_generator("cuda")
+
+
+@pytest.mark.skipif(
+    total_memory < 32 or total_disk_space < 35,
+    reason="Test requires at least 32GB of RAM and 35GB of HDD for running on CPU",
+)
+def test_cpu_lm_prompt_generator():
+    _check_lm_prompt_generator("cpu")
+
+
+def _check_synonym_generator(device: str):
+    synonyms_num = 3
+    generator = SynonymGenerator(synonyms_number=synonyms_num, device=device)
+    synonyms = generator.generate_synonyms_for_list(["astronaut", "cat", "dog"])
+    # Check that some synonyms were generated
+    assert len(synonyms) > 0
+    # Iterate through the synonyms
+    for word, synonym_list in synonyms.items():
+        # Check that the word is not empty
+        assert len(word) > 0
+        # Check that the synonym list is not empty and has the correct number of synonyms
+        assert len(synonym_list) > 0 and len(synonym_list) == synonyms_num
+        # Check that the synonyms are not empty
+        for synonym in synonym_list:
+            assert len(synonym) > 0
+    generator.release(empty_cuda_cache=True if device != "cpu" else False)
+
+
+@pytest.mark.skipif(
+    total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35,
+    reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support",
+)
+def test_cuda_synonym_generator():
+    _check_synonym_generator("cuda")
+
+
+@pytest.mark.skipif(
+    total_memory < 32 or total_disk_space < 35,
+    reason="Test requires at least 32GB of RAM and 35GB of HDD for running on CPU",
+)
+def test_cpu_synonym_generator():
+    _check_synonym_generator("cpu")