permitio · philipclaesson · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023
diff --git a/documentation/docs/getting-started/configuration.mdx b/documentation/docs/getting-started/configuration.mdx
@@ -113,7 +113,8 @@ Please use this table as a reference.
 | OPAL_POLICY_REPO_URL                          | The repo url the policy repo is located at. Must be available from the machine running OPAL (opt for public internet addresses). Supported URI schemes: https:// and ssh{" "} (i.e: git@).                                                                                                                                  |                                          |
 | OPAL_POLICY_REPO_SSH_KEY                      | The content of the var is a private crypto key (i.e: SSH key). You will need to register the matching public key with your repo. For example, see the{" "} GitHub tutorial {" "} on the subject. The passed value must be the contents of the SSH key in one line (replace new-line with underscore, i.e: \n with{" "} \_). |                                          |
 | OPAL_POLICY_REPO_CLONE_PATH                   | Where (i.e: base target path) to clone the repo in your docker filesystem (not important unless you mount a docker volume).                                                                                                                                                                                                 |                                          |
-| OPAL_POLICY_REPO_MAIN_BRANCH                  | Name of the git branch to track for policy files (default: `master`).                                                                                                                                                                                                                                                       |                                          |
+| OPAL_POLICY_REPO_MAIN_BRANCH                  | Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set).                                                                                                                                                                                                                 |                                          |
+| OPAL_POLICY_REPO_TAG                          | Name of the git tag to track for policy files (default: None).                                                                                                                                                                                                                                                              |                                          |
 | OPAL_BUNDLE_IGNORE                            | Paths to omit from policy bundle. List of glob style paths, or paths without wildcards but ending with "/\*\*" indicating a parent path (ignoring all under it).                                                                                                                                                            | `bundle_ignore: Optional[List[str]]`     |
 
 ## OPAL Client Configuration Variables

diff --git a/...ation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx b/...ation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx
@@ -90,9 +90,11 @@ a [Github SSH key here](https://docs.github.com/en/github/authenticating-to-gith
 
 The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or the contents of the SSH-key - with newlines replaced with `\_`.
 
-#### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH`
+#### `OPAL_POLICY_REPO_CLONE_PATH`, `OPAL_POLICY_REPO_MAIN_BRANCH` & `OPAL_POLICY_REPO_TAG`
 
-These will allow you to control how the repo is cloned.
+These will allow you to control how the repo is cloned. By default, OPAL tracks the `master` branch of the repo; you may optionally track another branch, or a tag, instead.
+
+You must choose between tracking a branch and tracking a tag: OPAL will fail if you supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`.
 
 ### Simple run with Data source configuration
 

diff --git a/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx b/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx
@@ -185,7 +185,9 @@ The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or t
 
 ##### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH`
 
-These will allow you to control how the repo is cloned.
+These will allow you to control how the repo is cloned. By default, OPAL tracks the `master` branch of the repo; you may optionally track another branch, or a tag, instead.
+
+You must choose between tracking a branch and tracking a tag: OPAL will fail if you supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`.
 
 #### Simple run with Data source configuration
 

diff --git a/...tion/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx b/...tion/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx
@@ -87,7 +87,13 @@ For these config vars, in most cases you are good with the default values:
     <tr>
       <td valign="top">OPAL_POLICY_REPO_MAIN_BRANCH</td>
       <td>
-        Name of the git branch to track for policy files (default: `master`)
+        Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set)
+      </td>
+    </tr>
+    <tr>
+      <td valign="top">OPAL_POLICY_REPO_TAG</td>
+      <td>
+        Name of the git tag to track for policy files (default: `None`).
       </td>
     </tr>
   </tbody>

diff --git a/packages/opal-common/opal_common/git/branch_tracker.py b/packages/opal-common/opal_common/git/branch_tracker.py
@@ -1,7 +1,7 @@
 from functools import partial
 from typing import Optional, Tuple
 
-from git import GitCommandError, Head, Remote, Repo
+from git import GitCommandError, Head, Reference, Remote, Repo
 from git.objects.commit import Commit
 from opal_common.git.env import provide_git_ssh_environment
 from opal_common.git.exceptions import GitFailed
@@ -135,6 +135,10 @@ def tracked_branch(self) -> Head:
             )
             raise GitFailed(e)
 
+    @property
+    def tracked_reference(self) -> Reference:
+        """returns the git reference this tracker follows; for a branch
+        tracker that is simply the tracked branch."""
+        return self.tracked_branch
+
     @property
     def tracked_remote(self) -> Remote:
         """returns the tracked remote object (of type git.Remote) or throws if

diff --git a/packages/opal-common/opal_common/git/tag_tracker.py b/packages/opal-common/opal_common/git/tag_tracker.py
@@ -0,0 +1,113 @@
+from functools import partial
+from typing import Optional, Tuple
+
+from git import GitCommandError, Reference, Repo, Tag
+from git.objects.commit import Commit
+from opal_common.git.branch_tracker import BranchTracker
+from opal_common.git.env import provide_git_ssh_environment
+from opal_common.git.exceptions import GitFailed
+from opal_common.logger import logger
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+
+class TagTracker(BranchTracker):
+    """Tracks the state of a git tag (hash the tag is pointing at).
+
+    Can detect if the tag has been moved to point at a different commit.
+    """
+
+    def __init__(
+        self,
+        repo: Repo,
+        tag_name: str,
+        remote_name: str = "origin",
+        retry_config=None,
+        ssh_key: Optional[str] = None,
+    ):
+        """Initializes the TagTracker.
+
+        Args:
+            repo (Repo): a git repo in which we want to track the specific commit a tag is pointing to
+            tag_name (str): the tag we want to track
+            remote_name (str): the remote in which the tag is located
+            retry_config (dict): Tenacity.retry config
+            ssh_key (Optional[str]): SSH key for private repositories
+        """
+        self._tag_name = tag_name
+        super().__init__(
+            repo,
+            branch_name=None,
+            remote_name=remote_name,
+            retry_config=retry_config,
+            ssh_key=ssh_key,
+        )
+
+    def checkout(self):
+        """Checkouts the repository at the current tag."""
+        checkout_func = partial(self._repo.git.checkout, self._tag_name)
+        attempt_checkout = retry(**self._retry_config)(checkout_func)
+        try:
+            return attempt_checkout()
+        except GitCommandError as e:
+            tags = [tag.name for tag in self._repo.tags]
+            logger.error(
+                "did not find tag: {tag_name}, instead found: {tags_found}, got error: {error}",
+                tag_name=self._tag_name,
+                tags_found=tags,
+                error=str(e),
+            )
+            raise GitFailed(e)
+
+    def _fetch(self):
+        """Fetch updates including tags with force option."""
+
+        def _inner_fetch(*args, **kwargs):
+            env = provide_git_ssh_environment(self.tracked_remote.url, self._ssh_key)
+            with self.tracked_remote.repo.git.custom_environment(**env):
+                self.tracked_remote.repo.git.fetch("--tags", "--force", *args, **kwargs)
+
+        attempt_fetch = retry(**self._retry_config)(_inner_fetch)
+        return attempt_fetch()
+
+    @property
+    def latest_commit(self) -> Commit:
+        """the commit of the tracked tag."""
+        return self.tracked_tag.commit
+
+    @property
+    def tracked_tag(self) -> Tag:
+        """returns the tracked tag reference (of type git.Reference) or throws
+        if such tag does not exist on the repo."""
+        try:
+            return getattr(self._repo.tags, self._tag_name)
+        except AttributeError as e:
+            tags = [{"path": tag.path} for tag in self._repo.tags]
+            logger.exception(
+                "did not find main branch: {error}, instead found: {tags_found}",
+                error=e,
+                tags_found=tags,
+            )
+            raise GitFailed(e)
+
+    @property
+    def tracked_reference(self) -> Reference:
+        return self.tracked_tag
+
+    def pull(self) -> Tuple[bool, Commit, Commit]:
+        """Overrides the pull method to handle tag updates.
+
+        Returns:
+            pull_result (bool, Commit, Commit): a tuple consisting of:
+                has_changes (bool): whether the tag has been moved to a different commit
+                prev (Commit): the previous commit the tag was pointing to
+                latest (Commit): the new commit the tag is currently pointing to
+        """
+        self._fetch()
+        self.checkout()
+
+        if self.prev_commit.hexsha == self.latest_commit.hexsha:
+            return False, self.prev_commit, self.prev_commit
+        else:
+            prev = self._prev_commit
+            self._save_latest_commit_as_prev_commit()
+            return True, prev, self.latest_commit
diff --git a/packages/opal-common/opal_common/git/tests/conftest.py b/packages/opal-common/opal_common/git/tests/conftest.py
@@ -73,6 +73,15 @@ def create_rename_file_commit(
         repo.index.move([filename, new_filename])
         repo.index.commit(commit_msg, author=author)
 
+    @staticmethod
+    def create_new_tag(repo: Repo, tag_name: str):
+        """Creates a tag named `tag_name` in `repo`.
+
+        No target ref is passed, so the tag lands on whatever
+        `Repo.create_tag` targets by default (presumably HEAD - confirm).
+        """
+        repo.create_tag(tag_name)
+
+    @staticmethod
+    def update_tag_to_head(repo: Repo, tag_name: str):
+        """Moves an existing tag by deleting it and creating it again.
+
+        The tag is re-created without an explicit target ref (presumably
+        HEAD, per the method name - confirm against `Repo.create_tag`).
+        """
+        # delete first: the re-creation below does not pass a force option
+        repo.delete_tag(tag_name)
+        repo.create_tag(tag_name)
+
 
 @pytest.fixture
 def helpers() -> Helpers:
@@ -140,6 +149,9 @@ def local_repo(tmp_path, helpers: Helpers) -> Repo:
 
     # create a "delete" commit
     helpers.create_delete_file_commit(repo, root / "deleted.rego")
+
+    # create a test tag
+    helpers.create_new_tag(repo, "test_tag")
     return repo
 
 

diff --git a/packages/opal-common/opal_common/git/tests/repo_watcher_test.py b/packages/opal-common/opal_common/git/tests/repo_watcher_test.py
@@ -46,6 +46,7 @@ async def failure_callback(e: Exception):
     # configure the watcher to watch an invalid repo
     watcher = GitPolicySource(
         remote_source_url=INVALID_REPO_REMOTE_URL,
+        branch_name="master",
         local_clone_path=target_path,
         request_timeout=3,
     )
@@ -86,7 +87,9 @@ async def new_commits_callback(
     # configure the watcher with a valid local repo (our test repo)
     # the returned repo will track the local remote repo
     watcher = GitPolicySource(
-        remote_source_url=remote_repo.working_tree_dir, local_clone_path=target_path
+        remote_source_url=remote_repo.working_tree_dir,
+        local_clone_path=target_path,
+        branch_name=remote_repo.active_branch.name,
     )
     # configure the error callback
     watcher.add_on_new_policy_callback(partial(new_commits_callback, detected_commits))
@@ -157,6 +160,7 @@ async def new_commits_callback(
     watcher = GitPolicySource(
         remote_source_url=remote_repo.working_tree_dir,
         local_clone_path=target_path,
+        branch_name=remote_repo.active_branch.name,
         polling_interval=3,  # every 3 seconds do a pull to try and detect changes
     )
     # configure the error callback

diff --git a/packages/opal-common/opal_common/git/tests/tag_tracker_test.py b/packages/opal-common/opal_common/git/tests/tag_tracker_test.py
@@ -0,0 +1,81 @@
+import os
+import sys
+
+import pytest
+
+# Add root opal dir to use local src as package for tests (i.e, no need for python -m pytest)
+root_dir = os.path.abspath(
+    os.path.join(
+        os.path.dirname(__file__),
+        os.path.pardir,
+        os.path.pardir,
+        os.path.pardir,
+    )
+)
+sys.path.append(root_dir)
+
+from pathlib import Path
+
+from git import Repo
+from git.objects.commit import Commit
+from opal_common.git.exceptions import GitFailed
+from opal_common.git.tag_tracker import TagTracker
+
+
+def test_pull_with_no_changes(local_repo_clone: Repo):
+    """Test pulling when there are no changes on the remote repo."""
+    repo: Repo = local_repo_clone  # local repo, cloned from another local repo
+    tracker = TagTracker(repo=repo, tag_name="test_tag")
+    latest_commit: Commit = repo.head.commit
+    assert latest_commit == tracker.latest_commit == tracker.prev_commit
+    has_changes, prev, latest = tracker.pull()  # pulls from origin
+    assert has_changes == False
+    assert latest_commit == prev == latest
+
+
+def test_pull_with_new_commits(
+    local_repo: Repo,
+    local_repo_clone: Repo,
+    helpers,
+):
+    """Test pulling when there are changes (new commits) on the remote repo."""
+    remote_repo: Repo = (
+        local_repo  # local repo, the 'origin' remote of 'local_repo_clone'
+    )
+    repo: Repo = local_repo_clone  # local repo, cloned from 'local_repo'
+
+    tracker = TagTracker(repo=repo, tag_name="test_tag")
+    most_recent_commit_before_pull: Commit = repo.head.commit
+
+    assert (
+        most_recent_commit_before_pull == tracker.latest_commit == tracker.prev_commit
+    )
+
+    # create new file commit on the remote repo
+    helpers.create_new_file_commit(
+        remote_repo, Path(remote_repo.working_tree_dir) / "2.txt"
+    )
+
+    helpers.update_tag_to_head(remote_repo, "test_tag")
+
+    # now the remote repo tag is pointing at a different commit
+    assert remote_repo.tags.__getattr__("test_tag").commit != repo.head.commit
+    # and our tag tracker does not know it yet
+    assert remote_repo.tags.__getattr__("test_tag").commit != tracker.latest_commit
+
+    has_changes, prev, latest = tracker.pull()  # pulls from origin
+    assert has_changes == True
+    assert prev != latest
+    assert most_recent_commit_before_pull == prev
+    assert (
+        remote_repo.tags.__getattr__("test_tag").commit
+        == repo.tags.__getattr__("test_tag").commit
+        == latest
+        == tracker.latest_commit
+    )
+
+
+def test_tracked_branch_does_not_exist(local_repo: Repo):
+    """Test that tag tracker throws when tag does not exist."""
+    with pytest.raises(GitFailed):
+        tracker = TagTracker(local_repo, tag_name="no_such_tag")
diff --git a/packages/opal-common/opal_common/sources/git_policy_source.py b/packages/opal-common/opal_common/sources/git_policy_source.py
@@ -4,6 +4,7 @@
 from opal_common.git.branch_tracker import BranchTracker
 from opal_common.git.exceptions import GitFailed
 from opal_common.git.repo_cloner import RepoCloner
+from opal_common.git.tag_tracker import TagTracker
 from opal_common.logger import logger
 from opal_common.sources.base_policy_source import BasePolicySource
 
@@ -30,7 +31,8 @@ def __init__(
         self,
         remote_source_url: str,
         local_clone_path: str,
-        branch_name: str = "master",
+        branch_name: Optional[str] = None,
+        tag_name: Optional[str] = None,
         ssh_key: Optional[str] = None,
         polling_interval: int = 0,
         request_timeout: int = 0,
@@ -49,7 +51,16 @@ def __init__(
             ssh_key=self._ssh_key,
             clone_timeout=request_timeout,
         )
+
+        if branch_name is None and tag_name is None:
+            logger.exception("Must provide either branch_name or tag_name")
+            raise ValueError("Must provide either branch_name or tag_name")
+        if branch_name is not None and tag_name is not None:
+            logger.exception("Must provide either branch_name or tag_name, not both")
+            raise ValueError("Must provide either branch_name or tag_name, not both")
+
         self._branch_name = branch_name
+        self._tag_name = tag_name
         self._tracker = None
 
     async def get_initial_policy_state_from_remote(self):
@@ -82,9 +93,14 @@ async def get_initial_policy_state_from_remote(self):
             await self._on_git_failed(e)
             return
 
-        self._tracker = BranchTracker(
-            repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key
-        )
+        if self._tag_name is not None:
+            self._tracker = TagTracker(
+                repo=repo, tag_name=self._tag_name, ssh_key=self._ssh_key
+            )
+        else:
+            self._tracker = BranchTracker(
+                repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key
+            )
 
     async def check_for_changes(self):
         """Calling this method will trigger a git pull from the tracked remote.
@@ -98,7 +114,11 @@ async def check_for_changes(self):
         )
         has_changes, prev, latest = self._tracker.pull()
         if not has_changes:
-            logger.info("No new commits: HEAD is at '{head}'", head=latest.hexsha)
+            logger.info(
+                "No new commits: {ref} is at '{head}'",
+                ref=self._tracker.tracked_reference.name,
+                head=latest.hexsha,
+            )
         else:
             logger.info(
                 "Found new commits: old HEAD was '{prev_head}', new HEAD is '{new_head}'",

diff --git a/packages/opal-server/opal_server/config.py b/packages/opal-server/opal_server/config.py
@@ -99,7 +99,8 @@ class OpalServerConfig(Confi):
         False,
         "Set if OPAL server should use a fixed clone path (and reuse if it already exists) instead of randomizing its suffix on each run",
     )
-    POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", "master")
+    POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", None)
+    POLICY_REPO_TAG = confi.str("POLICY_REPO_TAG", None)
     POLICY_REPO_SSH_KEY = confi.str("POLICY_REPO_SSH_KEY", None)
     POLICY_REPO_MANIFEST_PATH = confi.str(
         "POLICY_REPO_MANIFEST_PATH",