Skip to content

Commit

Permalink
handle cross-platform cache check optimization
Browse files (browse the repository at this point in the history)
  • Loading branch information
savingoyal committed Sep 11, 2023
1 parent 9ee706c commit 9bdcd26
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
2 changes: 1 addition & 1 deletion metaflow/plugins/pypi/conda_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def runtime_step_cli(
cli_args.env["PYTHONPATH"] = self.metaflow_dir.name
# TODO: Verify user site-package isolation behavior
# https://github.com/conda/conda/issues/7707
# Also ref - https://github.com/Netflix/metaflow/pull/178
# Also ref - https://github.com/Netflix/metaflow/pull/178
# cli_args.env["PYTHONNOUSERSITE"] = "1"
# The executable is already in place for the user code to execute against
if self.interpreter:
Expand Down
2 changes: 2 additions & 0 deletions metaflow/plugins/pypi/conda_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def cache(storage, results, type_):
results = list(
executor.map(lambda x: solve(*x, solver), environments(solver))
)
# TODO: Only download packages that are needed for either creating the
# environment or for caching in the remote datastore
_ = list(map(lambda x: self.solvers[solver].download(*x), results))
with ThreadPoolExecutor() as executor:
_ = list(
Expand Down
26 changes: 21 additions & 5 deletions metaflow/plugins/pypi/micromamba.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def __init__(self):
msg = "No installation for *Micromamba* found.\n"
msg += "Visit https://mamba.readthedocs.io/en/latest/micromamba-installation.html for installation instructions."
raise MetaflowException(msg)
# TODO (savin): Introduce a version check for micromamba

def solve(self, id_, packages, python, platform):
# Performance enhancements
Expand Down
Expand Up @@ -104,11 +103,20 @@ def download(self, id_, packages, python, platform):
# already cached. As a perf heuristic, we check if the environment already
# exists to short circuit package downloads.

# TODO: Introduce a perf optimization to skip cross-platform downloads
# when already done
if self.path_to_environment(id_, platform):
return

prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
env_dirs=self.info()["envs_dirs"][0],
platform=platform,
keyword="metaflow", # indicates metaflow generated environment
id=id_,
)

# Another forced perf heuristic to skip cross-platform downloads.
if os.path.exists(f"{prefix}/fake.done"):
return

with tempfile.TemporaryDirectory() as tmp_dir:
env = {
"CONDA_SUBDIR": platform,
Expand All @@ -126,7 +134,14 @@ def download(self, id_, packages, python, platform):
]
for package in packages:
cmd.append("{url}".format(**package))
return self._call(cmd, env)

self._call(cmd, env)
# Perf optimization to skip cross-platform downloads.
if platform != self.platform():
os.makedirs(prefix, exist_ok=True) or open(
f"{prefix}/fake.done", "w"
).close()
return

def create(self, id_, packages, python, platform):
# create environment only if the platform matches system platform
Expand All @@ -139,6 +154,7 @@ def create(self, id_, packages, python, platform):
keyword="metaflow", # indicates metaflow generated environment
id=id_,
)

env = {
# "CONDA_PKGS_DIRS": "/Users/savin/micromamba/pkgs/%s" % id_,
# use hardlinks when possible, otherwise copy files
Expand Down Expand Up @@ -181,7 +197,7 @@ def metadata(self, id_, packages, python, platform):
packages_to_filenames = {
package["url"]: package["url"].split("/")[-1] for package in packages
}
directories = self._call(["config", "list", "-a"])["pkgs_dirs"]
directories = self.info()["pkgs_dirs"]
# search all package caches for packages
# TODO: Handle conda clean -a
return {
Expand Down

0 comments on commit 9bdcd26

Please sign in to comment.