Skip to content

Commit

Permalink
Add the possibility of defining default decorators for steps (#1837)
Browse files Browse the repository at this point in the history
* Add decorator source information

This is in preparation of having default decorators injected by
the user and/or various metaflow extensions.

* Add support for default decorators

* No removal of default decorators

* Address comments

Mechanism is now as follows:
 - users can set METAFLOW_DECOSPECS in their configuration or in the environment
 - extensions can set DECOSPECS in their config or set a list of TOGGLE_DECOSPECS

METAFLOW_DECOSPECS/DECOSPECS is a string (space separated to form an array)

Precedence order (from first priority to last):
  - DECOSPECS from extension if it sets DECOSPECS = ...
  - METAFLOW_DECOSPECS in the environment
  - METAFLOW_DECOSPECS in the configuration
  - DECOSPECS (default from extension if it uses from_conf...)
  - If still not set after all these checks, form by adding all the
    TOGGLE_DECOSPECS together

* Forgot to remove old changes

* One more

* Typo

* Address comments

* Rework priority order of decorators.

The order is now:
 - statically defined using a decorator
 - passed using --with
 - passed using METAFLOW_DECOSPECS in the environment variable
 - passed using METAFLOW_DECOSPECS in a configuration
 - passed using TOGGLE_DECOSPECS in the configuration

Note that an error will occur if:
 - a decorator is defined statically and dynamically and it doesn't allow multiple

An error will NOT be raised if:
 - a decorator is passed multiple times dynamically (newer ones are *ignored*)

* Properly address order so that ./myflow.py --with foo run and
./myflow.py run --with foo both work properly
  • Loading branch information
romain-intel authored May 29, 2024
1 parent 5f57997 commit 3947a0b
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 10 deletions.
59 changes: 52 additions & 7 deletions metaflow/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import inspect
import os
import sys
import traceback
from datetime import datetime
Expand All @@ -14,6 +15,7 @@
from .exception import CommandException, MetaflowException
from .graph import FlowGraph
from .metaflow_config import (
DECOSPECS,
DEFAULT_DATASTORE,
DEFAULT_ENVIRONMENT,
DEFAULT_EVENT_LOGGER,
Expand Down Expand Up @@ -118,6 +120,26 @@ def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=Tru
click.secho(body, bold=system_msg, fg=LOGGER_BAD_COLOR if bad else None, nl=nl)


def config_merge_cb(ctx, param, value):
# Callback to:
# - read the Click auto_envvar variable from both the
# environment AND the configuration
# - merge that value with the value passed in the command line (value)
# - return the value as a tuple
# Note that this function gets called even if there is no option passed on the
# command line.
# NOTE: Assumes that ctx.auto_envvar_prefix is set to METAFLOW (same as in
# from_conf)

# Special case where DECOSPECS and value are the same. This happens
# when there is no --with option at the TL and DECOSPECS is read from
# the env var. In this case, click also passes it as value
splits = DECOSPECS.split()
if len(splits) == len(value) and all([a == b for (a, b) in zip(splits, value)]):
return value
return tuple(list(value) + DECOSPECS.split())


@click.group()
def cli(ctx):
pass
Expand Down Expand Up @@ -618,7 +640,7 @@ def resume(
resume_identifier=None,
runner_attribute_file=None,
):
before_run(obj, tags, decospecs + obj.environment.decospecs())
before_run(obj, tags, decospecs)

if origin_run_id is None:
origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
Expand Down Expand Up @@ -716,7 +738,7 @@ def run(
):
if user_namespace is not None:
namespace(user_namespace or None)
before_run(obj, tags, decospecs + obj.environment.decospecs())
before_run(obj, tags, decospecs)

runtime = NativeRuntime(
obj.flow,
Expand Down Expand Up @@ -764,9 +786,20 @@ def before_run(obj, tags, decospecs):
# A downside is that we need to have the following decorators handling
# in two places in this module and make sure _init_step_decorators
# doesn't get called twice.
if decospecs:
decorators._attach_decorators(obj.flow, decospecs)

# We want the order to be the following:
# - run level decospecs
# - top level decospecs
# - environment decospecs
all_decospecs = (
list(decospecs or [])
+ obj.tl_decospecs
+ list(obj.environment.decospecs() or [])
)
if all_decospecs:
decorators._attach_decorators(obj.flow, all_decospecs)
obj.graph = FlowGraph(obj.flow.__class__)

obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
# obj.environment.init_environment(obj.logger)

Expand Down Expand Up @@ -837,6 +870,7 @@ def version(obj):
multiple=True,
help="Add a decorator to all steps. You can specify this option "
"multiple times to attach multiple decorators in steps.",
callback=config_merge_cb,
)
@click.option(
"--pylint/--no-pylint",
Expand Down Expand Up @@ -955,8 +989,11 @@ def start(
deco_options,
)

if decospecs:
decorators._attach_decorators(ctx.obj.flow, decospecs)
# In the case of run/resume, we will want to apply the TL decospecs
# *after* the run decospecs so that they don't take precedence. In other
# words, for the same decorator, we want `myflow.py run --with foo` to
# take precedence over any other `foo` decospec
ctx.obj.tl_decospecs = list(decospecs or [])

# initialize current and parameter context for deploy-time parameters
current._set_env(flow=ctx.obj.flow, is_running=False)
Expand All @@ -967,14 +1004,22 @@ def start(
if ctx.invoked_subcommand not in ("run", "resume"):
# run/resume are special cases because they can add more decorators with --with,
# so they have to take care of themselves.
decorators._attach_decorators(ctx.obj.flow, ctx.obj.environment.decospecs())
all_decospecs = ctx.obj.tl_decospecs + list(
ctx.obj.environment.decospecs() or []
)
if all_decospecs:
decorators._attach_decorators(ctx.obj.flow, all_decospecs)
# Regenerate graph if we attached more decorators
ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)

decorators._init_step_decorators(
ctx.obj.flow,
ctx.obj.graph,
ctx.obj.environment,
ctx.obj.flow_datastore,
ctx.obj.logger,
)

# TODO (savin): Enable lazy instantiation of package
ctx.obj.package = None
if ctx.invoked_subcommand is None:
Expand Down
19 changes: 16 additions & 3 deletions metaflow/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,14 @@ def make_decorator_spec(self):
attr_list.append("%s=%s" % (k, str(v)))
else:
attr_list.append("%s=%s" % (k, json.dumps(v).replace('"', '\\"')))

attrstr = ",".join(attr_list)
return "%s:%s" % (self.name, attrstr)
else:
return self.name

def __str__(self):
mode = "decorated" if self.statically_defined else "cli"
mode = "static" if self.statically_defined else "dynamic"
attrs = " ".join("%s=%s" % x for x in self.attributes.items())
if attrs:
attrs = " " + attrs
Expand Down Expand Up @@ -450,6 +451,18 @@ def wrap(f):
return wrap


_all_step_decos = None


def _get_all_step_decos():
global _all_step_decos
if _all_step_decos is None:
from .plugins import STEP_DECORATORS

_all_step_decos = {decotype.name: decotype for decotype in STEP_DECORATORS}
return _all_step_decos


def _attach_decorators(flow, decospecs):
"""
Attach decorators to all steps during runtime. This has the same
Expand All @@ -462,6 +475,7 @@ def _attach_decorators(flow, decospecs):
#
# Note that each step gets its own instance of the decorator class,
# so decorator can maintain step-specific state.

for step in flow:
_attach_decorators_to_step(step, decospecs)

Expand All @@ -472,9 +486,8 @@ def _attach_decorators_to_step(step, decospecs):
effect as if you defined the decorators statically in the source for
the step.
"""
from .plugins import STEP_DECORATORS

decos = {decotype.name: decotype for decotype in STEP_DECORATORS}
decos = _get_all_step_decos()

for decospec in decospecs:
splits = decospec.split(":", 1)
Expand Down
21 changes: 21 additions & 0 deletions metaflow/metaflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,14 @@
},
)


###
# Decorators
###
# Format is a space separated string of decospecs (what is passed
# using --with)
DECOSPECS = from_conf("DECOSPECS", "")

###
# AWS Batch configuration
###
Expand Down Expand Up @@ -496,6 +504,8 @@ def get_pinned_conda_libs(python_version, datastore_type):
try:
from metaflow.extension_support import get_modules

_TOGGLE_DECOSPECS = []

ext_modules = get_modules("config")
for m in ext_modules:
# We load into globals whatever we have in extension_module
Expand All @@ -519,8 +529,18 @@ def _new_get_pinned_conda_libs(
return d1

globals()[n] = _new_get_pinned_conda_libs
elif n == "TOGGLE_DECOSPECS":
if any([x.startswith("-") for x in o]):
raise ValueError("Removing decospecs is not currently supported")
if any(" " in x for x in o):
raise ValueError("Decospecs cannot contain spaces")
_TOGGLE_DECOSPECS.extend(o)
elif not n.startswith("__") and not isinstance(o, types.ModuleType):
globals()[n] = o
# If DECOSPECS is set, use that, else extrapolate from extensions
if not DECOSPECS:
DECOSPECS = " ".join(_TOGGLE_DECOSPECS)

finally:
# Erase all temporary names to avoid leaking things
for _n in [
Expand All @@ -537,6 +557,7 @@ def _new_get_pinned_conda_libs(
"v",
"f1",
"f2",
"_TOGGLE_DECOSPECS",
]:
try:
del globals()[_n]
Expand Down

0 comments on commit 3947a0b

Please sign in to comment.