From e172d985f73e0de5c1205ff8d10de1e2deb4d2c4 Mon Sep 17 00:00:00 2001 From: Riley Hun Date: Thu, 21 Sep 2023 23:28:03 -0700 Subject: [PATCH] handle single node use case --- metaflow/plugins/aws/batch/batch_client.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/metaflow/plugins/aws/batch/batch_client.py b/metaflow/plugins/aws/batch/batch_client.py index 9130e06ebb..0822f89c0f 100644 --- a/metaflow/plugins/aws/batch/batch_client.py +++ b/metaflow/plugins/aws/batch/batch_client.py @@ -320,14 +320,23 @@ def _register_job_definition( ) else: job_definition["containerProperties"]["linuxParameters"]["devices"] = [] - for i in range(int(efa)): + if (num_parallel or 0) > 1: + for i in range(int(efa)): + job_definition["containerProperties"]["linuxParameters"][ + "devices" + ].append( + { + "hostPath": "/dev/infiniband/uverbs{}".format(i), + "containerPath": "/dev/infiniband/uverbs{}".format(i), + "permissions": ["READ", "WRITE", "MKNOD"] + } + ) + else: job_definition["containerProperties"]["linuxParameters"][ "devices" ].append( { - "hostPath": "/dev/infiniband/uverbs{}".format(i), - "containerPath": "/dev/infiniband/uverbs{}".format(i), - "permissions": ["READ", "WRITE", "MKNOD"] + "hostPath": "/dev/infiniband/uverbs0" } )