From 40c0b4c8c3133948aca941d47b5d96649cc88dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Mudrini=C4=87?= Date: Wed, 3 Apr 2019 00:25:44 +0200 Subject: [PATCH] Implement Retryable tasks (#328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marko Mudrinić --- pkg/installer/installation/install.go | 42 +++++++++--------- pkg/task/task.go | 63 +++++++++++++++++++++++++++ pkg/upgrader/upgrade/upgrade.go | 32 +++++++------- 3 files changed, 98 insertions(+), 39 deletions(-) create mode 100644 pkg/task/task.go diff --git a/pkg/installer/installation/install.go b/pkg/installer/installation/install.go index 4443153f5..1dbd3fec5 100644 --- a/pkg/installer/installation/install.go +++ b/pkg/installer/installation/install.go @@ -21,6 +21,7 @@ import ( "github.com/kubermatic/kubeone/pkg/certificate" "github.com/kubermatic/kubeone/pkg/features" + "github.com/kubermatic/kubeone/pkg/task" "github.com/kubermatic/kubeone/pkg/templates/machinecontroller" "github.com/kubermatic/kubeone/pkg/util" ) @@ -28,31 +29,28 @@ import ( // Install performs all the steps required to install Kubernetes on // an empty, pristine machine. func Install(ctx *util.Context) error { - installSteps := []struct { - fn func(*util.Context) error - errMsg string - }{ - {fn: installPrerequisites, errMsg: "failed to install prerequisites"}, - {fn: generateKubeadm, errMsg: "failed to generate kubeadm config files"}, - {fn: kubeadmCertsOnLeader, errMsg: "failed to provision certs and etcd on leader"}, - {fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"}, - {fn: deployCA, errMsg: "unable to deploy ca on nodes"}, - {fn: kubeadmCertsOnFollower, errMsg: "failed to provision certs and etcd on followers"}, - {fn: initKubernetesLeader, errMsg: "failed to init kubernetes on leader"}, - {fn: joinControlplaneNode, errMsg: "unable to join other masters a cluster"}, - {fn: copyKubeconfig, errMsg: "unable to copy kubeconfig to home directory"}, - {fn: saveKubeconfig, errMsg: "unable to save kubeconfig to the local machine"}, - {fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"}, - {fn: features.Activate, errMsg: "unable to activate features"}, - {fn: applyCanalCNI, errMsg: "failed to install cni plugin canal"}, - {fn: machinecontroller.EnsureMachineController, errMsg: "failed to install machine-controller"}, - {fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"}, - {fn: createWorkerMachines, errMsg: "failed to create worker machines"}, + installSteps := []task.Task{ + {Fn: installPrerequisites, ErrMsg: "failed to install prerequisites"}, + {Fn: generateKubeadm, ErrMsg: "failed to generate kubeadm config files"}, + {Fn: kubeadmCertsOnLeader, ErrMsg: "failed to provision certs and etcd on leader"}, + {Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3}, + {Fn: deployCA, ErrMsg: "unable to deploy ca on nodes", Retries: 3}, + {Fn: kubeadmCertsOnFollower, ErrMsg: "failed to provision certs and etcd on followers"}, + {Fn: initKubernetesLeader, ErrMsg: "failed to init kubernetes on leader"}, + {Fn: joinControlplaneNode, ErrMsg: "unable to join other masters a cluster"}, + {Fn: copyKubeconfig, ErrMsg: "unable to copy kubeconfig to home directory", Retries: 3}, + {Fn: saveKubeconfig, ErrMsg: "unable to save kubeconfig to the local machine", Retries: 3}, + {Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset", Retries: 3}, + {Fn: features.Activate, ErrMsg: "unable to activate features"}, + {Fn: applyCanalCNI, ErrMsg: "failed to install cni plugin canal", Retries: 3}, + {Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to install machine-controller", Retries: 3}, + {Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3}, + {Fn: createWorkerMachines, ErrMsg: "failed to create worker machines", Retries: 3}, } for _, step := range installSteps { - if err := step.fn(ctx); err != nil { - return errors.Wrap(err, step.errMsg) + if err := step.Run(ctx); err != nil { + return errors.Wrap(err, step.ErrMsg) } } diff --git a/pkg/task/task.go b/pkg/task/task.go new file mode 100644 index 000000000..4357c1092 --- /dev/null +++ b/pkg/task/task.go @@ -0,0 +1,63 @@ +/* +Copyright 2019 The KubeOne Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package task + +import ( + "time" + + "github.com/kubermatic/kubeone/pkg/util" + + "k8s.io/apimachinery/pkg/util/wait" +) + +// defaultRetryBackoff is backoff with with duration of 5 seconds and factor of 2.0 +func defaultRetryBackoff(retries int) wait.Backoff { + return wait.Backoff{ + Steps: retries, + Duration: 5 * time.Second, + Factor: 2.0, + } +} + +// Task is a runnable task +type Task struct { + Fn func(*util.Context) error + ErrMsg string + Retries int +} + +// RunTask runs a task +func (t *Task) Run(ctx *util.Context) error { + if t.Retries == 0 { + t.Retries = 1 + } + backoff := defaultRetryBackoff(t.Retries) + + var lastError error + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + lastError = t.Fn(ctx) + if lastError != nil { + ctx.Logger.Warn("Task failed, retrying…") + return false, nil + } + return true, nil + }) + if err == wait.ErrWaitTimeout { + err = lastError + } + return err +} diff --git a/pkg/upgrader/upgrade/upgrade.go b/pkg/upgrader/upgrade/upgrade.go index 240dca3ea..6e7af55d6 100644 --- a/pkg/upgrader/upgrade/upgrade.go +++ b/pkg/upgrader/upgrade/upgrade.go @@ -23,6 +23,7 @@ import ( "github.com/kubermatic/kubeone/pkg/certificate" "github.com/kubermatic/kubeone/pkg/features" + "github.com/kubermatic/kubeone/pkg/task" "github.com/kubermatic/kubeone/pkg/templates/machinecontroller" "github.com/kubermatic/kubeone/pkg/util" ) @@ -42,26 +43,23 @@ const ( // cluster provisioned using KubeOne func Upgrade(ctx *util.Context) error { // commonSteps are same for all worker nodes and they are safe to be run in parallel - commonSteps := []struct { - fn func(ctx *util.Context) error - errMsg string - }{ - {fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"}, - {fn: determineHostname, errMsg: "unable to determine hostname"}, - {fn: determineOS, errMsg: "unable to determine operating system"}, - {fn: runPreflightChecks, errMsg: "preflight checks failed"}, - {fn: upgradeLeader, errMsg: "unable to upgrade leader control plane"}, - {fn: upgradeFollower, errMsg: "unable to upgrade follower control plane"}, - {fn: features.Activate, errMsg: "unable to activate features"}, - {fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"}, - {fn: machinecontroller.EnsureMachineController, errMsg: "failed to update machine-controller"}, - {fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"}, - {fn: upgradeMachineDeployments, errMsg: "unable to upgrade MachineDeployments"}, + commonSteps := []task.Task{ + {Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset"}, + {Fn: determineHostname, ErrMsg: "unable to determine hostname"}, + {Fn: determineOS, ErrMsg: "unable to determine operating system"}, + {Fn: runPreflightChecks, ErrMsg: "preflight checks failed"}, + {Fn: upgradeLeader, ErrMsg: "unable to upgrade leader control plane", Retries: 3}, + {Fn: upgradeFollower, ErrMsg: "unable to upgrade follower control plane", Retries: 3}, + {Fn: features.Activate, ErrMsg: "unable to activate features"}, + {Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3}, + {Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to update machine-controller", Retries: 3}, + {Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3}, + {Fn: upgradeMachineDeployments, ErrMsg: "unable to upgrade MachineDeployments", Retries: 3}, } for _, step := range commonSteps { - if err := step.fn(ctx); err != nil { - return errors.Wrap(err, step.errMsg) + if err := step.Run(ctx); err != nil { + return errors.Wrap(err, step.ErrMsg) } }