Skip to content

Commit

Permalink
Implement Retryable tasks (#328)
Browse files Browse the repository at this point in the history
Signed-off-by: Marko Mudrinić <mudrinic.mare@gmail.com>
  • Loading branch information
xmudrii authored and kubermatic-bot committed Apr 2, 2019
1 parent cbda3e6 commit 40c0b4c
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 39 deletions.
42 changes: 20 additions & 22 deletions pkg/installer/installation/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,38 +21,36 @@ import (

"github.com/kubermatic/kubeone/pkg/certificate"
"github.com/kubermatic/kubeone/pkg/features"
"github.com/kubermatic/kubeone/pkg/task"
"github.com/kubermatic/kubeone/pkg/templates/machinecontroller"
"github.com/kubermatic/kubeone/pkg/util"
)

// Install performs all the steps required to install Kubernetes on
// an empty, pristine machine.
func Install(ctx *util.Context) error {
installSteps := []struct {
fn func(*util.Context) error
errMsg string
}{
{fn: installPrerequisites, errMsg: "failed to install prerequisites"},
{fn: generateKubeadm, errMsg: "failed to generate kubeadm config files"},
{fn: kubeadmCertsOnLeader, errMsg: "failed to provision certs and etcd on leader"},
{fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"},
{fn: deployCA, errMsg: "unable to deploy ca on nodes"},
{fn: kubeadmCertsOnFollower, errMsg: "failed to provision certs and etcd on followers"},
{fn: initKubernetesLeader, errMsg: "failed to init kubernetes on leader"},
{fn: joinControlplaneNode, errMsg: "unable to join other masters a cluster"},
{fn: copyKubeconfig, errMsg: "unable to copy kubeconfig to home directory"},
{fn: saveKubeconfig, errMsg: "unable to save kubeconfig to the local machine"},
{fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"},
{fn: features.Activate, errMsg: "unable to activate features"},
{fn: applyCanalCNI, errMsg: "failed to install cni plugin canal"},
{fn: machinecontroller.EnsureMachineController, errMsg: "failed to install machine-controller"},
{fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"},
{fn: createWorkerMachines, errMsg: "failed to create worker machines"},
installSteps := []task.Task{
{Fn: installPrerequisites, ErrMsg: "failed to install prerequisites"},
{Fn: generateKubeadm, ErrMsg: "failed to generate kubeadm config files"},
{Fn: kubeadmCertsOnLeader, ErrMsg: "failed to provision certs and etcd on leader"},
{Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3},
{Fn: deployCA, ErrMsg: "unable to deploy ca on nodes", Retries: 3},
{Fn: kubeadmCertsOnFollower, ErrMsg: "failed to provision certs and etcd on followers"},
{Fn: initKubernetesLeader, ErrMsg: "failed to init kubernetes on leader"},
{Fn: joinControlplaneNode, ErrMsg: "unable to join other masters a cluster"},
{Fn: copyKubeconfig, ErrMsg: "unable to copy kubeconfig to home directory", Retries: 3},
{Fn: saveKubeconfig, ErrMsg: "unable to save kubeconfig to the local machine", Retries: 3},
{Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset", Retries: 3},
{Fn: features.Activate, ErrMsg: "unable to activate features"},
{Fn: applyCanalCNI, ErrMsg: "failed to install cni plugin canal", Retries: 3},
{Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to install machine-controller", Retries: 3},
{Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3},
{Fn: createWorkerMachines, ErrMsg: "failed to create worker machines", Retries: 3},
}

for _, step := range installSteps {
if err := step.fn(ctx); err != nil {
return errors.Wrap(err, step.errMsg)
if err := step.Run(ctx); err != nil {
return errors.Wrap(err, step.ErrMsg)
}
}

Expand Down
63 changes: 63 additions & 0 deletions pkg/task/task.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
Copyright 2019 The KubeOne Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package task

import (
"time"

"github.com/kubermatic/kubeone/pkg/util"

"k8s.io/apimachinery/pkg/util/wait"
)

// defaultRetryBackoff builds the backoff configuration used when retrying a
// task: the given number of steps, an initial duration of 5 seconds, and a
// factor of 2.0.
func defaultRetryBackoff(retries int) wait.Backoff {
	var backoff wait.Backoff
	backoff.Duration = 5 * time.Second
	backoff.Factor = 2.0
	backoff.Steps = retries
	return backoff
}

// Task is a runnable task: a function operating on a *util.Context together
// with the metadata needed to report its failure and to retry it.
type Task struct {
// Fn is the function executed by Run.
Fn func(*util.Context) error
// ErrMsg is the message describing a failure of this task. Run does not use
// it; callers wrap the error returned by Run with it.
ErrMsg string
// Retries is passed to the retry backoff as its number of steps. A zero
// value is treated as 1 by Run.
Retries int
}

// Run executes the task's Fn, retrying it with the exponential backoff built
// by defaultRetryBackoff until it succeeds or the backoff steps are exhausted.
//
// Retries is used as the number of backoff steps. Any value below 1 is
// treated as 1 so that Fn is always given at least one attempt. The receiver
// is not modified.
//
// Run returns nil as soon as Fn succeeds. If the backoff times out, the error
// from the last Fn call is returned instead of the generic wait timeout error.
func (t *Task) Run(ctx *util.Context) error {
	// With a non-positive step count the backoff is expected to time out
	// without ever calling Fn, and the nil lastError below would then be
	// reported as success. Clamp to at least one attempt, using a local so the
	// task itself is left untouched.
	steps := t.Retries
	if steps < 1 {
		steps = 1
	}
	backoff := defaultRetryBackoff(steps)

	var lastError error
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		lastError = t.Fn(ctx)
		if lastError != nil {
			// Report "not done" without an error so the backoff keeps going;
			// the actual failure is remembered in lastError.
			ctx.Logger.Warn("Task failed, retrying…")
			return false, nil
		}
		return true, nil
	})
	// Surface the task's own error rather than the opaque timeout error.
	if err == wait.ErrWaitTimeout {
		err = lastError
	}
	return err
}
32 changes: 15 additions & 17 deletions pkg/upgrader/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/kubermatic/kubeone/pkg/certificate"
"github.com/kubermatic/kubeone/pkg/features"
"github.com/kubermatic/kubeone/pkg/task"
"github.com/kubermatic/kubeone/pkg/templates/machinecontroller"
"github.com/kubermatic/kubeone/pkg/util"
)
Expand All @@ -42,26 +43,23 @@ const (
// cluster provisioned using KubeOne
func Upgrade(ctx *util.Context) error {
// commonSteps are same for all worker nodes and they are safe to be run in parallel
commonSteps := []struct {
fn func(ctx *util.Context) error
errMsg string
}{
{fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"},
{fn: determineHostname, errMsg: "unable to determine hostname"},
{fn: determineOS, errMsg: "unable to determine operating system"},
{fn: runPreflightChecks, errMsg: "preflight checks failed"},
{fn: upgradeLeader, errMsg: "unable to upgrade leader control plane"},
{fn: upgradeFollower, errMsg: "unable to upgrade follower control plane"},
{fn: features.Activate, errMsg: "unable to activate features"},
{fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"},
{fn: machinecontroller.EnsureMachineController, errMsg: "failed to update machine-controller"},
{fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"},
{fn: upgradeMachineDeployments, errMsg: "unable to upgrade MachineDeployments"},
commonSteps := []task.Task{
{Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset"},
{Fn: determineHostname, ErrMsg: "unable to determine hostname"},
{Fn: determineOS, ErrMsg: "unable to determine operating system"},
{Fn: runPreflightChecks, ErrMsg: "preflight checks failed"},
{Fn: upgradeLeader, ErrMsg: "unable to upgrade leader control plane", Retries: 3},
{Fn: upgradeFollower, ErrMsg: "unable to upgrade follower control plane", Retries: 3},
{Fn: features.Activate, ErrMsg: "unable to activate features"},
{Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3},
{Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to update machine-controller", Retries: 3},
{Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3},
{Fn: upgradeMachineDeployments, ErrMsg: "unable to upgrade MachineDeployments", Retries: 3},
}

for _, step := range commonSteps {
if err := step.fn(ctx); err != nil {
return errors.Wrap(err, step.errMsg)
if err := step.Run(ctx); err != nil {
return errors.Wrap(err, step.ErrMsg)
}
}

Expand Down

0 comments on commit 40c0b4c

Please sign in to comment.