Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry container creation on health check failures #154

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 21 additions & 35 deletions esdb/containers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,83 +136,69 @@ func (container *Container) Close() {
}
}

// TODO - Keep retrying when the health check fails: create a new container instead of failing the test.
func getDatabase(t *testing.T, options *dockertest.RunOptions) *Container {
const maxContainerRetries = 5
const healthCheckRetries = 10

pool, err := dockertest.NewPool("")
if err != nil {
t.Fatalf("Could not connect to docker. Reason: %v", err)
}

isInsecure := GetEnvOrDefault("EVENTSTORE_INSECURE", "true") == "true"

if !isInsecure {
err = setTLSContext(options)
if err != nil {
t.Fatal(err)
}
}

fmt.Println("Starting docker container...")

var resource *dockertest.Resource
var endpoint string
retries := 0
retryLimit := 5

for retries < retryLimit {
retries += 1
for containerRetry := 0; containerRetry < maxContainerRetries; containerRetry++ {
resource, err = pool.RunWithOptions(options)
if err != nil {
t.Fatalf("Could not start resource. Reason: %v", err)
}

fmt.Printf("Started container with id: %v, name: %s\n",
resource.Container.ID,
resource.Container.Name)

fmt.Printf("Started container with ID: %v, name: %s\n", resource.Container.ID, resource.Container.Name)
endpoint = fmt.Sprintf("localhost:%s", resource.GetPort("2113/tcp"))

// Disable certificate verification
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
err = pool.Retry(func() error {
if resource != nil && resource.Container != nil {
containerInfo, containerError := pool.Client.InspectContainer(resource.Container.ID)
if containerError == nil && containerInfo.State.Running == false {
return fmt.Errorf("unexpected exit of container check the container logs for more information, container ID: %v", resource.Container.ID)
}
}

healthCheckSuccess := false

for healthRetry := 0; healthRetry < healthCheckRetries; healthRetry++ {
scheme := "https"
if isInsecure {
scheme = "http"
}

healthCheckEndpoint := fmt.Sprintf("%s://%s/health/alive", scheme, endpoint)
_, err := http.Get(healthCheckEndpoint)
return err
})

if err != nil {
log.Printf("[debug] healthCheck failed. Reason: %v\n", err)

closeErr := resource.Close()

if closeErr != nil && retries >= retryLimit {
t.Fatalf("Failed to closeConnection docker resource. Reason: %v", err)
if err == nil {
healthCheckSuccess = true
break
}

if retries < retryLimit {
log.Printf("[debug] heatlhCheck failed retrying...%v/%v", retries, retryLimit)
continue
}
time.Sleep(2 * time.Second)
}

t.Fatal("[debug] stopping docker resource")
} else {
log.Print("[debug] healthCheck succeeded!")
if healthCheckSuccess {
break
} else {
log.Printf("[debug] Health check failed for container %d/%d. Creating a new one...", containerRetry+1, maxContainerRetries)
resource.Close()
}
}

if !resource.Container.State.Running {
t.Fatalf("Failed to get a running container after %d attempts", maxContainerRetries)
}

return &Container{
Endpoint: endpoint,
Resource: resource,
Expand Down
Loading