Skip to content

Commit

Permalink
Mark initial service health check as passing (#138)
Browse files Browse the repository at this point in the history
* Mark initial service health check as passing

Sometimes, for a few seconds after deploying a new service version,
there are no healthy instances of this service in Consul.
Currently the Consul hook registers the new service instance in Consul
right after the Marathon health check passes. According to the tests I performed
and the Consul documentation, a registered service's health check is in the critical
state after registration. It stays in this state until the first health check
invocation (in practice, for a few seconds). The Consul hook ignores this
behaviour and removes the old service instance from Consul immediately.
As a result, there are no healthy service instances in Consul for a few seconds.

The best way to fix this problem is to register the new service instance with
its health check status set to passing. As the Consul health check is the same
as the Marathon health check, and Consul registration is triggered by the Marathon
health check status changing to passing, we can assume the Consul health check status is
also passing.
  • Loading branch information
franek1709 authored Mar 18, 2020
1 parent 267511b commit 8fa54b2
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions hook/consul/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ type Config struct {
// > should be unique for every Marathon-cluster connected to Consul
// https://github.com/allegro/marathon-consul/blob/1.4.2/config/config.go#L74
ConsulGlobalTag string `default:"marathon" envconfig:"consul_global_tag"`
// Status that will be set for registered service health check
// By default we assume service health was checked initially by marathon
// It will be set to passing.
InitialHealthCheckStatus string `default:"passing" envconfig:"initial_health_check_status"`
}

// HandleEvent calls appropriate hook functions that correspond to supported
Expand Down Expand Up @@ -142,7 +146,7 @@ func (h *Hook) RegisterIntoConsul(taskInfo mesosutils.TaskInfo) error {
Address: runenv.IP().String(),
EnableTagOverride: false,
Checks: api.AgentServiceChecks{},
Check: generateHealthCheck(taskInfo.GetHealthCheck(), int(serviceData.port)),
Check: h.generateHealthCheck(taskInfo.GetHealthCheck(), int(serviceData.port)),
}

if err := agent.ServiceRegister(&serviceRegistration); err != nil {
Expand Down Expand Up @@ -176,6 +180,23 @@ func (h *Hook) DeregisterFromConsul(taskInfo mesosutils.TaskInfo) error {
return nil
}

// generateHealthCheck converts a Mesos health check definition into a Consul
// agent service check targeting the given port.
//
// The interval and timeout are copied from mesosCheck, and the check's initial
// status is taken from the hook configuration (InitialHealthCheckStatus), so
// the check is registered with that status rather than Consul's default.
//
// HTTP checks get a URL built by generateURL from the Mesos check path; TCP
// checks get a "host:port" address using serviceHost. For any other check type
// nil is returned, meaning no Consul check is produced.
func (h *Hook) generateHealthCheck(mesosCheck mesosutils.HealthCheck, port int) *api.AgentServiceCheck {
	consulCheck := &api.AgentServiceCheck{
		Interval: mesosCheck.Interval.String(),
		Timeout:  mesosCheck.Timeout.String(),
		Status:   h.config.InitialHealthCheckStatus,
	}

	switch mesosCheck.Type {
	case mesosutils.HTTP:
		consulCheck.HTTP = generateURL(mesosCheck.HTTP.Path, port)
	case mesosutils.TCP:
		consulCheck.TCP = fmt.Sprintf("%s:%d", serviceHost, port)
	default:
		// Unsupported check type: register the service without a check.
		return nil
	}
	return consulCheck
}

func getPlaceholders(ports []mesos.Port) map[string]string {
placeholders := map[string]string{}
for _, port := range ports {
Expand Down Expand Up @@ -218,22 +239,6 @@ func marathonAppNameToServiceName(name mesosutils.TaskID) string {
return sanitizedName
}

// generateHealthCheck converts a Mesos health check definition into a Consul
// agent service check targeting the given port.
//
// The interval and timeout are copied from mesosCheck. HTTP checks get a URL
// built by generateURL from the Mesos check path; TCP checks get a "host:port"
// address using serviceHost. For any other check type nil is returned.
func generateHealthCheck(mesosCheck mesosutils.HealthCheck, port int) *api.AgentServiceCheck {
	consulCheck := &api.AgentServiceCheck{
		Interval: mesosCheck.Interval.String(),
		Timeout:  mesosCheck.Timeout.String(),
	}

	if mesosCheck.Type == mesosutils.HTTP {
		consulCheck.HTTP = generateURL(mesosCheck.HTTP.Path, port)
		return consulCheck
	}
	if mesosCheck.Type == mesosutils.TCP {
		consulCheck.TCP = fmt.Sprintf("%s:%d", serviceHost, port)
		return consulCheck
	}

	// Unsupported check type: no Consul check is produced.
	return nil
}

func generateURL(path string, port int) string {
var checkURL url.URL
checkURL.Scheme = "http"
Expand Down

0 comments on commit 8fa54b2

Please sign in to comment.