Skip to content

Commit 65c6a2a

Browse files
authored
Delete duplicate ha tracker config in distributor (#7235)
Signed-off-by: SungJin1212 <tjdwls1201@gmail.com>
1 parent d645591 commit 65c6a2a

6 files changed

Lines changed: 22 additions & 85 deletions

File tree

docs/configuration/config-file-reference.md

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3075,24 +3075,24 @@ pool:
30753075
[health_check_ingesters: <boolean> | default = true]
30763076
30773077
ha_tracker:
3078-
# Enable the distributors HA tracker so that it can accept samples from
3079-
# Prometheus HA replicas gracefully (requires labels).
3078+
# Enable the HA tracker so that it can accept data from Prometheus HA replicas
3079+
# gracefully (requires labels).
30803080
# CLI flag: -distributor.ha-tracker.enable
30813081
[enable_ha_tracker: <boolean> | default = false]
30823082
3083-
# Update the timestamp in the KV store for a given cluster/replica only after
3084-
# this amount of time has passed since the current stored timestamp.
3083+
# The time interval that must pass since the last timestamp update in the KV
3084+
# store before updating it again for a given cluster.
30853085
# CLI flag: -distributor.ha-tracker.update-timeout
30863086
[ha_tracker_update_timeout: <duration> | default = 15s]
30873087
3088-
# Maximum jitter applied to the update timeout, in order to spread the HA
3089-
# heartbeats over time.
3088+
# The maximum jitter applied to the update timeout to spread KV store updates
3089+
# over time.
30903090
# CLI flag: -distributor.ha-tracker.update-timeout-jitter-max
30913091
[ha_tracker_update_timeout_jitter_max: <duration> | default = 5s]
30923092
3093-
# If we don't receive any samples from the accepted replica for a cluster in
3094-
# this amount of time we will failover to the next replica we receive a sample
3095-
# from. This value must be greater than the update timeout
3093+
# The timeout after which a new replica will be accepted if the currently
3094+
# elected replica stops sending data. This value must be greater than the
3095+
# update timeout plus the maximum jitter.
30963096
# CLI flag: -distributor.ha-tracker.failover-timeout
30973097
[ha_tracker_failover_timeout: <duration> | default = 30s]
30983098

pkg/distributor/distributor.go

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ type Distributor struct {
144144
type Config struct {
145145
PoolConfig PoolConfig `yaml:"pool"`
146146

147-
HATrackerConfig HATrackerConfig `yaml:"ha_tracker"`
147+
HATrackerConfig ha.HATrackerConfig `yaml:"ha_tracker"`
148148

149149
MaxRecvMsgSize int `yaml:"max_recv_msg_size"`
150150
OTLPMaxRecvMsgSize int `yaml:"otlp_max_recv_msg_size"`
@@ -207,7 +207,7 @@ type OTLPConfig struct {
207207
// RegisterFlags adds the flags required to config this to the given FlagSet
208208
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
209209
cfg.PoolConfig.RegisterFlags(f)
210-
cfg.HATrackerConfig.RegisterFlags(f)
210+
cfg.HATrackerConfig.RegisterFlagsWithPrefix("distributor.", "", f)
211211
cfg.DistributorRing.RegisterFlags(f)
212212

213213
f.IntVar(&cfg.MaxRecvMsgSize, "distributor.max-recv-msg-size", 100<<20, "remote_write API max receive message size (bytes).")
@@ -243,9 +243,7 @@ func (cfg *Config) Validate(limits validation.Limits) error {
243243
return errInvalidTenantShardSize
244244
}
245245

246-
haHATrackerConfig := cfg.HATrackerConfig.ToHATrackerConfig()
247-
248-
return haHATrackerConfig.Validate()
246+
return cfg.HATrackerConfig.Validate()
249247
}
250248

251249
const (
@@ -268,7 +266,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove
268266
Title: "Cortex HA Tracker Status",
269267
ReplicaGroupLabel: "Cluster",
270268
}
271-
haTracker, err := ha.NewHATracker(cfg.HATrackerConfig.ToHATrackerConfig(), limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
269+
haTracker, err := ha.NewHATracker(cfg.HATrackerConfig, limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
272270
if err != nil {
273271
return nil, err
274272
}

pkg/distributor/distributor_ha_tracker.go

Lines changed: 0 additions & 61 deletions
This file was deleted.

pkg/distributor/distributor_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3272,7 +3272,7 @@ func prepare(tb testing.TB, cfg prepConfig) ([]*Distributor, []*mockIngester, []
32723272
ringStore, closer := consul.NewInMemoryClient(codec, log.NewNopLogger(), nil)
32733273
tb.Cleanup(func() { assert.NoError(tb, closer.Close()) })
32743274
mock := kv.PrefixClient(ringStore, "prefix")
3275-
distributorCfg.HATrackerConfig = HATrackerConfig{
3275+
distributorCfg.HATrackerConfig = ha.HATrackerConfig{
32763276
EnableHATracker: true,
32773277
KVStore: kv.Config{Mock: mock},
32783278
UpdateTimeout: 100 * time.Millisecond,

pkg/ha/ha_tracker.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,10 @@ func (cfg *HATrackerConfig) RegisterFlagsWithPrefix(flagPrefix string, kvPrefix
9292
finalKVPrefix = kvPrefix
9393
}
9494

95-
f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully.")
96-
f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "Update the timestamp in the KV store for a given cluster/replicaGroup only after this amount of time has passed since the current stored timestamp.")
97-
f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.")
98-
f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "If we don't receive any data from the accepted replica for a cluster/replicaGroup in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout")
95+
f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).")
96+
f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.")
97+
f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "The maximum jitter applied to the update timeout to spread KV store updates over time.")
98+
f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.")
9999
f.BoolVar(&cfg.EnableStartupSync, finalFlagPrefix+"ha-tracker.enable-startup-sync", false, "[Experimental] If enabled, fetches all tracked keys on startup to populate the local cache. This prevents duplicate GET calls for the same key while the cache is cold, but could cause a spike in GET requests during initialization if the number of tracked keys is large.")
100100

101101
// We want the ability to use different Consul instances for the ring and

schemas/cortex-config-schema.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3718,7 +3718,7 @@
37183718
"properties": {
37193719
"enable_ha_tracker": {
37203720
"default": false,
3721-
"description": "Enable the distributors HA tracker so that it can accept samples from Prometheus HA replicas gracefully (requires labels).",
3721+
"description": "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).",
37223722
"type": "boolean",
37233723
"x-cli-flag": "distributor.ha-tracker.enable"
37243724
},
@@ -3730,21 +3730,21 @@
37303730
},
37313731
"ha_tracker_failover_timeout": {
37323732
"default": "30s",
3733-
"description": "If we don't receive any samples from the accepted replica for a cluster in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout",
3733+
"description": "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.",
37343734
"type": "string",
37353735
"x-cli-flag": "distributor.ha-tracker.failover-timeout",
37363736
"x-format": "duration"
37373737
},
37383738
"ha_tracker_update_timeout": {
37393739
"default": "15s",
3740-
"description": "Update the timestamp in the KV store for a given cluster/replica only after this amount of time has passed since the current stored timestamp.",
3740+
"description": "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.",
37413741
"type": "string",
37423742
"x-cli-flag": "distributor.ha-tracker.update-timeout",
37433743
"x-format": "duration"
37443744
},
37453745
"ha_tracker_update_timeout_jitter_max": {
37463746
"default": "5s",
3747-
"description": "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.",
3747+
"description": "The maximum jitter applied to the update timeout to spread KV store updates over time.",
37483748
"type": "string",
37493749
"x-cli-flag": "distributor.ha-tracker.update-timeout-jitter-max",
37503750
"x-format": "duration"

0 commit comments

Comments
 (0)