chproxy/config/testdata/full.yml at e47a098d41acda52aad31bd6ae0140e59ad3746b · ContentSquare/chproxy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# Whether to print debug logs.
#
# By default debug logs are disabled.
log_debug: true

# Optional log masks, for example replace s3('', '', 'secret', '') -> s3('', '', '******', '')
log_masks:
  - regex: (s3\(\s*'(?:(?:\\'|[^'])*)'\s*,\s*'(?:(?:\\'|[^'])*)'\s*,\s*')((?:\\'|[^'])*)(')
    replacement: $1******$3

# Whether to ignore security checks during config parsing.
#
# By default security checks are enabled.
hack_me_please: true

# Optional response cache configs.
#
# Multiple distinct caches with different settings may be configured.
caches:
    # Cache name, which may be passed into `cache` option on the `user` level.
    #
    # Multiple users may share the same cache.
  - name: "longterm"

    mode: "file_system"
    file_system:
      # Maximum cache size.
      # `Kb`, `Mb`, `Gb` and `Tb` suffixes may be used.
      max_size: 100Gb

      # Path to directory where cached responses will be stored.
      dir: "/path/to/longterm/cachedir"

    max_payload_size: 100Gb

    # Expiration time for cached responses.
    expire: 1h

    # When multiple requests with identical query simultaneously hit `chproxy`
    # and there is no cached response for the query, then only a single
    # request will be proxied to clickhouse. Other requests will wait
    # for the cached response during this grace duration.
    # This is known as protection from `thundering herd` problem.
    #
    # By default `grace_time` is 5s. Negative value disables the protection
    # from `thundering herd` problem.
    grace_time: 20s

  - name: "shortterm"
    mode: "file_system"
    file_system:
      max_size: 100Mb
      dir: "/path/to/shortterm/cachedir"
    max_payload_size: 100Mb
    shared_with_all_users: true
    expire: 10s
  - name: redis-cache
    mode: redis
    expire: 10s
    redis:
      username: chproxy
      password: password
      pool_size: 10
      addresses:
        - 127.0.0.1:16379
    max_payload_size: 107374182400
    shared_with_all_users: true

# Optional network lists, might be used as values for `allowed_networks`.
network_groups:
  - name: "office"
    # Each item may contain either IP or IP subnet mask.
    networks: ["127.0.0.0/24", "10.10.0.1"]

  - name: "reporting-apps"
    networks: ["10.10.10.0/24"]

max_error_reason_size: 100Mb

# Optional lists of query params to send with each proxied request to ClickHouse.
# These lists may be used for overriding ClickHouse settings on a per-user basis.
param_groups:
    # Group name, which may be passed into `params` option on the `user` level.
  - name: "cron-job"
    # List of key-value params to send
    params:
      - key: "max_memory_usage"
        value: "40000000000"

      - key: "max_bytes_before_external_group_by"
        value: "20000000000"

  - name: "web"
    params:
      - key: "max_memory_usage"
        value: "5000000000"

      - key: "max_columns_to_read"
        value: "30"

      - key: "max_execution_time"
        value: "30"

# Settings for `chproxy` connection pool to ClickHouse.
connection_pool:
  # Total number of connections to keep open
  # when they are not needed for clients.
  # Consider increasing if many sessions in TIME_WAIT and Bad Gateway is
  # often returned by chporxy.
  max_idle_conns: 100
  # Number of connections per ClickHouse host to keep open
  # when they are not needed for clients.
  max_idle_conns_per_host: 2
  # Idle Timeout For Connections To Clickhouse. This value must be < the Clickhouse
  # `keep_alive_timeout` to prevent graceful disconnection race-conditions that can
  # result in Bad Gateway (unable to reach host errors)
  idle_conn_timeout: 9s

# Settings for `chproxy` input interfaces.
server:
  # Configs for input http interface.
  # The interface works only if this section is present.
  http:
    # TCP address to listen to for http.
    # May be in the form IP:port . IP part is optional.
    listen_addr: ":9090"

    # List of allowed networks or network_groups.
    # Each item may contain IP address, IP subnet mask or a name
    # from `network_groups`.
    # By default requests are accepted from all the IPs.
    allowed_networks: ["office", "reporting-apps", "1.2.3.4"]

    # ReadTimeout is the maximum duration for proxy to reading the entire
    # request, including the body.
    # Default value is 1m
    read_timeout: 5m

    # WriteTimeout is the maximum duration for proxy before timing out writes of the response.
    # Default is largest MaxExecutionTime + MaxQueueTime value from Users or Clusters
    write_timeout: 10m

    # IdleTimeout is the maximum amount of time for proxy to wait for the next request.
    # Default is 10m
    idle_timeout: 20m

  # Configs for input https interface.
  # The interface works only if this section is present.
  https:
    # TCP address to listen to for https.
    listen_addr: ":443"

    # Paths to TLS cert and key files.
    # cert_file: "cert_file"
    # key_file: "key_file"

    # Letsencrypt config.
    # Certificates are automatically issued and renewed if this section
    # is present.
    # There is no need in cert_file and key_file if this section is present.
    # Autocert requires application to listen on :80 port for certificate generation
    autocert:
      # Path to the directory where autocert certs are cached.
      cache_dir: "certs_dir"

      # The list of host names proxy is allowed to respond to.
      # See https://godoc.org/golang.org/x/crypto/acme/autocert#HostPolicy
      allowed_hosts: ["example.com"]

  # Metrics in prometheus format are exposed on the `/metrics` path.
  # Access to `/metrics` endpoint may be restricted in this section.
  # By default access to `/metrics` is unrestricted.
  metrics:
    allowed_networks: ["office"]
    namespace: ""

  # Proxy settings enable parsing proxy headers in cases where
  # CHProxy is run behind another proxy.
  proxy:
    enable: true
    header: CF-Connecting-IP

# Configs for input users.
users:
    # Name and password are used to authorize access via BasicAuth or
    # via `user`/`password` query params.
    # Password is optional. By default empty password is used.
  - name: "web"
    password: "****"

    # Requests from the user are routed to this cluster.
    to_cluster: "first cluster"

    # Input user is substituted by the given output user from `to_cluster`
    # before proxying the request.
    to_user: "web"

    # Whether to deny input requests over HTTP.
    deny_http: true

    # Whether to allow `CORS` requests like `tabix` does.
    # By default `CORS` requests are denied for security reasons.
    allow_cors: true

    # Requests per minute limit for the given input user.
    #
    # By default there is no per-minute limit.
    requests_per_minute: 4

    # Response cache config name to use.
    #
    # By default responses aren't cached.
    cache: "longterm"

    # An optional group of params to send to ClickHouse with each proxied request.
    # These params may be set in param_groups block.
    #
    # By default no additional params are sent to ClickHouse.
    params: "web"

    # The maximum number of requests that may wait for their chance
    # to be executed because they cannot run now due to the current limits.
    #
    # This option may be useful for handling request bursts from `tabix`
    # or `clickhouse-grafana`.
    #
    # By default all the requests are immediately executed without
    # waiting in the queue.
    max_queue_size: 100

    # The maximum duration the queued requests may wait for their chance
    # to be executed.
    # This option makes sense only if max_queue_size is set.
    # By default requests wait for up to 10 seconds in the queue.
    max_queue_time: 35s

  - name: "default"
    to_cluster: "second cluster"
    to_user: "default"
    allowed_networks: ["office", "1.2.3.0/24"]

    # The maximum number of concurrently running queries for the user.
    #
    # By default there is no limit on the number of concurrently
    # running queries.
    max_concurrent_queries: 4

    # The maximum query duration for the user.
    # Timed out queries are forcibly killed via `KILL QUERY`.
    #
    # By default set to 120s.
    max_execution_time: 1m

    # Whether to deny input requests over HTTPS.
    deny_https: true

# Configs for ClickHouse clusters.
clusters:
    # The cluster name is used in `to_cluster`.
  - name: "first cluster"

    # Protocol to use for communicating with cluster nodes.
    # Currently supported values are `http` or `https`.
    # By default `http` is used.
    scheme: "http"

    # Cluster node addresses.
    # Requests are evenly distributed among them.
    nodes: ["127.0.0.1:8123", "shard2:8123"]

    # Timed out queries are killed using this user.
    # By default `default` user is used.
    kill_query_user:
      name: "default"
      password: "***"

    # Retry query when it cannot be run by the current node.
    # By default 0 is used.
    retry_number: 1

    # Configuration for cluster users.
    users:
        # The user name is used in `to_user`.
      - name: "web"
        password: "password"
        max_concurrent_queries: 4
        max_execution_time: 1m

  - name: "second cluster"
    scheme: "https"

    # The cluster may contain multiple replicas instead of flat nodes.
    #
    # Chproxy selects the least loaded node among the least loaded replicas.
    replicas:
      - name: "replica1"
        nodes: ["127.0.1.1:8443", "127.0.1.2:8443"]
      - name: "replica2"
        nodes: ["127.0.2.1:8443", "127.0.2.2:8443"]

    # Retry query when it cannot be run by the current node.
    # By default 0 is used.
    retry_number: 2

    users:
      - name: "default"
        max_concurrent_queries: 4
        max_execution_time: 1m

      - name: "web"
        max_concurrent_queries: 4
        max_execution_time: 10s
        requests_per_minute: 10
        max_queue_size: 50
        max_queue_time: 70s
        allowed_networks: ["office"]

    heartbeat:
      # Username for heartbeats
      # No default value
      # If ommited (by default),
      # credentials of the first user in clusters.users will be used for heart beat requests to clickhouse.
      # Note, that credentials of a wildcarded user cannot be used for heat beat
      user: "hbuser"

      # Password for heartbeats
      # No default value
      password: "hbpassword"

  - name: "third cluster"
    nodes: ["third1:8123", "third2:8123"]

    # Retry query when it cannot be run by the current node.
    # By default 0 is used.
    retry_number: 3

    # User configuration for heart beat requests.
    # Credentials of the first user in clusters.users will be used for heart beat requests to clickhouse.
    heartbeat:
      # An interval for checking all cluster nodes for availability
      # By default each node is checked for every 5 seconds.
      interval: 2m

      # A timeout of wait response from cluster nodes
      # By default 3s
      timeout: 10s

      # The parameter to set the URI to request in a health check
      # By default "/ping"
      request: "/?query=SELECT%201"

      # Reference response from clickhouse on health check request
      # By default "Ok.\n"
      response: "Ok.\n"