-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy pathpg_exporter.yml
More file actions
1923 lines (1800 loc) · 140 KB
/
pg_exporter.yml
File metadata and controls
1923 lines (1800 loc) · 140 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#==============================================================#
# Desc : pg_exporter metrics collector definition (Legacy)
# Ver : PostgreSQL 9.1 ~ 9.6 and pgbouncer 1.9~1.25+
# Ctime : 2019-12-09
# Mtime : 2026-02-07
# Homepage : https://pigsty.io
# Author : Ruohang Feng (rh@vonng.com)
# License : Apache-2.0 @ https://github.com/pgsty/pg_exporter
# Copyright : 2018-2026 Ruohang Feng / Vonng (rh@vonng.com)
#==============================================================#
#==============================================================#
# 1. Config File
#==============================================================#
# The configuration file for pg_exporter is a YAML file.
# Default configurations are retrieved via following precedence:
# 1. command line args: --config=<config path>
# 2. environment variables: PG_EXPORTER_CONFIG=<config path>
# 3. pg_exporter.yml (Current directory)
# 4. /etc/pg_exporter.yml (config file)
# 5. /etc/pg_exporter (config dir)
#==============================================================#
# 2. Config Format
#==============================================================#
# pg_exporter config could be a single YAML file, or a directory containing a series of separated YAML files.
# Each YAML config file consists of one or more metrics Collector definition, which are top-level objects.
# If a directory is provided, all YAML in that directory will be merged in alphabetic order.
#==============================================================#
# 3. Version Compatibility
#==============================================================#
# Each collector has two optional version compatibility parameters: `min_version` and `max_version`.
# These two parameters specify the version compatibility of the collector. If target postgres/pgbouncer's
# version is less than `min_version`, or higher than `max_version`, the collector will not be installed.
#
# These two parameters are using PostgreSQL server version number format, which is a 6-digit integer
# format as <major:2 digit><minor:2 digit>:<release: 2 digit>.
#
# For example:
# - 90100 stands for 9.1
# - 90600 stands for 9.6
# - 100000 stands for 10.0
#
# Version compatibility range is left-inclusive right-exclusive: [min, max)
#==============================================================#
# 0110 pg
#==============================================================#
pg_primary_only:
name: pg
desc: PostgreSQL basic information (on primary)
query: |-
SELECT
extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp,
extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time,
(('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS lsn,
(('x' || lpad(split_part(pg_current_xlog_insert_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_current_xlog_insert_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS insert_lsn,
(('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS write_lsn,
(('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_current_xlog_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS flush_lsn,
NULL::BIGINT AS receive_lsn,
NULL::BIGINT AS replay_lsn,
extract(EPOCH FROM pg_conf_load_time()) AS reload_time,
extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time,
NULL::FLOAT AS last_replay_time,
0::FLOAT AS lag,
pg_is_in_recovery() AS is_in_recovery,
FALSE AS is_wal_replay_paused;
tags: [ cluster, primary ]
ttl: 1
min_version: 90100
max_version: 100000
fatal: true
skip: false
metrics:
- timestamp: { usage: GAUGE ,description: "current database timestamp in unix epoch" }
- uptime: { usage: GAUGE ,description: "seconds since postmaster start" }
- boot_time: { usage: GAUGE ,description: "postmaster boot timestamp in unix epoch" }
- lsn: { usage: COUNTER ,description: "log sequence number, current write location" }
- insert_lsn: { usage: COUNTER ,description: "primary only, location of current wal inserting" }
- write_lsn: { usage: COUNTER ,description: "primary only, location of current wal writing" }
- flush_lsn: { usage: COUNTER ,description: "primary only, location of current wal syncing" }
- receive_lsn: { usage: COUNTER ,description: "replica only, location of wal synced to disk" }
- replay_lsn: { usage: COUNTER ,description: "replica only, location of wal applied" }
- reload_time: { usage: GAUGE ,description: "time when configuration was last reloaded" }
- conf_reload_time: { usage: GAUGE ,description: "seconds since last configuration reload" }
- last_replay_time: { usage: GAUGE ,description: "time when last transaction been replayed" }
- lag: { usage: GAUGE ,description: "replica only, replication lag in seconds" }
- is_in_recovery: { usage: GAUGE ,description: "1 if in recovery mode" }
- is_wal_replay_paused: { usage: GAUGE ,description: "1 if wal play is paused" }
pg_replica_only:
name: pg
desc: PostgreSQL basic information (on replica, 9.1+)
query: |-
SELECT
extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp,
extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time,
(('x' || lpad(split_part(pg_last_xlog_replay_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_last_xlog_replay_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS lsn,
NULL::BIGINT AS insert_lsn,
NULL::BIGINT AS write_lsn,
NULL::BIGINT AS flush_lsn,
(('x' || lpad(split_part(pg_last_xlog_receive_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_last_xlog_receive_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS receive_lsn,
(('x' || lpad(split_part(pg_last_xlog_replay_location()::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(pg_last_xlog_replay_location()::text, '/', 2), 8, '0'))::bit(32)::bigint) AS replay_lsn,
extract(EPOCH FROM pg_conf_load_time()) AS reload_time,
extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time,
extract(EPOCH FROM pg_last_xact_replay_timestamp()) AS last_replay_time,
CASE WHEN pg_last_xlog_receive_location() = pg_last_xlog_replay_location() THEN 0
ELSE EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp()) END AS lag,
pg_is_in_recovery() AS is_in_recovery,
pg_is_xlog_replay_paused() AS is_wal_replay_paused;
tags: [ cluster, replica ]
ttl: 1
min_version: 90100
max_version: 100000
fatal: true
skip: false
metrics:
- timestamp: { usage: GAUGE ,description: "current database timestamp in unix epoch" }
- uptime: { usage: GAUGE ,description: "seconds since postmaster start" }
- boot_time: { usage: GAUGE ,description: "postmaster boot timestamp in unix epoch" }
- lsn: { usage: COUNTER ,description: "log sequence number, current write location" }
- insert_lsn: { usage: COUNTER ,description: "primary only, location of current wal inserting" }
- write_lsn: { usage: COUNTER ,description: "primary only, location of current wal writing" }
- flush_lsn: { usage: COUNTER ,description: "primary only, location of current wal syncing" }
- receive_lsn: { usage: COUNTER ,description: "replica only, location of wal synced to disk" }
- replay_lsn: { usage: COUNTER ,description: "replica only, location of wal applied" }
- reload_time: { usage: GAUGE ,description: "time when configuration was last reloaded" }
- conf_reload_time: { usage: GAUGE ,description: "seconds since last configuration reload" }
- last_replay_time: { usage: GAUGE ,description: "time when last transaction been replayed" }
- lag: { usage: GAUGE ,description: "replica only, replication lag in seconds" }
- is_in_recovery: { usage: GAUGE ,description: "1 if in recovery mode" }
- is_wal_replay_paused: { usage: GAUGE ,description: "1 if wal play is paused" }
#==============================================================#
# 0120 pg_meta
#==============================================================#
pg_meta_96:
name: pg_meta
desc: PostgreSQL meta info for pg 9.6 (with pg_control_system)
query: |
SELECT
(SELECT system_identifier FROM pg_control_system()) AS cluster_id,
coalesce((SELECT setting FROM pg_settings WHERE name = 'cluster_name'), 'N/A') AS cluster_name,
(SELECT setting FROM pg_settings WHERE name = 'port') AS listen_port,
(SELECT setting FROM pg_settings WHERE name = 'data_directory') AS data_dir,
(SELECT setting FROM pg_settings WHERE name = 'config_file') AS conf_path,
(SELECT setting FROM pg_settings WHERE name = 'hba_file') AS hba_path,
(SELECT setting FROM pg_settings WHERE name = 'wal_level') AS wal_level,
(SELECT setting FROM pg_settings WHERE name = 'server_encoding') AS encoding,
(SELECT setting FROM pg_settings WHERE name = 'server_version') AS version,
(SELECT setting FROM pg_settings WHERE name = 'server_version_num') AS ver_num,
version() AS ver_str,
(SELECT setting FROM pg_settings WHERE name = 'shared_preload_libraries') AS extensions,
coalesce((SELECT setting FROM pg_settings WHERE name = 'primary_conninfo'), 'N/A') AS primary_conninfo,
1 AS info;
ttl: 10
min_version: 90600
tags: [ cluster ]
metrics:
- cluster_id: { usage: LABEL ,description: "cluster system identifier" }
- cluster_name: { usage: LABEL ,description: "cluster name" }
- listen_port: { usage: LABEL ,description: "listen port" }
- data_dir: { usage: LABEL ,description: "path to data directory" }
- conf_path: { usage: LABEL ,description: "path to postgresql.conf" }
- hba_path: { usage: LABEL ,description: "path to pg_hba.conf" }
- wal_level: { usage: LABEL ,description: "wal level" }
- encoding: { usage: LABEL ,description: "server encoding" }
- version: { usage: LABEL ,description: "server version in human-readable format" }
- ver_num: { usage: LABEL ,description: "server version number in machine-readable format" }
- ver_str: { usage: LABEL ,description: "complete version string" }
- extensions: { usage: LABEL ,description: "server installed preload libraries" }
- primary_conninfo: { usage: LABEL ,description: "connection string to upstream (do not set password here)" }
- info: { usage: GAUGE ,description: "constant 1" }
pg_meta_91:
name: pg_meta
desc: PostgreSQL meta info for pg 9.1 - 9.5
query: |
SELECT
'N/A' AS cluster_id,
coalesce((SELECT setting FROM pg_settings WHERE name = 'cluster_name'), 'N/A') AS cluster_name,
(SELECT setting FROM pg_settings WHERE name = 'port') AS listen_port,
(SELECT setting FROM pg_settings WHERE name = 'data_directory') AS data_dir,
(SELECT setting FROM pg_settings WHERE name = 'config_file') AS conf_path,
(SELECT setting FROM pg_settings WHERE name = 'hba_file') AS hba_path,
(SELECT setting FROM pg_settings WHERE name = 'wal_level') AS wal_level,
(SELECT setting FROM pg_settings WHERE name = 'server_encoding') AS encoding,
(SELECT setting FROM pg_settings WHERE name = 'server_version') AS version,
(SELECT setting FROM pg_settings WHERE name = 'server_version_num') AS ver_num,
version() AS ver_str,
(SELECT setting FROM pg_settings WHERE name = 'shared_preload_libraries') AS extensions,
coalesce((SELECT setting FROM pg_settings WHERE name = 'primary_conninfo'), 'N/A') AS primary_conninfo,
1 AS info;
ttl: 10
min_version: 90100
max_version: 90600
tags: [ cluster ]
metrics:
- cluster_id: { usage: LABEL ,description: "cluster system identifier" }
- cluster_name: { usage: LABEL ,description: "cluster name" }
- listen_port: { usage: LABEL ,description: "listen port" }
- data_dir: { usage: LABEL ,description: "path to data directory" }
- conf_path: { usage: LABEL ,description: "path to postgresql.conf" }
- hba_path: { usage: LABEL ,description: "path to pg_hba.conf" }
- wal_level: { usage: LABEL ,description: "wal level" }
- encoding: { usage: LABEL ,description: "server encoding" }
- version: { usage: LABEL ,description: "server version in human-readable format" }
- ver_num: { usage: LABEL ,description: "server version number in machine-readable format" }
- ver_str: { usage: LABEL ,description: "complete version string" }
- extensions: { usage: LABEL ,description: "server installed preload libraries" }
- primary_conninfo: { usage: LABEL ,description: "connection string to upstream (do not set password here)" }
- info: { usage: GAUGE ,description: "constant 1" }
#==============================================================#
# 0130 pg_setting
#==============================================================#
# Key PostgreSQL configuration parameters for PostgreSQL 9.1 - 9.6
# Use scalar subquery on pg_settings for "missing_ok" semantics (return NULL if not exist)
pg_setting:
name: pg_setting
desc: PostgreSQL shared configuration parameters (legacy 9.1-9.6)
query: |
SELECT
(SELECT setting::int FROM pg_settings WHERE name = 'max_connections') AS max_connections,
(SELECT setting::int FROM pg_settings WHERE name = 'max_prepared_transactions') AS max_prepared_transactions,
(SELECT setting::int FROM pg_settings WHERE name = 'max_locks_per_transaction') AS max_locks_per_transaction,
(SELECT setting::int FROM pg_settings WHERE name = 'max_worker_processes') AS max_worker_processes,
(SELECT setting::int FROM pg_settings WHERE name = 'max_parallel_workers') AS max_parallel_workers,
(SELECT setting::int FROM pg_settings WHERE name = 'max_parallel_workers_per_gather') AS max_parallel_workers_per_gather,
(SELECT setting::int FROM pg_settings WHERE name = 'max_parallel_maintenance_workers') AS max_parallel_maintenance_workers,
(SELECT setting::int FROM pg_settings WHERE name = 'max_replication_slots') AS max_replication_slots,
(SELECT setting::int FROM pg_settings WHERE name = 'max_wal_senders') AS max_wal_senders,
(SELECT setting::int FROM pg_settings WHERE name = 'block_size') AS block_size,
(SELECT setting::int FROM pg_settings WHERE name = 'wal_block_size') AS wal_block_size,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'segment_size') AS segment_size,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'wal_segment_size') AS wal_segment_size,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'data_checksums') AS data_checksums,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'wal_log_hints') AS wal_log_hints,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'fsync') AS fsync,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'full_page_writes') AS full_page_writes,
(SELECT CASE setting WHEN 'minimal' THEN 1 WHEN 'archive' THEN 2 WHEN 'hot_standby' THEN 3 ELSE 0 END FROM pg_settings WHERE name = 'wal_level') AS wal_level,
(SELECT setting::int FROM pg_settings WHERE name = 'checkpoint_segments') AS checkpoint_segments,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'min_wal_size') AS min_wal_size,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'max_wal_size') AS max_wal_size,
(SELECT setting::int FROM pg_settings WHERE name = 'wal_keep_segments') AS wal_keep_segments,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'shared_buffers') AS shared_buffers,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'work_mem') AS work_mem,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'maintenance_work_mem') AS maintenance_work_mem,
(SELECT setting::bigint * CASE unit WHEN '8kB' THEN 8192 WHEN 'kB' THEN 1024 WHEN 'MB' THEN 1048576 WHEN 'GB' THEN 1073741824 ELSE 1 END
FROM pg_settings WHERE name = 'effective_cache_size') AS effective_cache_size,
(SELECT CASE setting WHEN 'off' THEN 0 WHEN 'on' THEN 1 WHEN 'always' THEN 2 ELSE -1 END FROM pg_settings WHERE name = 'archive_mode') AS archive_mode,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'autovacuum') AS autovacuum,
(SELECT setting::int FROM pg_settings WHERE name = 'autovacuum_max_workers') AS autovacuum_max_workers,
(SELECT setting::int FROM pg_settings WHERE name = 'checkpoint_timeout') AS checkpoint_timeout,
(SELECT setting::float FROM pg_settings WHERE name = 'checkpoint_completion_target') AS checkpoint_completion_target,
(SELECT CASE setting WHEN 'on' THEN 1 ELSE 0 END FROM pg_settings WHERE name = 'hot_standby') AS hot_standby,
(SELECT CASE setting
WHEN 'off' THEN 0 WHEN 'local' THEN 1 WHEN 'remote_write' THEN 2
WHEN 'on' THEN 3 WHEN 'remote_apply' THEN 4 ELSE -1 END
FROM pg_settings WHERE name = 'synchronous_commit') AS synchronous_commit;
ttl: 10
min_version: 90100
tags: [ cluster ]
metrics:
- max_connections: { usage: GAUGE ,description: "maximum number of concurrent connections to the database server" }
- max_prepared_transactions: { usage: GAUGE ,description: "maximum number of transactions that can be in the prepared state simultaneously" }
- max_locks_per_transaction: { usage: GAUGE ,description: "maximum number of locks per transaction" }
- max_worker_processes: { usage: GAUGE ,description: "maximum number of background processes (9.4+)" }
- max_parallel_workers: { usage: GAUGE ,description: "maximum number of parallel workers that can be active at one time (9.6+)" }
- max_parallel_workers_per_gather: { usage: GAUGE ,description: "maximum number of parallel workers per Gather node (9.6+)" }
- max_parallel_maintenance_workers: { usage: GAUGE ,description: "maximum number of parallel maintenance workers (NULL on 9.x)" }
- max_replication_slots: { usage: GAUGE ,description: "maximum number of replication slots (9.4+)" }
- max_wal_senders: { usage: GAUGE ,description: "maximum number of concurrent WAL sender connections" }
- block_size: { usage: GAUGE ,description: "database block size in bytes (default 8192)" }
- wal_block_size: { usage: GAUGE ,description: "WAL block size in bytes" }
- segment_size: { usage: GAUGE ,description: "database file segment size in bytes" }
- wal_segment_size: { usage: GAUGE ,description: "WAL segment size in bytes" }
- data_checksums: { usage: GAUGE ,description: "data checksums enabled, 1=on 0=off (9.3+)" }
- wal_log_hints: { usage: GAUGE ,description: "WAL log hints enabled, 1=on 0=off (9.4+)" }
- fsync: { usage: GAUGE ,description: "fsync enabled (CRITICAL for data safety), 1=on 0=off" }
- full_page_writes: { usage: GAUGE ,description: "full page writes enabled, 1=on 0=off" }
- wal_level: { usage: GAUGE ,description: "WAL level, 1=minimal 2=archive 3=hot_standby" }
- checkpoint_segments: { usage: GAUGE ,description: "number of checkpoint segments (pre-9.5)" }
- min_wal_size: { usage: GAUGE ,description: "minimum WAL size in bytes (9.5+)" }
- max_wal_size: { usage: GAUGE ,description: "maximum WAL size in bytes (9.5+)" }
- wal_keep_segments: { usage: GAUGE ,description: "WAL segments kept for standby replication (pg_basebackup/streaming)" }
- shared_buffers: { usage: GAUGE ,description: "shared buffer size in bytes" }
- work_mem: { usage: GAUGE ,description: "work memory size in bytes" }
- maintenance_work_mem: { usage: GAUGE ,description: "maintenance work memory size in bytes" }
- effective_cache_size: { usage: GAUGE ,description: "planner's assumption about effective OS cache size in bytes" }
- archive_mode: { usage: GAUGE ,description: "archive mode, 0=off 1=on 2=always" }
- autovacuum: { usage: GAUGE ,description: "autovacuum enabled, 1=on 0=off" }
- autovacuum_max_workers: { usage: GAUGE ,description: "maximum number of autovacuum worker processes" }
- checkpoint_timeout: { usage: GAUGE ,description: "checkpoint timeout in seconds" }
- checkpoint_completion_target: { usage: GAUGE ,description: "checkpoint completion target (0.0-1.0)" }
- hot_standby: { usage: GAUGE ,description: "hot standby mode enabled, 1=on 0=off" }
- synchronous_commit: { usage: GAUGE ,description: "synchronous commit level, 0=off 1=local 2=remote_write 3=on 4=remote_apply" }
#==============================================================#
# 0210 pg_repl
#==============================================================#
pg_repl_94:
name: pg_repl
desc: PostgreSQL replication stat metrics 9.4 - 9.6 (with backend_xmin)
query: |-
SELECT appname, usename, address, pid, client_port, state, sync_state, sync_priority, backend_xmin, lsn,
lsn - sent_lsn AS sent_diff, lsn - write_lsn AS write_diff, lsn - flush_lsn AS flush_diff, lsn - replay_lsn AS replay_diff,
sent_lsn, write_lsn, flush_lsn, replay_lsn,
0::FLOAT AS write_lag, 0::FLOAT AS flush_lag, 0::FLOAT AS replay_lag,
extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time
FROM (
SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port,
CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state,
CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state,
sync_priority, backend_xmin::TEXT::BIGINT AS backend_xmin,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS lsn,
(('x' || lpad(split_part(sent_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(sent_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS sent_lsn,
(('x' || lpad(split_part(write_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(write_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS write_lsn,
(('x' || lpad(split_part(flush_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(flush_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS flush_lsn,
(('x' || lpad(split_part(replay_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(replay_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS replay_lsn,
backend_start
FROM pg_stat_replication,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90400
max_version: 100000
tags: [ cluster ]
metrics:
- appname: { usage: LABEL ,description: "Name of the application that is connected to this WAL sender" }
- usename: { usage: LABEL ,description: "Name of the user logged into this WAL sender process" }
- address: { usage: LABEL ,description: "IP address of the client connected to this WAL sender, localhost for unix socket" }
- pid: { usage: LABEL ,description: "Process ID of the WAL sender process" }
- client_port: { usage: GAUGE ,description: "TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used" }
- state: { usage: GAUGE ,description: "Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping" }
- sync_state: { usage: GAUGE ,description: "Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum" }
- sync_priority: { usage: GAUGE ,description: "Priority of this standby server for being chosen as the synchronous standby" }
- backend_xmin: { usage: COUNTER ,description: "This standby's xmin horizon reported by hot_standby_feedback." }
- lsn: { usage: COUNTER ,description: "Current log position on this server" }
- sent_diff: { usage: GAUGE ,description: "Last log position sent to this standby server diff with current lsn" }
- write_diff: { usage: GAUGE ,description: "Last log position written to disk by this standby server diff with current lsn" }
- flush_diff: { usage: GAUGE ,description: "Last log position flushed to disk by this standby server diff with current lsn" }
- replay_diff: { usage: GAUGE ,description: "Last log position replayed into the database on this standby server diff with current lsn" }
- sent_lsn: { usage: COUNTER ,description: "Last write-ahead log location sent on this connection" }
- write_lsn: { usage: COUNTER ,description: "Last write-ahead log location written to disk by this standby server" }
- flush_lsn: { usage: COUNTER ,description: "Last write-ahead log location flushed to disk by this standby server" }
- replay_lsn: { usage: COUNTER ,description: "Last write-ahead log location replayed into the database on this standby server" }
- write_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (N/A on 9.x)" }
- flush_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (N/A on 9.x)" }
- replay_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it (N/A on 9.x)" }
- time: { usage: COUNTER ,description: "Current timestamp in unix epoch" }
- launch_time: { usage: COUNTER ,description: "Time when this process was started, i.e., when the client connected to this WAL sender" }
pg_repl_92:
name: pg_repl
desc: PostgreSQL replication stat metrics 9.2 - 9.3
query: |-
SELECT appname, usename, address, pid, client_port, state, sync_state, sync_priority, backend_xmin, lsn,
lsn - sent_lsn AS sent_diff, lsn - write_lsn AS write_diff, lsn - flush_lsn AS flush_diff, lsn - replay_lsn AS replay_diff,
sent_lsn, write_lsn, flush_lsn, replay_lsn,
0::FLOAT AS write_lag, 0::FLOAT AS flush_lag, 0::FLOAT AS replay_lag,
extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time
FROM (
SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port,
CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state,
CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state,
sync_priority, 0::BIGINT AS backend_xmin,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS lsn,
(('x' || lpad(split_part(sent_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(sent_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS sent_lsn,
(('x' || lpad(split_part(write_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(write_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS write_lsn,
(('x' || lpad(split_part(flush_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(flush_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS flush_lsn,
(('x' || lpad(split_part(replay_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(replay_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS replay_lsn,
backend_start
FROM pg_stat_replication,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90200
max_version: 90400
tags: [ cluster ]
metrics:
- appname: { usage: LABEL ,description: "Name of the application that is connected to this WAL sender" }
- usename: { usage: LABEL ,description: "Name of the user logged into this WAL sender process" }
- address: { usage: LABEL ,description: "IP address of the client connected to this WAL sender, localhost for unix socket" }
- pid: { usage: LABEL ,description: "Process ID of the WAL sender process" }
- client_port: { usage: GAUGE ,description: "TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used" }
- state: { usage: GAUGE ,description: "Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping" }
- sync_state: { usage: GAUGE ,description: "Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum" }
- sync_priority: { usage: GAUGE ,description: "Priority of this standby server for being chosen as the synchronous standby" }
- backend_xmin: { usage: COUNTER ,description: "This standby's xmin horizon reported by hot_standby_feedback (N/A before 9.4)" }
- lsn: { usage: COUNTER ,description: "Current log position on this server" }
- sent_diff: { usage: GAUGE ,description: "Last log position sent to this standby server diff with current lsn" }
- write_diff: { usage: GAUGE ,description: "Last log position written to disk by this standby server diff with current lsn" }
- flush_diff: { usage: GAUGE ,description: "Last log position flushed to disk by this standby server diff with current lsn" }
- replay_diff: { usage: GAUGE ,description: "Last log position replayed into the database on this standby server diff with current lsn" }
- sent_lsn: { usage: COUNTER ,description: "Last write-ahead log location sent on this connection" }
- write_lsn: { usage: COUNTER ,description: "Last write-ahead log location written to disk by this standby server" }
- flush_lsn: { usage: COUNTER ,description: "Last write-ahead log location flushed to disk by this standby server" }
- replay_lsn: { usage: COUNTER ,description: "Last write-ahead log location replayed into the database on this standby server" }
- write_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (N/A on 9.x)" }
- flush_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (N/A on 9.x)" }
- replay_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it (N/A on 9.x)" }
- time: { usage: COUNTER ,description: "Current timestamp in unix epoch" }
- launch_time: { usage: COUNTER ,description: "Time when this process was started, i.e., when the client connected to this WAL sender" }
pg_repl_91:
name: pg_repl
desc: PostgreSQL replication stat metrics 9.1 (procpid, no state/sync columns)
query: |-
SELECT appname, usename, address, pid, client_port, state, sync_state, sync_priority, backend_xmin, lsn,
lsn - sent_lsn AS sent_diff, lsn - write_lsn AS write_diff, lsn - flush_lsn AS flush_diff, lsn - replay_lsn AS replay_diff,
sent_lsn, write_lsn, flush_lsn, replay_lsn,
0::FLOAT AS write_lag, 0::FLOAT AS flush_lag, 0::FLOAT AS replay_lag,
extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time
FROM (
SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, procpid::TEXT AS pid, client_port,
0::INT AS state, 0::INT AS sync_state, 0::INT AS sync_priority, 0::BIGINT AS backend_xmin,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS lsn,
(('x' || lpad(split_part(sent_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(sent_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS sent_lsn,
(('x' || lpad(split_part(write_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(write_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS write_lsn,
(('x' || lpad(split_part(flush_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(flush_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS flush_lsn,
(('x' || lpad(split_part(replay_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(replay_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS replay_lsn,
backend_start
FROM pg_stat_replication,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90100
max_version: 90200
tags: [ cluster ]
metrics:
- appname: { usage: LABEL ,description: "Name of the application that is connected to this WAL sender" }
- usename: { usage: LABEL ,description: "Name of the user logged into this WAL sender process" }
- address: { usage: LABEL ,description: "IP address of the client connected to this WAL sender, localhost for unix socket" }
- pid: { usage: LABEL ,description: "Process ID of the WAL sender process" }
- client_port: { usage: GAUGE ,description: "TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used" }
- state: { usage: GAUGE ,description: "Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping" }
- sync_state: { usage: GAUGE ,description: "Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum" }
- sync_priority: { usage: GAUGE ,description: "Priority of this standby server for being chosen as the synchronous standby" }
- backend_xmin: { usage: COUNTER ,description: "This standby's xmin horizon reported by hot_standby_feedback (N/A before 9.4)" }
- lsn: { usage: COUNTER ,description: "Current log position on this server" }
- sent_diff: { usage: GAUGE ,description: "Last log position sent to this standby server diff with current lsn" }
- write_diff: { usage: GAUGE ,description: "Last log position written to disk by this standby server diff with current lsn" }
- flush_diff: { usage: GAUGE ,description: "Last log position flushed to disk by this standby server diff with current lsn" }
- replay_diff: { usage: GAUGE ,description: "Last log position replayed into the database on this standby server diff with current lsn" }
- sent_lsn: { usage: COUNTER ,description: "Last write-ahead log location sent on this connection" }
- write_lsn: { usage: COUNTER ,description: "Last write-ahead log location written to disk by this standby server" }
- flush_lsn: { usage: COUNTER ,description: "Last write-ahead log location flushed to disk by this standby server" }
- replay_lsn: { usage: COUNTER ,description: "Last write-ahead log location replayed into the database on this standby server" }
- write_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (N/A on 9.x)" }
- flush_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (N/A on 9.x)" }
- replay_lag: { usage: GAUGE ,description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it (N/A on 9.x)" }
- time: { usage: COUNTER ,description: "Current timestamp in unix epoch" }
- launch_time: { usage: COUNTER ,description: "Time when this process was started, i.e., when the client connected to this WAL sender" }
#==============================================================#
# 0220 pg_sync_standby
#==============================================================#
pg_sync_standby:
name: pg_sync_standby
desc: PostgreSQL synchronous standby status and names
query: |-
SELECT
CASE WHEN names <> '' THEN names ELSE '<null>' END AS names,
CASE WHEN names <> '' THEN 1 ELSE 0 END AS enabled
FROM (SELECT current_setting('synchronous_standby_names') AS names) n;
ttl: 10
min_version: 90100
tags: [ cluster ]
metrics:
- names: { usage: LABEL ,description: "List of standby servers that can support synchronous replication" }
- enabled: { usage: GAUGE ,description: "Synchronous commit enabled, 1 if enabled, 0 if disabled" }
#==============================================================#
# 0230 pg_downstream
#==============================================================#
pg_downstream:
name: pg_downstream
desc: PostgreSQL replication client count (no state column on 9.1)
query: |-
SELECT 'connected' AS state, count(*) AS count FROM pg_stat_replication;
ttl: 10
min_version: 90100
max_version: 90200
tags: [ cluster ]
metrics:
- state: { usage: LABEL ,description: "Replication client state" }
- count: { usage: GAUGE ,description: "Count of replication clients by state" }
pg_downstream_92:
name: pg_downstream
desc: PostgreSQL replication client count (group by state)
query: |-
SELECT state, count(*) AS count FROM pg_stat_replication GROUP BY state;
ttl: 10
min_version: 90200
tags: [ cluster ]
metrics:
- state: { usage: LABEL ,description: "Replication client state" }
- count: { usage: GAUGE ,description: "Count of replication clients by state" }
#==============================================================#
# 0240 pg_slot
#==============================================================#
pg_slot_96:
name: pg_slot
desc: PostgreSQL replication slot metrics 9.6 (with active_pid, confirmed_flush_lsn)
query: |-
SELECT slot_name, slot_type, plugin, database AS datname, datoid, active_pid,
active, FALSE AS temporary,
xmin::TEXT::BIGINT AS xmin, catalog_xmin::TEXT::BIGINT AS catalog_xmin,
restart_lsn, confirm_lsn, current_lsn - restart_lsn AS retained_bytes
FROM (
SELECT slot_name, slot_type, plugin, database, datoid, active_pid, active, xmin, catalog_xmin,
(('x' || lpad(split_part(restart_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(restart_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS restart_lsn,
(('x' || lpad(split_part(confirmed_flush_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(confirmed_flush_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS confirm_lsn,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS current_lsn
FROM pg_replication_slots,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90600
max_version: 100000
tags: [ cluster, primary ]
metrics:
- slot_name: { usage: LABEL ,description: "A unique, cluster-wide identifier for the replication slot" }
- slot_type: { usage: LABEL ,description: "The slot type, physical or logical" }
- plugin: { usage: LABEL ,description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots." }
- datname: { usage: LABEL ,description: "The name of the database this slot is associated with, logical slots only, null for physical slot" }
- datoid: { usage: GAUGE ,description: "The OID of the database this slot is associated with, logical slots only, null for physical slot" }
- active_pid: { usage: GAUGE ,description: "The process ID of the session streaming data for this slot. NULL if inactive." }
- active: { usage: GAUGE ,description: "True(1) if this slot is currently actively being used" }
- temporary: { usage: GAUGE ,description: "True(1) if this is a temporary replication slot (N/A on 9.x, always 0)" }
- xmin: { usage: COUNTER ,description: "The oldest transaction that this slot needs the database to retain." }
- catalog_xmin: { usage: COUNTER ,description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain." }
- restart_lsn: { usage: COUNTER ,description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot" }
- confirm_lsn: { usage: COUNTER ,description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data." }
- retained_bytes: { usage: GAUGE ,description: "Size of bytes that retained for this slot" }
pg_slot_95:
name: pg_slot
desc: PostgreSQL replication slot metrics 9.5 (no confirmed_flush_lsn)
query: |-
SELECT slot_name, slot_type, plugin, database AS datname, datoid, active_pid,
active, FALSE AS temporary,
xmin::TEXT::BIGINT AS xmin, catalog_xmin::TEXT::BIGINT AS catalog_xmin,
restart_lsn, confirm_lsn, current_lsn - restart_lsn AS retained_bytes
FROM (
SELECT slot_name, slot_type, plugin, database, datoid, active_pid, active, xmin, catalog_xmin,
(('x' || lpad(split_part(restart_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(restart_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS restart_lsn,
0::BIGINT AS confirm_lsn,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS current_lsn
FROM pg_replication_slots,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90500
max_version: 90600
tags: [ cluster, primary ]
metrics:
- slot_name: { usage: LABEL ,description: "A unique, cluster-wide identifier for the replication slot" }
- slot_type: { usage: LABEL ,description: "The slot type, physical or logical" }
- plugin: { usage: LABEL ,description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots." }
- datname: { usage: LABEL ,description: "The name of the database this slot is associated with, logical slots only, null for physical slot" }
- datoid: { usage: GAUGE ,description: "The OID of the database this slot is associated with, logical slots only, null for physical slot" }
- active_pid: { usage: GAUGE ,description: "The process ID of the session streaming data for this slot. NULL if inactive." }
- active: { usage: GAUGE ,description: "True(1) if this slot is currently actively being used" }
- temporary: { usage: GAUGE ,description: "True(1) if this is a temporary replication slot (N/A on 9.x, always 0)" }
- xmin: { usage: COUNTER ,description: "The oldest transaction that this slot needs the database to retain." }
- catalog_xmin: { usage: COUNTER ,description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain." }
- restart_lsn: { usage: COUNTER ,description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot" }
- confirm_lsn: { usage: COUNTER ,description: "Confirmed flush lsn (N/A before 9.6, always 0)" }
- retained_bytes: { usage: GAUGE ,description: "Size of bytes that retained for this slot" }
pg_slot_94:
name: pg_slot
desc: PostgreSQL replication slot metrics 9.4 (no active_pid, confirmed_flush_lsn)
query: |-
SELECT slot_name, slot_type, plugin, database AS datname, datoid, active_pid,
active, FALSE AS temporary,
xmin::TEXT::BIGINT AS xmin, catalog_xmin::TEXT::BIGINT AS catalog_xmin,
restart_lsn, confirm_lsn, current_lsn - restart_lsn AS retained_bytes
FROM (
SELECT slot_name, slot_type, plugin, database, datoid, NULL::INT AS active_pid, active, xmin, catalog_xmin,
(('x' || lpad(split_part(restart_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(restart_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS restart_lsn,
0::BIGINT AS confirm_lsn,
(('x' || lpad(split_part(current.loc::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(current.loc::text, '/', 2), 8, '0'))::bit(32)::bigint) AS current_lsn
FROM pg_replication_slots,
(SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_xlog_replay_location() ELSE pg_current_xlog_location() END AS loc) current
) d;
ttl: 10
min_version: 90400
max_version: 90500
tags: [ cluster, primary ]
metrics:
- slot_name: { usage: LABEL ,description: "A unique, cluster-wide identifier for the replication slot" }
- slot_type: { usage: LABEL ,description: "The slot type, physical or logical" }
- plugin: { usage: LABEL ,description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots." }
- datname: { usage: LABEL ,description: "The name of the database this slot is associated with, logical slots only, null for physical slot" }
- datoid: { usage: GAUGE ,description: "The OID of the database this slot is associated with, logical slots only, null for physical slot" }
- active_pid: { usage: GAUGE ,description: "Process ID is not available before 9.5 (NULL)" }
- active: { usage: GAUGE ,description: "True(1) if this slot is currently actively being used" }
- temporary: { usage: GAUGE ,description: "True(1) if this is a temporary replication slot (N/A on 9.x, always 0)" }
- xmin: { usage: COUNTER ,description: "The oldest transaction that this slot needs the database to retain." }
- catalog_xmin: { usage: COUNTER ,description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain." }
- restart_lsn: { usage: COUNTER ,description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot" }
- confirm_lsn: { usage: COUNTER ,description: "Confirmed flush lsn (N/A before 9.6, always 0)" }
- retained_bytes: { usage: GAUGE ,description: "Size of bytes that retained for this slot" }
#==============================================================#
# 0250 pg_recv
#==============================================================#
pg_recv_96:
name: pg_recv
desc: PostgreSQL walreceiver metrics (9.6 - 12)
query: |-
SELECT
(regexp_match(conninfo, '.*host=(\S+).*'))[1] AS sender_host,
(regexp_match(conninfo, '.*port=(\S+).*'))[1] AS sender_port,
coalesce(slot_name, 'NULL') AS slot_name,
pid,
CASE status WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state,
(('x' || lpad(split_part(receive_start_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(receive_start_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS init_lsn,
receive_start_tli AS init_tli,
(('x' || lpad(split_part(received_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(received_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS flush_lsn,
received_tli AS flush_tli,
(('x' || lpad(split_part(latest_end_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(latest_end_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS reported_lsn,
last_msg_send_time AS msg_send_time,
last_msg_receipt_time AS msg_recv_time,
latest_end_time AS reported_time,
now() AS time
FROM pg_stat_wal_receiver;
ttl: 10
min_version: 90600
max_version: 130000
tags: [ cluster, replica ]
metrics:
- sender_host: { usage: LABEL ,description: "Host of the PostgreSQL instance this WAL receiver is connected to" }
- sender_port: { usage: LABEL ,description: "Port number of the PostgreSQL instance this WAL receiver is connected to." }
- slot_name: { usage: LABEL ,description: "Replication slot name used by this WAL receiver" }
- pid: { usage: GAUGE ,description: "Process ID of the WAL receiver process" }
- state: { usage: GAUGE ,description: "Encoded activity status of the WAL receiver process 0-4 for streaming|startup|catchup|backup|stopping" }
- init_lsn: { usage: COUNTER ,description: "First write-ahead log location used when WAL receiver is started" }
- init_tli: { usage: COUNTER ,description: "First timeline number used when WAL receiver is started" }
- flush_lsn: { usage: COUNTER ,description: "Last write-ahead log location already received and flushed to disk" }
- flush_tli: { usage: COUNTER ,description: "Timeline number of last write-ahead log location received and flushed to disk" }
- reported_lsn: { usage: COUNTER ,description: "Last write-ahead log location reported to origin WAL sender" }
- msg_send_time: { usage: GAUGE ,description: "Send time of last message received from origin WAL sender" }
- msg_recv_time: { usage: GAUGE ,description: "Receipt time of last message received from origin WAL sender" }
- reported_time: { usage: GAUGE ,description: "Time of last write-ahead log location reported to origin WAL sender" }
- time: { usage: GAUGE ,description: "Time of current snapshot" }
#==============================================================#
# 0270 pg_origin
#==============================================================#
# skip by default, require additional privilege setup
# GRANT SELECT ON pg_replication_origin, pg_replication_origin_status TO pg_monitor;
pg_origin:
name: pg_origin
desc: PostgreSQL replay state (approximate) for a certain origin
query: |-
SELECT roname,
(('x' || lpad(split_part(remote_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(remote_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS remote_lsn,
(('x' || lpad(split_part(local_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(local_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS local_lsn
FROM pg_replication_origin o LEFT JOIN pg_replication_origin_status os ON o.roident = os.local_id;
ttl: 10
min_version: 90500
skip: true
tags: [ cluster ]
metrics:
- roname: { usage: LABEL ,description: "The external, user defined, name of a replication origin." }
- remote_lsn: { usage: COUNTER ,description: "The origin node's LSN up to which data has been replicated." }
- local_lsn: { usage: COUNTER ,description: "This node's LSN at which remote_lsn has been replicated." }
#==============================================================#
# 0310 pg_size
#==============================================================#
pg_size:
name: pg_size
desc: PostgreSQL database size (legacy 9.1-9.6)
query: |-
SELECT datname, pg_database_size(oid) AS bytes FROM pg_database;
ttl: 60
timeout: 1
min_version: 90100
tags: [ cluster ]
metrics:
- datname: { usage: LABEL ,description: "Database name" }
- bytes: { usage: GAUGE ,description: "Database size in bytes" }
#==============================================================#
# 0320 pg_archiver
#==============================================================#
pg_archiver:
name: pg_archiver
desc: PostgreSQL archiver process statistics
query: |-
SELECT archived_count AS finish_count,failed_count,
extract(epoch FROM last_archived_time) AS finish_time,
extract(epoch FROM last_failed_time) AS failed_time,
extract(epoch FROM stats_reset) AS reset_time
FROM pg_stat_archiver;
ttl: 60
min_version: 90400
tags: [ cluster ]
metrics:
- finish_count: { usage: COUNTER ,description: "Number of WAL files that have been successfully archived" }
- failed_count: { usage: COUNTER ,description: "Number of failed attempts for archiving WAL files" }
- finish_time: { usage: GAUGE ,description: "Time of the last successful archive operation" }
- failed_time: { usage: GAUGE ,description: "Time of the last failed archival operation" }
- reset_time: { usage: GAUGE ,description: "Time at which archive statistics were last reset" }
#==============================================================#
# 0330 pg_bgwriter
#==============================================================#
# https://pgpedia.info/p/pg_stat_bgwriter.html
pg_bgwriter_94:
name: pg_bgwriter
desc: "PostgreSQL background writer metrics (PG 9.4-16)"
query: SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, buffers_backend, maxwritten_clean, buffers_backend_fsync, buffers_alloc, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter;
ttl: 10
min_version: 90400
max_version: 170000
tags: [ cluster ]
metrics:
- checkpoints_timed: { usage: COUNTER ,description: "Number of scheduled checkpoints that have been performed" }
- checkpoints_req: { usage: COUNTER ,description: "Number of requested checkpoints that have been performed" }
- checkpoint_write_time: { usage: COUNTER ,scale: 1e-3 ,description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in seconds" }
- checkpoint_sync_time: { usage: COUNTER ,scale: 1e-3 ,description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in seconds" }
- buffers_checkpoint: { usage: COUNTER ,description: "Number of buffers written during checkpoints" }
- buffers_clean: { usage: COUNTER ,description: "Number of buffers written by the background writer" }
- buffers_backend: { usage: COUNTER ,description: "Number of buffers written directly by a backend" }
- maxwritten_clean: { usage: COUNTER ,description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" }
- buffers_backend_fsync: { usage: COUNTER ,description: "Number of times a backend had to execute its own fsync call" }
- buffers_alloc: { usage: COUNTER ,description: "Number of buffers allocated" }
- reset_time: { usage: GAUGE ,description: "Time at which bgwriter statistics were last reset" }
pg_bgwriter_91:
name: pg_bgwriter
desc: "PostgreSQL background writer metrics (PG 9.1-9.3)"
query: SELECT checkpoints_timed, checkpoints_req, 0::BIGINT AS checkpoint_write_time, 0::BIGINT AS checkpoint_sync_time, buffers_checkpoint, buffers_clean, buffers_backend, maxwritten_clean, buffers_backend_fsync, buffers_alloc, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter;
ttl: 10
min_version: 90100
max_version: 90400
tags: [ cluster ]
metrics:
- checkpoints_timed: { usage: COUNTER ,description: "Number of scheduled checkpoints that have been performed" }
- checkpoints_req: { usage: COUNTER ,description: "Number of requested checkpoints that have been performed" }
- checkpoint_write_time: { usage: COUNTER ,scale: 1e-3 ,description: "Total amount of time spent writing checkpoint files, in seconds (N/A on 9.1-9.3, always 0)" }
- checkpoint_sync_time: { usage: COUNTER ,scale: 1e-3 ,description: "Total amount of time spent syncing checkpoint files, in seconds (N/A on 9.1-9.3, always 0)" }
- buffers_checkpoint: { usage: COUNTER ,description: "Number of buffers written during checkpoints" }
- buffers_clean: { usage: COUNTER ,description: "Number of buffers written by the background writer" }
- buffers_backend: { usage: COUNTER ,description: "Number of buffers written directly by a backend" }
- maxwritten_clean: { usage: COUNTER ,description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" }
- buffers_backend_fsync: { usage: COUNTER ,description: "Number of times a backend had to execute its own fsync call" }
- buffers_alloc: { usage: COUNTER ,description: "Number of buffers allocated" }
- reset_time: { usage: GAUGE ,description: "Time at which bgwriter statistics were last reset" }
#==============================================================#
# 0331 pg_checkpointer
#==============================================================#
pg_checkpointer_94:
name: pg_checkpointer
desc: "PostgreSQL checkpointer stat metrics for pg 9.4-16"
query: SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter;
ttl: 10
min_version: 90400
max_version: 170000
tags: [ cluster ]
metrics:
- checkpoints_timed: { usage: COUNTER ,rename: timed ,description: "Number of scheduled checkpoints that have been performed" }
- checkpoints_req: { usage: COUNTER ,rename: req ,description: "Number of requested checkpoints that have been performed" }
- checkpoint_write_time: { usage: COUNTER ,rename: write_time ,scale: 1e-3 ,description: "Total amount of time that has been spent writing checkpoint files, in seconds" }
- checkpoint_sync_time: { usage: COUNTER ,rename: sync_time ,scale: 1e-3 ,description: "Total amount of time that has been spent synchronizing checkpoint files to disk, in seconds" }
- buffers_checkpoint: { usage: COUNTER ,rename: buffers_written ,description: "Number of buffers written during checkpoints and restartpoints" }
- reset_time: { usage: GAUGE ,description: "Time at which checkpointer statistics were last reset" }
pg_checkpointer_91:
name: pg_checkpointer
desc: "PostgreSQL checkpointer stat metrics for pg 9.1-9.3"
query: SELECT checkpoints_timed, checkpoints_req, 0::BIGINT AS checkpoint_write_time, 0::BIGINT AS checkpoint_sync_time, buffers_checkpoint, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter;
ttl: 10
min_version: 90100
max_version: 90400
tags: [ cluster ]
metrics:
- checkpoints_timed: { usage: COUNTER ,rename: timed ,description: "Number of scheduled checkpoints that have been performed" }
- checkpoints_req: { usage: COUNTER ,rename: req ,description: "Number of requested checkpoints that have been performed" }
- checkpoint_write_time: { usage: COUNTER ,rename: write_time ,scale: 1e-3 ,description: "Total amount of time that has been spent writing checkpoint files, in seconds (N/A on 9.1-9.3, always 0)" }
- checkpoint_sync_time: { usage: COUNTER ,rename: sync_time ,scale: 1e-3 ,description: "Total amount of time that has been spent synchronizing checkpoint files to disk, in seconds (N/A on 9.1-9.3, always 0)" }
- buffers_checkpoint: { usage: COUNTER ,rename: buffers_written ,description: "Number of buffers written during checkpoints and restartpoints" }
- reset_time: { usage: GAUGE ,description: "Time at which checkpointer statistics were last reset" }
#==============================================================#
# 0340 pg_ssl
#==============================================================#
pg_ssl:
name: pg_ssl
desc: PostgreSQL SSL client connection count
query: |
SELECT count(*) FILTER (WHERE ssl) AS enabled, count(*) FILTER ( WHERE NOT ssl) AS disabled FROM pg_stat_ssl;
ttl: 10
min_version: 90500
tags: [ cluster ]
metrics:
- enabled: { usage: GAUGE ,description: "Number of client connection that use ssl" }
- disabled: { usage: GAUGE ,description: "Number of client connection that does not use ssl" }
#==============================================================#
# 0350 pg_checkpoint
#==============================================================#
pg_checkpoint:
name: pg_checkpoint
desc: checkpoint information from pg_control_checkpoint (9.6)
query: |-
SELECT
(('x' || lpad(split_part(checkpoint_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(checkpoint_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS checkpoint_lsn,
(('x' || lpad(split_part(redo_location::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(redo_location::text, '/', 2), 8, '0'))::bit(32)::bigint) AS redo_lsn,
timeline_id AS tli,
prev_timeline_id AS prev_tli,
full_page_writes,
split_part(next_xid, ':', 1) AS next_xid_epoch,
split_part(next_xid, ':', 2) AS next_xid,
next_oid::BIGINT,
next_multixact_id::text::BIGINT,
next_multi_offset::text::BIGINT,
oldest_xid::text::BIGINT,
oldest_xid_dbid::text::BIGINT,
oldest_active_xid::text::BIGINT,
oldest_multi_xid::text::BIGINT,
oldest_multi_dbid::BIGINT,
oldest_commit_ts_xid::text::BIGINT,
newest_commit_ts_xid::text::BIGINT,
checkpoint_time AS time,
extract(epoch from now() - checkpoint_time) AS elapse
FROM pg_control_checkpoint();
ttl: 60
min_version: 90600
tags: [ cluster ]
metrics:
- checkpoint_lsn: { usage: COUNTER ,description: "Latest checkpoint location" }
- redo_lsn: { usage: COUNTER ,description: "Latest checkpoint's REDO location" }
- tli: { usage: COUNTER ,description: "Latest checkpoint's TimeLineID" }
- prev_tli: { usage: COUNTER ,description: "Latest checkpoint's PrevTimeLineID" }
- full_page_writes: { usage: GAUGE ,description: "Latest checkpoint's full_page_writes enabled" }
- next_xid_epoch: { usage: COUNTER ,description: "Latest checkpoint's NextXID epoch" }
- next_xid: { usage: COUNTER ,description: "Latest checkpoint's NextXID xid" }
- next_oid: { usage: COUNTER ,description: "Latest checkpoint's NextOID" }
- next_multixact_id: { usage: COUNTER ,description: "Latest checkpoint's NextMultiXactId" }
- next_multi_offset: { usage: COUNTER ,description: "Latest checkpoint's NextMultiOffset" }
- oldest_xid: { usage: COUNTER ,description: "Latest checkpoint's oldestXID" }
- oldest_xid_dbid: { usage: GAUGE ,description: "Latest checkpoint's oldestXID's DB OID" }
- oldest_active_xid: { usage: COUNTER ,description: "Latest checkpoint's oldestActiveXID" }
- oldest_multi_xid: { usage: COUNTER ,description: "Latest checkpoint's oldestMultiXid" }
- oldest_multi_dbid: { usage: GAUGE ,description: "Latest checkpoint's oldestMulti's DB OID" }
- oldest_commit_ts_xid: { usage: COUNTER ,description: "Latest checkpoint's oldestCommitTsXid" }
- newest_commit_ts_xid: { usage: COUNTER ,description: "Latest checkpoint's newestCommitTsXid" }
- time: { usage: COUNTER ,description: "Time of latest checkpoint" }
- elapse: { usage: GAUGE ,description: "Seconds elapsed since latest checkpoint in seconds" }
#==============================================================#
# 0355 pg_timeline
#==============================================================#
pg_timeline:
name: pg_timeline
desc: Current timeline ID from primary or replica
query: |
SELECT COALESCE(
(SELECT received_tli FROM pg_stat_wal_receiver),
(SELECT timeline_id FROM pg_control_checkpoint())
) AS id;
ttl: 10
min_version: 90600
tags: [ cluster ]
metrics:
- id: { usage: GAUGE ,description: "Current timeline ID" }
#==============================================================#
# 0360 pg_recovery
#==============================================================#
pg_recovery:
name: pg_recovery
desc: PostgreSQL control recovery metrics (9.6)
query: |
SELECT min_recovery_end_timeline AS min_timeline,
(('x' || lpad(split_part(min_recovery_end_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(min_recovery_end_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS min_lsn,
(('x' || lpad(split_part(backup_start_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(backup_start_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS backup_start_lsn,
(('x' || lpad(split_part(backup_end_lsn::text, '/', 1), 8, '0'))::bit(32)::bigint * 4294967296
+ ('x' || lpad(split_part(backup_end_lsn::text, '/', 2), 8, '0'))::bit(32)::bigint) AS backup_end_lsn,
end_of_backup_record_required AS require_record
FROM pg_control_recovery();
ttl: 10
min_version: 90600
tags: [ cluster, replica ]
metrics:
- min_timeline: { usage: COUNTER ,description: "Min recovery ending loc's timeline" }
- min_lsn: { usage: COUNTER ,description: "Minimum recovery ending location" }
- backup_start_lsn: { usage: COUNTER ,description: "Backup start location" }
- backup_end_lsn: { usage: COUNTER ,description: "Backup end location" }
- require_record: { usage: GAUGE ,description: "End-of-backup record required" }
#==============================================================#
# 0410 pg_activity
#==============================================================#
pg_activity_92:
name: pg_activity
desc: PostgreSQL backend activity group by database and state (9.2+)
query: |-
SELECT datname, state, coalesce(count, 0) AS count, coalesce(max_duration, 0) AS max_duration, coalesce(max_tx_duration, 0) AS max_tx_duration, coalesce(max_conn_duration, 0) AS max_conn_duration FROM
(SELECT d.datname, a.state FROM pg_database d, unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state) WHERE d.datallowconn AND NOT d.datistemplate) base
LEFT JOIN (SELECT datname, state, count(*) AS count, max(extract(epoch from now() - state_change)) AS max_duration, max(extract(epoch from now() - xact_start))
AS max_tx_duration, max(extract(epoch from now() - backend_start)) AS max_conn_duration FROM pg_stat_activity WHERE pid <> pg_backend_pid() GROUP BY 1,2) data USING (datname,state);
ttl: 10
min_version: 90200
tags: [ cluster ]
metrics:
- datname: { usage: LABEL ,description: "Name of the database this backend is connected to" }
- state: { usage: LABEL ,description: "Current overall state of this backend." }
- count: { usage: GAUGE ,description: "Count of connection among (datname,state)" }
- max_duration: { usage: GAUGE ,description: "Max duration since last state change among (datname, state)" }
- max_tx_duration: { usage: GAUGE ,description: "Max transaction duration since state change among (datname, state)" }
- max_conn_duration: { usage: GAUGE ,description: "Max backend session duration since state change among (datname, state)" }