-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy path0240-pg_slot.yml
More file actions
201 lines (190 loc) · 19.2 KB
/
0240-pg_slot.yml
File metadata and controls
201 lines (190 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#==============================================================#
# 0240 pg_slot
#==============================================================#
# Replication slot collector for PostgreSQL 17 and newer: min_version is
# 170000 and no max_version is set, so later major versions use it as well.
# On top of the v16 column set it exposes failover, synced, inactive_since
# and invalidation_reason.
pg_slot_17:
  name: pg_slot
  desc: PostgreSQL replication slot metrics v17, slot also exists on standby
  # One row per slot. The LEFT OUTER JOIN keeps slots that have no row in
  # pg_stat_replication_slots; their statistics columns come back as NULL.
  # LSN columns are turned into byte offsets by subtracting '0/0'.
  # retained_bytes is measured from the replay LSN while in recovery and from
  # the current WAL LSN otherwise.
  # invalidation_reason: 'idle_timeout' is mapped explicitly to 4 because this
  # collector has no upper version bound; any other value (including NULL)
  # still falls through to 0, as before.
  query: |-
    SELECT s.slot_name, s.slot_type, plugin, database AS datname,datoid,active_pid,
    active,temporary,two_phase,conflicting,failover,synced,
    xmin::TEXT::BIGINT AS xmin,catalog_xmin::TEXT::BIGINT AS catalog_xmin,
    restart_lsn - '0/0' AS restart_lsn, confirmed_flush_lsn - '0/0' AS confirm_lsn,
    CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS retained_bytes,
    safe_wal_size, CASE wal_status WHEN 'reserved' THEN 0 WHEN 'extended' THEN 1 WHEN 'unreserved' THEN 2 WHEN 'lost' THEN 3 ELSE -1 END AS wal_status,
    spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,extract(EPOCH FROM stats_reset) AS reset_time,
    extract(EPOCH FROM inactive_since) AS inactive_since, CASE invalidation_reason WHEN 'wal_removed' THEN 1 WHEN 'rows_removed' THEN 2 WHEN 'wal_level_insufficient' THEN 3 WHEN 'idle_timeout' THEN 4 ELSE 0 END AS invalidation_reason
    FROM pg_replication_slots s LEFT OUTER JOIN pg_stat_replication_slots ss ON s.slot_name = ss.slot_name;
  ttl: 10
  min_version: 170000
  # NOTE(review): tag semantics are defined by the exporter; unlike the
  # pre-16 collectors there is no `primary` tag here — confirm intent.
  tags: [ cluster ]
  metrics:
    # --- labels -----------------------------------------------------------
    - slot_name:
        usage: LABEL
        description: "A unique, cluster-wide identifier for the replication slot"
    - slot_type:
        usage: LABEL
        description: "The slot type, physical or logical"
    - plugin:
        usage: LABEL
        description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots."
    - datname:
        usage: LABEL
        description: "The name of the database this slot is associated with, logical slots only, null for physical slot"
    # --- slot identity and state -------------------------------------------
    - datoid:
        usage: GAUGE
        description: "The OID of the database this slot is associated with, logical slots only, null for physical slot"
    - active_pid:
        usage: GAUGE
        description: "The process ID of the session streaming data for this slot. NULL if inactive."
    - active:
        usage: GAUGE
        description: "True(1) if this slot is currently actively being used"
    - temporary:
        usage: GAUGE
        description: "True(1) if this is a temporary replication slot."
    - two_phase:
        usage: GAUGE
        description: "True(1) if the slot is enabled for decoding prepared transactions. Always false for physical slots."
    - conflicting:
        usage: GAUGE
        description: "True(1) if this logical slot conflicted with recovery. Always NULL for physical slots."
    - failover:
        usage: GAUGE
        description: "True(1) if this is a logical slot enabled to be synced to the standbys"
    - synced:
        usage: GAUGE
        description: "True(1) if this is a logical slot that was synced from a primary server"
    # --- transaction horizon and WAL positions -----------------------------
    - xmin:
        usage: COUNTER
        description: "The oldest transaction that this slot needs the database to retain."
    - catalog_xmin:
        usage: COUNTER
        description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain."
    - restart_lsn:
        usage: COUNTER
        description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot"
    - confirm_lsn:
        usage: COUNTER
        description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data."
    - retained_bytes:
        usage: GAUGE
        description: "Size of bytes that retained for this slot"
    - safe_wal_size:
        usage: GAUGE
        description: "bytes that can be written to WAL which will not make slot into lost"
    - wal_status:
        usage: GAUGE
        description: "WAL reserve status 0-3 means reserved,extended,unreserved,lost, -1 means other"
    # --- logical decoding statistics (pg_stat_replication_slots) -----------
    # *_txns counts transactions, *_count counts spill/stream events.
    - spill_txns:
        usage: COUNTER
        description: "Xacts that spilled to disk due to logical decode mem exceeding (subtrans included)"
    - spill_count:
        usage: COUNTER
        description: "Times that xacts were spilled to disk due to logical decode mem exceeding (one xact may spill multiple times)"
    - spill_bytes:
        usage: COUNTER
        description: "Bytes that spilled to disk due to logical decode mem exceeding"
    - stream_txns:
        usage: COUNTER
        description: "Xacts that streamed to decoding output plugin after mem exceed"
    - stream_count:
        usage: COUNTER
        description: "Times that in-progress xacts were streamed to decoding output plugin after mem exceed"
    - stream_bytes:
        usage: COUNTER
        description: "Bytes that streamed to decoding output plugin after mem exceed"
    - total_txns:
        usage: COUNTER
        description: "Number of decoded xacts sent to the decoding output plugin for this slot"
    - total_bytes:
        usage: COUNTER
        description: "Number of decoded bytes sent to the decoding output plugin for this slot"
    - reset_time:
        usage: GAUGE
        description: "When statistics were last reset"
    # --- v17 additions -----------------------------------------------------
    - invalidation_reason:
        usage: GAUGE
        description: "ok=0, wal_removed=1, rows_removed=2, wal_level_insufficient=3, idle_timeout=4"
    - inactive_since:
        usage: GAUGE
        description: "The time when the slot became inactive"
# Replication slot collector bounded by min_version 160000 and max_version
# 170000 (the desc calls it the v16 variant). Compared with the v14 variant
# it adds the `conflicting` column and drops the `primary` tag.
pg_slot_16:
  name: pg_slot
  desc: PostgreSQL replication slot metrics v16 with conflicting, now slot also exists on standby
  # One row per slot. The LEFT OUTER JOIN keeps slots that have no row in
  # pg_stat_replication_slots; their statistics columns come back as NULL.
  # LSN columns are turned into byte offsets by subtracting '0/0'.
  # retained_bytes is measured from the replay LSN while in recovery and from
  # the current WAL LSN otherwise.
  query: |-
    SELECT s.slot_name, s.slot_type, plugin, database AS datname,datoid,active_pid,
    active,temporary,two_phase,conflicting,xmin::TEXT::BIGINT AS xmin,catalog_xmin::TEXT::BIGINT AS catalog_xmin,
    restart_lsn - '0/0' AS restart_lsn, confirmed_flush_lsn - '0/0' AS confirm_lsn,
    CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS retained_bytes,
    safe_wal_size, CASE wal_status WHEN 'reserved' THEN 0 WHEN 'extended' THEN 1 WHEN 'unreserved' THEN 2 WHEN 'lost' THEN 3 ELSE -1 END AS wal_status,
    spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,extract(EPOCH FROM stats_reset) AS reset_time
    FROM pg_replication_slots s LEFT OUTER JOIN pg_stat_replication_slots ss ON s.slot_name = ss.slot_name;
  ttl: 10
  # NOTE(review): bound inclusivity is defined by the exporter — presumably
  # min is inclusive and max exclusive; confirm.
  min_version: 160000
  max_version: 170000
  tags: [ cluster ]
  metrics:
    # --- labels -----------------------------------------------------------
    - slot_name:
        usage: LABEL
        description: "A unique, cluster-wide identifier for the replication slot"
    - slot_type:
        usage: LABEL
        description: "The slot type, physical or logical"
    - plugin:
        usage: LABEL
        description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots."
    - datname:
        usage: LABEL
        description: "The name of the database this slot is associated with, logical slots only, null for physical slot"
    # --- slot identity and state -------------------------------------------
    - datoid:
        usage: GAUGE
        description: "The OID of the database this slot is associated with, logical slots only, null for physical slot"
    - active_pid:
        usage: GAUGE
        description: "The process ID of the session streaming data for this slot. NULL if inactive."
    - active:
        usage: GAUGE
        description: "True(1) if this slot is currently actively being used"
    - temporary:
        usage: GAUGE
        description: "True(1) if this is a temporary replication slot."
    - two_phase:
        usage: GAUGE
        description: "True(1) if the slot is enabled for decoding prepared transactions. Always false for physical slots."
    - conflicting:
        usage: GAUGE
        description: "True(1) if this logical slot conflicted with recovery. Always NULL for physical slots."
    # --- transaction horizon and WAL positions -----------------------------
    - xmin:
        usage: COUNTER
        description: "The oldest transaction that this slot needs the database to retain."
    - catalog_xmin:
        usage: COUNTER
        description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain."
    - restart_lsn:
        usage: COUNTER
        description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot"
    - confirm_lsn:
        usage: COUNTER
        description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data."
    - retained_bytes:
        usage: GAUGE
        description: "Size of bytes that retained for this slot"
    - safe_wal_size:
        usage: GAUGE
        description: "bytes that can be written to WAL which will not make slot into lost"
    - wal_status:
        usage: GAUGE
        description: "WAL reserve status 0-3 means reserved,extended,unreserved,lost, -1 means other"
    # --- logical decoding statistics (pg_stat_replication_slots) -----------
    # *_txns counts transactions, *_count counts spill/stream events.
    - spill_txns:
        usage: COUNTER
        description: "Xacts that spilled to disk due to logical decode mem exceeding (subtrans included)"
    - spill_count:
        usage: COUNTER
        description: "Times that xacts were spilled to disk due to logical decode mem exceeding (one xact may spill multiple times)"
    - spill_bytes:
        usage: COUNTER
        description: "Bytes that spilled to disk due to logical decode mem exceeding"
    - stream_txns:
        usage: COUNTER
        description: "Xacts that streamed to decoding output plugin after mem exceed"
    - stream_count:
        usage: COUNTER
        description: "Times that in-progress xacts were streamed to decoding output plugin after mem exceed"
    - stream_bytes:
        usage: COUNTER
        description: "Bytes that streamed to decoding output plugin after mem exceed"
    - total_txns:
        usage: COUNTER
        description: "Number of decoded xacts sent to the decoding output plugin for this slot"
    - total_bytes:
        usage: COUNTER
        description: "Number of decoded bytes sent to the decoding output plugin for this slot"
    - reset_time:
        usage: GAUGE
        description: "When statistics were last reset"
# Replication slot collector bounded by min_version 140000 and max_version
# 160000. First variant that joins pg_stat_replication_slots for logical
# decoding statistics and exposes two_phase.
pg_slot_14:
  name: pg_slot
  desc: PostgreSQL replication slot metrics v14 with pg_stat_replication_slots metrics
  # One row per slot. The LEFT OUTER JOIN keeps slots that have no row in
  # pg_stat_replication_slots; their statistics columns come back as NULL.
  # LSN columns are turned into byte offsets by subtracting '0/0'.
  # retained_bytes is measured from the replay LSN while in recovery and from
  # the current WAL LSN otherwise.
  query: |-
    SELECT s.slot_name, s.slot_type, plugin, database AS datname,datoid,active_pid,
    active,temporary,two_phase,xmin::TEXT::BIGINT AS xmin,catalog_xmin::TEXT::BIGINT AS catalog_xmin,
    restart_lsn - '0/0' AS restart_lsn, confirmed_flush_lsn - '0/0' AS confirm_lsn,
    CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS retained_bytes,
    safe_wal_size, CASE wal_status WHEN 'reserved' THEN 0 WHEN 'extended' THEN 1 WHEN 'unreserved' THEN 2 WHEN 'lost' THEN 3 ELSE -1 END AS wal_status,
    spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,extract(EPOCH FROM stats_reset) AS reset_time
    FROM pg_replication_slots s LEFT OUTER JOIN pg_stat_replication_slots ss ON s.slot_name = ss.slot_name;
  ttl: 10
  # NOTE(review): bound inclusivity is defined by the exporter — presumably
  # min is inclusive and max exclusive; confirm.
  min_version: 140000
  max_version: 160000
  # NOTE(review): `primary` presumably limits this collector to primary
  # instances — confirm against the exporter's tag handling.
  tags: [ cluster, primary ]
  metrics:
    # --- labels -----------------------------------------------------------
    - slot_name:
        usage: LABEL
        description: "A unique, cluster-wide identifier for the replication slot"
    - slot_type:
        usage: LABEL
        description: "The slot type, physical or logical"
    - plugin:
        usage: LABEL
        description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots."
    - datname:
        usage: LABEL
        description: "The name of the database this slot is associated with, logical slots only, null for physical slot"
    # --- slot identity and state -------------------------------------------
    - datoid:
        usage: GAUGE
        description: "The OID of the database this slot is associated with, logical slots only, null for physical slot"
    - active_pid:
        usage: GAUGE
        description: "The process ID of the session streaming data for this slot. NULL if inactive."
    - active:
        usage: GAUGE
        description: "True(1) if this slot is currently actively being used"
    - temporary:
        usage: GAUGE
        description: "True(1) if this is a temporary replication slot."
    - two_phase:
        usage: GAUGE
        description: "True(1) if the slot is enabled for decoding prepared transactions. Always false for physical slots."
    # --- transaction horizon and WAL positions -----------------------------
    - xmin:
        usage: COUNTER
        description: "The oldest transaction that this slot needs the database to retain."
    - catalog_xmin:
        usage: COUNTER
        description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain."
    - restart_lsn:
        usage: COUNTER
        description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot"
    - confirm_lsn:
        usage: COUNTER
        description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data."
    - retained_bytes:
        usage: GAUGE
        description: "Size of bytes that retained for this slot"
    - safe_wal_size:
        usage: GAUGE
        description: "bytes that can be written to WAL which will not make slot into lost"
    - wal_status:
        usage: GAUGE
        description: "WAL reserve status 0-3 means reserved,extended,unreserved,lost, -1 means other"
    # --- logical decoding statistics (pg_stat_replication_slots) -----------
    # *_txns counts transactions, *_count counts spill/stream events.
    - spill_txns:
        usage: COUNTER
        description: "Xacts that spilled to disk due to logical decode mem exceeding (subtrans included)"
    - spill_count:
        usage: COUNTER
        description: "Times that xacts were spilled to disk due to logical decode mem exceeding (one xact may spill multiple times)"
    - spill_bytes:
        usage: COUNTER
        description: "Bytes that spilled to disk due to logical decode mem exceeding"
    - stream_txns:
        usage: COUNTER
        description: "Xacts that streamed to decoding output plugin after mem exceed"
    - stream_count:
        usage: COUNTER
        description: "Times that in-progress xacts were streamed to decoding output plugin after mem exceed"
    - stream_bytes:
        usage: COUNTER
        description: "Bytes that streamed to decoding output plugin after mem exceed"
    - total_txns:
        usage: COUNTER
        description: "Number of decoded xacts sent to the decoding output plugin for this slot"
    - total_bytes:
        usage: COUNTER
        description: "Number of decoded bytes sent to the decoding output plugin for this slot"
    - reset_time:
        usage: GAUGE
        description: "When statistics were last reset"
# Replication slot collector bounded by min_version 130000 and max_version
# 140000. Reads pg_replication_slots only and includes safe_wal_size and
# wal_status.
pg_slot_13:
  name: pg_slot
  desc: PostgreSQL replication slot metrics v13 (wal safe size and status)
  # One row per slot. LSN columns are turned into byte offsets by subtracting
  # '0/0'; retained_bytes is measured from the replay LSN while in recovery
  # and from the current WAL LSN otherwise. wal_status is encoded as a number
  # by the CASE expression (see the metric description for the mapping).
  query: |-
    SELECT slot_name, slot_type, plugin, database AS datname,datoid,active_pid,
    active,temporary,xmin::TEXT::BIGINT AS xmin,catalog_xmin::TEXT::BIGINT AS catalog_xmin,
    restart_lsn - '0/0' AS restart_lsn, confirmed_flush_lsn - '0/0' AS confirm_lsn,
    CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS retained_bytes,
    safe_wal_size, CASE wal_status WHEN 'reserved' THEN 0 WHEN 'extended' THEN 1 WHEN 'unreserved' THEN 2 WHEN 'lost' THEN 3 ELSE -1 END AS wal_status
    FROM pg_replication_slots;
  ttl: 10
  min_version: 130000
  max_version: 140000
  tags:
    - cluster
    - primary
  metrics:
    # --- labels -----------------------------------------------------------
    - slot_name:
        usage: LABEL
        description: "A unique, cluster-wide identifier for the replication slot"
    - slot_type:
        usage: LABEL
        description: "The slot type, physical or logical"
    - plugin:
        usage: LABEL
        description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots."
    - datname:
        usage: LABEL
        description: "The name of the database this slot is associated with, logical slots only, null for physical slot"
    # --- slot identity and state -------------------------------------------
    - datoid:
        usage: GAUGE
        description: "The OID of the database this slot is associated with, logical slots only, null for physical slot"
    - active_pid:
        usage: GAUGE
        description: "The process ID of the session streaming data for this slot. NULL if inactive."
    - active:
        usage: GAUGE
        description: "True(1) if this slot is currently actively being used"
    - temporary:
        usage: GAUGE
        description: "True(1) if this is a temporary replication slot."
    # --- transaction horizon and WAL positions -----------------------------
    - xmin:
        usage: COUNTER
        description: "The oldest transaction that this slot needs the database to retain."
    - catalog_xmin:
        usage: COUNTER
        description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain."
    - restart_lsn:
        usage: COUNTER
        description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot"
    - confirm_lsn:
        usage: COUNTER
        description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data."
    - retained_bytes:
        usage: GAUGE
        description: "Size of bytes that retained for this slot"
    # --- WAL reservation ---------------------------------------------------
    - safe_wal_size:
        usage: GAUGE
        description: "bytes that can be written to WAL which will not make slot into lost"
    - wal_status:
        usage: GAUGE
        description: "WAL reserve status 0-3 means reserved,extended,unreserved,lost, -1 means other"
# Replication slot collector bounded by min_version 100000 and max_version
# 130000 (the desc gives the range as 10 ~ 12). Smallest column set of the
# pg_slot family: no WAL-reservation or decoding-statistics columns.
pg_slot_10:
  name: pg_slot
  desc: PostgreSQL replication slot metrics 10 ~ 12
  # One row per slot. LSN columns are turned into byte offsets by subtracting
  # '0/0'; retained_bytes is measured from the replay LSN while in recovery
  # and from the current WAL LSN otherwise.
  query: |-
    SELECT slot_name, slot_type, plugin, database AS datname,datoid,active_pid,
    active,temporary,xmin::TEXT::BIGINT AS xmin,catalog_xmin::TEXT::BIGINT AS catalog_xmin,
    restart_lsn - '0/0' AS restart_lsn, confirmed_flush_lsn - '0/0' AS confirm_lsn,
    CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS retained_bytes
    FROM pg_replication_slots;
  ttl: 10
  min_version: 100000
  max_version: 130000
  tags:
    - cluster
    - primary
  metrics:
    # --- labels -----------------------------------------------------------
    - slot_name:
        usage: LABEL
        description: "A unique, cluster-wide identifier for the replication slot"
    - slot_type:
        usage: LABEL
        description: "The slot type, physical or logical"
    - plugin:
        usage: LABEL
        description: "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots."
    - datname:
        usage: LABEL
        description: "The name of the database this slot is associated with, logical slots only, null for physical slot"
    # --- slot identity and state -------------------------------------------
    - datoid:
        usage: GAUGE
        description: "The OID of the database this slot is associated with, logical slots only, null for physical slot"
    - active_pid:
        usage: GAUGE
        description: "The process ID of the session streaming data for this slot. NULL if inactive."
    - active:
        usage: GAUGE
        description: "True(1) if this slot is currently actively being used"
    - temporary:
        usage: GAUGE
        description: "True(1) if this is a temporary replication slot."
    # --- transaction horizon and WAL positions -----------------------------
    - xmin:
        usage: COUNTER
        description: "The oldest transaction that this slot needs the database to retain."
    - catalog_xmin:
        usage: COUNTER
        description: "The oldest transaction affecting the system catalogs that this slot needs the database to retain."
    - restart_lsn:
        usage: COUNTER
        description: "The address (LSN) of oldest WAL which still might be required by the consumer of this slot"
    - confirm_lsn:
        usage: COUNTER
        description: "The address (LSN) up to which the logical slot's consumer has confirmed receiving data."
    - retained_bytes:
        usage: GAUGE
        description: "Size of bytes that retained for this slot"