Skip to content

Commit 5f86443

Browse files
committed
feat (pg): optimize graph schema for larger datasets
1 parent 4b82ee3 commit 5f86443

5 files changed

Lines changed: 30 additions & 30 deletions

File tree

drivers/pg/query/definitions.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@ import "regexp"
44

55
var (
66
pgPropertyIndexRegex = regexp.MustCompile(`(?i)^create\s+(unique)?(?:\s+)?index\s+([^ ]+)\s+on\s+\S+\s+using\s+([^ ]+)\s+\(+properties\s+->>\s+'([^:]+)::.+$`)
7-
pgColumnIndexRegex = regexp.MustCompile(`(?i)^create\s+(unique)?(?:\s+)?index\s+([^ ]+)\s+on\s+\S+\s+using\s+([^ ]+)\s+\(([^)]+)\)$`)
7+
pgColumnIndexRegex = regexp.MustCompile(`(?i)^create\s+(unique)?(?:\s+)?index\s+([^ ]+)\s+on\s+\S+\s+using\s+([^ ]+)\s+\(([^)]+)\)(?:\s+include\s+\(([^)]+)\))?$`)
88
)
99

1010
const (
11-
pgIndexRegexGroupUnique = 1
12-
pgIndexRegexGroupName = 2
13-
pgIndexRegexGroupIndexType = 3
14-
pgIndexRegexGroupFields = 4
15-
pgIndexRegexNumExpectedGroups = 5
11+
pgIndexRegexGroupUnique = 1
12+
pgIndexRegexGroupName = 2
13+
pgIndexRegexGroupIndexType = 3
14+
pgIndexRegexGroupUsingFields = 4
15+
pgIndexRegexGroupIncludeFields = 5
16+
pgIndexRegexNumExpectedGroups = 6
1617

1718
pgIndexTypeBTree = "btree"
1819
pgIndexTypeGIN = "gin"

drivers/pg/query/format.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ func FormatRelationshipPartitionUpsert(graphTarget model.Graph, identityProperti
161161
return join("insert into ", graphTarget.Partitions.Edge.Name, " as e ",
162162
"(graph_id, start_id, end_id, kind_id, properties) ",
163163
"select $1, unnest($2::int8[]), unnest($3::int8[]), unnest($4::int2[]), unnest($5::jsonb[]) ",
164-
formatConflictMatcher(identityProperties, "graph_id, start_id, end_id, kind_id"),
164+
formatConflictMatcher(identityProperties, "start_id, end_id, kind_id, graph_id"),
165165
"do update set properties = e.properties || excluded.properties;",
166166
)
167167
}

drivers/pg/query/query.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@ func (s Query) describeGraphPartition(name string) (model.GraphPartition, error)
4545
if captureGroups[pgIndexRegexGroupUnique] == pgIndexUniqueStr {
4646
graphPartition.Constraints[indexName] = graph.Constraint{
4747
Name: indexName,
48-
Field: captureGroups[pgIndexRegexGroupFields],
48+
Field: captureGroups[pgIndexRegexGroupUsingFields],
4949
Type: parsePostgresIndexType(captureGroups[pgIndexRegexGroupIndexType]),
5050
}
5151
} else {
5252
graphPartition.Indexes[indexName] = graph.Index{
5353
Name: indexName,
54-
Field: captureGroups[pgIndexRegexGroupFields],
54+
Field: captureGroups[pgIndexRegexGroupUsingFields],
5555
Type: parsePostgresIndexType(captureGroups[pgIndexRegexGroupIndexType]),
5656
}
5757
}

drivers/pg/query/sql/schema_up.sql

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ create table if not exists edge
143143
primary key (id, graph_id),
144144
foreign key (graph_id) references graph (id) on delete cascade,
145145

146-
unique (graph_id, start_id, end_id, kind_id)
146+
unique (start_id, end_id, kind_id, graph_id)
147147
) partition by list (graph_id);
148148

149149
-- delete_node_edges is a trigger and associated plpgsql function to cascade delete edges when attached nodes are
@@ -176,22 +176,20 @@ execute procedure delete_node_edges();
176176
alter table edge
177177
alter column properties set storage main;
178178

179-
-- Remove the old graph ID index.
179+
-- Remove old indexes that are now redundant or superseded.
180180
drop index if exists edge_graph_id_index;
181-
182-
-- Index on the start vertex of each edge.
183-
create index if not exists edge_start_id_index on edge using btree (start_id);
184-
185-
-- Index on the start vertex of each edge.
186-
create index if not exists edge_end_id_index on edge using btree (end_id);
187-
188-
-- Index on the kind of each edge.
189-
create index if not exists edge_kind_index on edge using btree (kind_id);
190-
191-
-- Index lookups that include the edge's start or end id along with a filter for the edge type. This is the most
192-
-- common join filter during traversal.
193-
create index if not exists edge_start_kind_index on edge using btree (start_id, kind_id);
194-
create index if not exists edge_end_kind_index on edge using btree (end_id, kind_id);
181+
drop index if exists edge_start_id_index;
182+
drop index if exists edge_end_id_index;
183+
drop index if exists edge_kind_index;
184+
drop index if exists edge_start_kind_index;
185+
drop index if exists edge_end_kind_index;
186+
187+
-- Covering indexes for traversal joins. The INCLUDE columns allow index-only scans for the common case where
188+
-- the join needs (id, start_id, end_id, kind_id) without fetching from the heap. The standalone start_id,
189+
-- end_id, and kind_id indexes are intentionally omitted: the composite indexes satisfy left-prefix lookups
190+
-- on start_id or end_id alone, and kind_id is never queried in isolation during traversal.
191+
create index if not exists edge_start_id_kind_id_id_end_id_index on edge using btree (start_id, kind_id) include (id, end_id);
192+
create index if not exists edge_end_id_kind_id_id_start_id_index on edge using btree (end_id, kind_id) include (id, start_id);
195193

196194
-- Path composite type
197195
do
@@ -365,15 +363,17 @@ create or replace function public.create_unidirectional_pathspace_tables()
365363
returns void as
366364
$$
367365
begin
366+
-- The path column is intentionally not a primary key. Deduplication is handled by the DISTINCT ON clauses in
367+
-- the harness functions, so dropping the PK on the variable-length int8[] column avoids maintaining a B-tree
368+
-- whose keys grow longer with traversal depth.
368369
create temporary table forward_front
369370
(
370371
root_id int8 not null,
371372
next_id int8 not null,
372373
depth int4 not null,
373374
satisfied bool,
374375
is_cycle bool not null,
375-
path int8[] not null,
376-
primary key (path)
376+
path int8[] not null
377377
) on commit drop;
378378

379379
create temporary table next_front
@@ -383,8 +383,7 @@ begin
383383
depth int4 not null,
384384
satisfied bool,
385385
is_cycle bool not null,
386-
path int8[] not null,
387-
primary key (path)
386+
path int8[] not null
388387
) on commit drop;
389388

390389
create index forward_front_next_id_index on forward_front using btree (next_id);

drivers/pg/statements.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ const (
1212
// Azure post-processing. This was done because Azure post will submit the same creation request hundreds of
1313
// times for the same edge. In PostgreSQL this results in a constraint violation. For now this is best-effort
1414
// until Azure post-processing can be refactored.
15-
createEdgeBatchStatement = `insert into edge as e (graph_id, start_id, end_id, kind_id, properties) select $1, unnest($2::int8[]), unnest($3::int8[]), unnest($4::int2[]), unnest($5::jsonb[]) on conflict (graph_id, start_id, end_id, kind_id) do update set properties = e.properties || excluded.properties;`
15+
createEdgeBatchStatement = `insert into edge as e (graph_id, start_id, end_id, kind_id, properties) select $1, unnest($2::int8[]), unnest($3::int8[]), unnest($4::int2[]), unnest($5::jsonb[]) on conflict (start_id, end_id, kind_id, graph_id) do update set properties = e.properties || excluded.properties;`
1616
deleteEdgeWithIDStatement = `delete from edge as e where e.id = any($1)`
1717

1818
edgePropertySetOnlyStatement = `update edge set properties = properties || $1::jsonb where edge.id = $2`

0 commit comments

Comments
 (0)