Skip to content

Commit adf6aaf

Browse files
committed
improvements
1 parent f34206e commit adf6aaf

11 files changed

Lines changed: 371 additions & 65 deletions

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbFilterSelectivityStats.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class LmdbFilterSelectivityStats
6868
private static final String SIDECAR_SUFFIX = ".filters";
6969
private static final int PERSIST_VERSION = 3;
7070
private static final int SAMPLE_RESERVOIR_SIZE = 256;
71+
private static final int ZERO_HIT_SAMPLE_MIN_EVIDENCE = 1024;
7172
private static final double LOW_BENEFIT_ROWS_THRESHOLD = 32.0d;
7273
private static final double LOW_BENEFIT_RATIO_THRESHOLD = 0.25d;
7374
private static final double FULL_SCAN_ROW_BUDGET = 1_000_000.0d;
@@ -261,7 +262,7 @@ public SketchBasedJoinEstimator.PatternFilterSampleEstimate estimateFilterPass(F
261262

262263
synchronized (this) {
263264
SampledPassRatio cached = sampledByFilter.get(key);
264-
if (cached != null) {
265+
if (isUsableSampledPassRatio(cached)) {
265266
return new SketchBasedJoinEstimator.PatternFilterSampleEstimate(cached.passRatio, cached.sampleSize);
266267
}
267268
}
@@ -271,7 +272,7 @@ public SketchBasedJoinEstimator.PatternFilterSampleEstimate estimateFilterPass(F
271272
}
272273

273274
SampledPassRatio sampled = sampleFilterPassRatio(filter, pattern, candidate);
274-
if (sampled == null || !isValidPassRatio(sampled.passRatio)) {
275+
if (!isUsableSampledPassRatio(sampled)) {
275276
return new SketchBasedJoinEstimator.PatternFilterSampleEstimate(-1.0d, -1L);
276277
}
277278

@@ -406,7 +407,7 @@ private void loadIfPresent() {
406407
for (int i = 0; i < sampledEntries; i++) {
407408
PatternFilterKey key = PatternFilterKey.readFrom(in);
408409
SampledPassRatio sampled = SampledPassRatio.readFrom(in);
409-
if (persistedVersion != PERSIST_VERSION || !isValidPassRatio(sampled.passRatio)) {
410+
if (persistedVersion != PERSIST_VERSION || !isUsableSampledPassRatio(sampled)) {
410411
continue;
411412
}
412413
loadedSampled.put(key, sampled);
@@ -523,6 +524,10 @@ private SampledPassRatio sampleFilterPassRatio(Filter filter, StatementPattern p
523524
}
524525
}
525526

527+
if (passed == 0 && samples.size() < ZERO_HIT_SAMPLE_MIN_EVIDENCE) {
528+
return null;
529+
}
530+
526531
return new SampledPassRatio((double) passed / samples.size(), samples.size());
527532
}
528533

@@ -811,6 +816,11 @@ private static boolean isValidPassRatio(double value) {
811816
return Double.isFinite(value) && value >= 0.0d && value <= 1.0d;
812817
}
813818

819+
private static boolean isUsableSampledPassRatio(SampledPassRatio sampled) {
820+
return sampled != null && isValidPassRatio(sampled.passRatio)
821+
&& (sampled.passRatio > 0.0d || sampled.sampleSize >= ZERO_HIT_SAMPLE_MIN_EVIDENCE);
822+
}
823+
814824
private static void writeString(DataOutputStream out, String value) throws IOException {
815825
byte[] bytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
816826
out.writeInt(bytes.length);

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSketchJoinOptimizer.java

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ final class LmdbSketchJoinOptimizer implements QueryOptimizer {
7272

7373
private static final double FINITE_BINDING_GUARD_PRODUCT_LIMIT = 5000.0d;
7474
private static final int CORRELATED_ANTI_JOIN_COMPLEX_SUFFIX_PATTERN_LIMIT = 3;
75+
private static final String FINITE_ANCHOR_PLANNER_ID = "lmdb-finite-anchor";
76+
private static final String FINITE_ANCHOR_PLANNER_PATH = "CANONICAL_FINITE_ANCHOR";
77+
private static final String LMDB_PHYSICAL_REFINEMENT = "costModel=lmdb, accessPathSelection=per-step";
7578
private final EvaluationStatistics statistics;
7679
private final boolean trackResultSize;
7780

@@ -1330,6 +1333,7 @@ private boolean relocateLeftJoinLocalFilters(Filter filter, LeftJoin leftJoin) {
13301333
private TupleExpr buildOrderedRoot(CollectedJoinArgs collected, Set<String> outerBoundVars) {
13311334
rewriteSmallLiteralDeferredFilterAnchors(collected);
13321335
deduplicateEquivalentBindingSetAssignments(collected.joinArgs);
1336+
moveUnionIndependentFiniteSuffixAfterScopedUnion(collected.joinArgs, outerBoundVars);
13331337
Set<String> scopeBindingNames = new HashSet<>(outerBoundVars);
13341338
for (TupleExpr joinArg : collected.joinArgs) {
13351339
scopeBindingNames.addAll(joinArg.getBindingNames());
@@ -1347,6 +1351,7 @@ private TupleExpr buildOrderedRoot(CollectedJoinArgs collected, Set<String> oute
13471351
LmdbSmallLiteralFilterAnchors.add(collected.joinArgs, filters, outerBoundVars);
13481352
deduplicateEquivalentBindingSetAssignments(collected.joinArgs);
13491353
splitCartesianBindingSetAssignments(collected.joinArgs);
1354+
moveUnionIndependentFiniteSuffixAfterScopedUnion(collected.joinArgs, outerBoundVars);
13501355
List<TupleExpr> roots = new ArrayList<>();
13511356
List<TupleExpr> currentSegment = new ArrayList<>();
13521357
List<DelayedExtension> delayedExtensions = new ArrayList<>();
@@ -1391,6 +1396,84 @@ && shouldPlaceBindingOnlySegmentAfterSeparator(currentSegment, optimizedSeparato
13911396
return root;
13921397
}
13931398

1399+
private void moveUnionIndependentFiniteSuffixAfterScopedUnion(List<TupleExpr> joinArgs,
1400+
Set<String> outerBoundVars) {
1401+
int unionIndex = singleScopedUnionIndex(joinArgs);
1402+
if (unionIndex <= 1) {
1403+
return;
1404+
}
1405+
TupleExpr union = joinArgs.get(unionIndex);
1406+
Set<String> unionBindings = plannerBindingNames(union.getBindingNames());
1407+
if (unionBindings.isEmpty()) {
1408+
return;
1409+
}
1410+
int suffixStart = unionIndependentFiniteSuffixStart(joinArgs, unionIndex, unionBindings, outerBoundVars);
1411+
if (suffixStart < 0) {
1412+
return;
1413+
}
1414+
List<TupleExpr> suffix = new ArrayList<>(joinArgs.subList(suffixStart, unionIndex));
1415+
joinArgs.subList(suffixStart, unionIndex).clear();
1416+
joinArgs.addAll(suffixStart + 1, suffix);
1417+
}
1418+
1419+
private int singleScopedUnionIndex(List<TupleExpr> joinArgs) {
1420+
int unionIndex = -1;
1421+
for (int i = 0; i < joinArgs.size(); i++) {
1422+
TupleExpr joinArg = joinArgs.get(i);
1423+
if (!(joinArg instanceof Union) || !TupleExprs.isVariableScopeChange(joinArg)) {
1424+
continue;
1425+
}
1426+
if (unionIndex >= 0) {
1427+
return -1;
1428+
}
1429+
unionIndex = i;
1430+
}
1431+
return unionIndex;
1432+
}
1433+
1434+
private int unionIndependentFiniteSuffixStart(List<TupleExpr> joinArgs, int unionIndex,
1435+
Set<String> unionBindings, Set<String> outerBoundVars) {
1436+
Set<String> prefixBindings = new HashSet<>(outerBoundVars);
1437+
boolean hasFiniteUnionAnchor = false;
1438+
for (int i = 0; i < unionIndex - 1; i++) {
1439+
TupleExpr prefixFactor = joinArgs.get(i);
1440+
Optional<Set<String>> assignmentNames = LmdbJoinPlanSupport
1441+
.positionableBindingSetAssignmentNames(prefixFactor);
1442+
if (assignmentNames.isPresent() && !Collections.disjoint(assignmentNames.get(), unionBindings)) {
1443+
hasFiniteUnionAnchor = true;
1444+
}
1445+
prefixBindings.addAll(plannerBindingNames(prefixFactor.getBindingNames()));
1446+
if (hasFiniteUnionAnchor
1447+
&& canMoveFiniteSuffixAfterUnion(joinArgs.subList(i + 1, unionIndex), prefixBindings,
1448+
unionBindings)) {
1449+
return i + 1;
1450+
}
1451+
}
1452+
return -1;
1453+
}
1454+
1455+
private boolean canMoveFiniteSuffixAfterUnion(List<TupleExpr> suffix, Set<String> prefixBindings,
1456+
Set<String> unionBindings) {
1457+
if (suffix.isEmpty()) {
1458+
return false;
1459+
}
1460+
Set<String> availableBindings = new HashSet<>(prefixBindings);
1461+
for (TupleExpr factor : suffix) {
1462+
if (TupleExprs.isVariableScopeChange(factor)
1463+
|| !(factor instanceof BindingSetAssignment || factor instanceof StatementPattern)) {
1464+
return false;
1465+
}
1466+
Set<String> factorBindings = plannerBindingNames(factor.getBindingNames());
1467+
Set<String> introducedBindings = new HashSet<>(factorBindings);
1468+
introducedBindings.removeAll(availableBindings);
1469+
if (!Collections.disjoint(introducedBindings, unionBindings)) {
1470+
return false;
1471+
}
1472+
availableBindings.addAll(factorBindings);
1473+
}
1474+
return true;
1475+
}
1476+
13941477
private DelayedExtension delayableIndependentExtension(TupleExpr joinArg, List<TupleExpr> joinArgs, int index,
13951478
List<DeferredFilter> filters) {
13961479
if (!(joinArg instanceof Extension extension) || TupleExprs.isVariableScopeChange(extension)) {
@@ -1729,8 +1812,7 @@ private void appendSegmentRoot(List<TupleExpr> roots, List<TupleExpr> segment, L
17291812
LmdbJoinPlanSupport.markRedundantRequiredExistsFilters(segment, filters);
17301813
List<DeferredFilter> segmentFilters = new ArrayList<>();
17311814
for (DeferredFilter filter : filters) {
1732-
if (!filter.applied && segmentBindings.containsAll(filter.requiredVars)
1733-
&& !Collections.disjoint(segmentLocalBindings, filter.requiredVars)) {
1815+
if (!filter.applied && segmentLocalBindings.containsAll(filter.requiredVars)) {
17341816
segmentFilters.add(filter);
17351817
}
17361818
}
@@ -1782,7 +1864,9 @@ private OrderedSegment orderSegment(List<TupleExpr> segment, Set<String> boundBe
17821864
Optional<List<TupleExpr>> canonicalFiniteAnchorOrder = canonicalFiniteAnchorOrder(segment,
17831865
boundBeforeSegment, plannerFilters);
17841866
if (canonicalFiniteAnchorOrder.isPresent()) {
1785-
return new OrderedSegment(new ArrayDeque<>(canonicalFiniteAnchorOrder.get()), Map.of(), true);
1867+
List<TupleExpr> orderedArgs = canonicalFiniteAnchorOrder.get();
1868+
applyFiniteAnchorPlannerMetrics(orderedArgs);
1869+
return new OrderedSegment(new ArrayDeque<>(orderedArgs), Map.of(), true);
17861870
}
17871871
}
17881872
JoinOrderPlanner planner = (JoinOrderPlanner) statistics;
@@ -1795,7 +1879,9 @@ private OrderedSegment orderSegment(List<TupleExpr> segment, Set<String> boundBe
17951879
Optional<List<TupleExpr>> canonicalFiniteAnchorOrder = canonicalFiniteAnchorOrder(segment,
17961880
boundBeforeSegment, plannerFilters);
17971881
if (canonicalFiniteAnchorOrder.isPresent()) {
1798-
return new OrderedSegment(new ArrayDeque<>(canonicalFiniteAnchorOrder.get()), Map.of(), true);
1882+
List<TupleExpr> orderedArgs = canonicalFiniteAnchorOrder.get();
1883+
applyFiniteAnchorPlannerMetrics(orderedArgs);
1884+
return new OrderedSegment(new ArrayDeque<>(orderedArgs), Map.of(), true);
17991885
}
18001886
}
18011887
return locallySelectiveFallbackOrder(segment);
@@ -2293,6 +2379,17 @@ private void applyPlannerStepEstimates(JoinOrderPlanner.JoinOrderPlan plan) {
22932379
}
22942380
}
22952381

2382+
private void applyFiniteAnchorPlannerMetrics(List<TupleExpr> orderedArgs) {
2383+
if (orderedArgs.isEmpty()) {
2384+
return;
2385+
}
2386+
TupleExpr root = orderedArgs.get(0);
2387+
root.setStringMetricPlanned(TelemetryMetricNames.PLANNER_ID, FINITE_ANCHOR_PLANNER_ID);
2388+
root.setStringMetricPlanned(TelemetryMetricNames.PLANNER_PATH, FINITE_ANCHOR_PLANNER_PATH);
2389+
root.setStringMetricPlanned(TelemetryMetricNames.OPTIMIZER_PHYSICAL_REFINEMENT,
2390+
LMDB_PHYSICAL_REFINEMENT);
2391+
}
2392+
22962393
private final class AntiJoinCost {
22972394
private final double workRows;
22982395
private final double outputRows;

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionFilterDistributor.java

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,10 @@ static TupleExpr tryDistribute(List<TupleExpr> joinArgs, List<DeferredFilter> fi
4747
}
4848

4949
List<TupleExpr> prefixFactors = new ArrayList<>(joinArgs.size() - 1);
50+
List<TupleExpr> suffixFactors = new ArrayList<>(joinArgs.subList(unionIndex + 1, joinArgs.size()));
5051
Set<String> prefixBindings = new HashSet<>(outerBoundVars);
5152
boolean hasFinitePrefixAnchor = false;
52-
for (int i = 0; i < joinArgs.size(); i++) {
53-
if (i == unionIndex) {
54-
continue;
55-
}
53+
for (int i = 0; i < unionIndex; i++) {
5654
TupleExpr joinArg = joinArgs.get(i);
5755
Optional<Set<String>> assignmentNames = finitePrefixAnchorNames(joinArg);
5856
if (assignmentNames.isPresent()) {
@@ -77,21 +75,39 @@ static TupleExpr tryDistribute(List<TupleExpr> joinArgs, List<DeferredFilter> fi
7775
|| !isSafeUnionDistributionBranch(union.getRightArg(), prefixBindings)) {
7876
return null;
7977
}
80-
Set<String> availableBindings = new HashSet<>(prefixBindings);
81-
availableBindings.addAll(union.getBindingNames());
78+
Set<String> branchAvailableBindings = new HashSet<>(prefixBindings);
79+
branchAvailableBindings.addAll(union.getBindingNames());
80+
Set<String> finalAvailableBindings = new HashSet<>(branchAvailableBindings);
81+
for (TupleExpr suffixFactor : suffixFactors) {
82+
finalAvailableBindings.addAll(suffixFactor.getBindingNames());
83+
}
84+
List<DeferredFilter> branchFilters = new ArrayList<>();
85+
List<DeferredFilter> postUnionFilters = new ArrayList<>();
8286
for (DeferredFilter filter : filters) {
83-
if (!availableBindings.containsAll(filter.requiredVars)) {
87+
if (branchAvailableBindings.containsAll(filter.requiredVars)) {
88+
branchFilters.add(filter);
89+
} else if (finalAvailableBindings.containsAll(filter.requiredVars)) {
90+
postUnionFilters.add(filter);
91+
} else {
8492
return null;
8593
}
8694
}
8795

88-
TupleExpr left = buildBranch(union.getLeftArg(), prefixFactors, filters, outerBoundVars, branchOptimizer,
96+
TupleExpr left = buildBranch(union.getLeftArg(), prefixFactors, branchFilters, outerBoundVars, branchOptimizer,
8997
joinFactory, filterWrapper);
90-
TupleExpr right = buildBranch(union.getRightArg(), prefixFactors, filters, outerBoundVars, branchOptimizer,
98+
TupleExpr right = buildBranch(union.getRightArg(), prefixFactors, branchFilters, outerBoundVars,
99+
branchOptimizer,
91100
joinFactory, filterWrapper);
92101
Union distributed = new Union(left, right);
93102
distributed.setVariableScopeChange(union.isVariableScopeChange());
94-
return distributed;
103+
if (suffixFactors.isEmpty()) {
104+
return distributed;
105+
}
106+
107+
TupleExpr suffix = joinFactors(suffixFactors, joinFactory);
108+
suffix = branchOptimizer.optimize(suffix, branchAvailableBindings);
109+
TupleExpr root = joinFactory.create(distributed, suffix);
110+
return filterWrapper.wrap(root, postUnionFilters, "unionSuffix");
95111
}
96112

97113
private static int singleUnionIndex(List<TupleExpr> joinArgs) {
@@ -126,12 +142,17 @@ private static TupleExpr prependPrefix(TupleExpr branch, List<TupleExpr> prefixF
126142
}
127143

128144
private static TupleExpr joinPrefix(List<TupleExpr> prefixFactors, TupleExpr branch, JoinFactory joinFactory) {
145+
TupleExpr root = joinFactors(prefixFactors, joinFactory);
146+
return root == null ? branch : joinFactory.create(root, branch);
147+
}
148+
149+
private static TupleExpr joinFactors(List<TupleExpr> factors, JoinFactory joinFactory) {
129150
TupleExpr root = null;
130-
for (TupleExpr prefixFactor : prefixFactors) {
131-
TupleExpr prefix = prefixFactor.clone();
132-
root = root == null ? prefix : joinFactory.create(root, prefix);
151+
for (TupleExpr factor : factors) {
152+
TupleExpr clone = factor.clone();
153+
root = root == null ? clone : joinFactory.create(root, clone);
133154
}
134-
return root == null ? branch : joinFactory.create(root, branch);
155+
return root;
135156
}
136157

137158
private static Optional<Set<String>> finitePrefixAnchorNames(TupleExpr tupleExpr) {

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSketchJoinOptimizerTest.java

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
package org.eclipse.rdf4j.sail.lmdb;
1313

1414
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.junit.jupiter.api.Assertions.assertFalse;
1516
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
1617
import static org.junit.jupiter.api.Assertions.assertTrue;
1718

@@ -363,6 +364,33 @@ void placesDuplicatedValuesAfterScopedUnionBranchGraph() {
363364
assertScopedBranchBeforeValues(optimized.getRightArg());
364365
}
365366

367+
@Test
368+
void keepsUnionIndependentFiniteSuffixAfterScopedUnion() {
369+
BindingSetAssignment users = values("u", "user7", "user8");
370+
BindingSetAssignment names = values("optName", "user7", "user8");
371+
StatementPattern nameLookup = statementPattern("u", "name", "optName");
372+
Union union = new Union(
373+
new Extension(new Join(statementPattern("u", "follows", "v"), statementPattern("v", "follows", "u")),
374+
new ExtensionElem(new Var("v"), "activity")),
375+
new Extension(statementPattern("post", "authored", "u"), new ExtensionElem(new Var("post"),
376+
"activity")));
377+
union.setVariableScopeChange(true);
378+
QueryRoot root = new QueryRoot(new Join(users, new Join(names, new Join(nameLookup, union))));
379+
380+
new LmdbSketchJoinOptimizer(PlanningStatistics.rejected(), false).optimize(root, null, null);
381+
382+
List<TupleExpr> args = joinArgs(root.getArg());
383+
assertEquals(3, args.size());
384+
Union distributedUnion = assertInstanceOf(Union.class, args.get(0));
385+
assertTrue(containsBindingSetAssignment(distributedUnion.getLeftArg(), "u"));
386+
assertTrue(containsBindingSetAssignment(distributedUnion.getRightArg(), "u"));
387+
assertFalse(containsBindingSetAssignment(distributedUnion, "optName"));
388+
assertFalse(statementPatterns(distributedUnion).stream()
389+
.anyMatch(pattern -> "optName".equals(pattern.getObjectVar().getName())));
390+
assertTrue(containsBindingSetAssignment(args.get(1), "optName"));
391+
assertEquals("optName", assertInstanceOf(StatementPattern.class, args.get(2)).getObjectVar().getName());
392+
}
393+
366394
private static StatementPattern statementPattern(String subjectName, String predicateName, String objectName) {
367395
return new StatementPattern(new Var(subjectName), new Var(predicateName, VF.createIRI("urn:" + predicateName)),
368396
new Var(objectName));
@@ -424,6 +452,16 @@ private static void collectStatementPatterns(TupleExpr tupleExpr, List<Statement
424452
collectStatementPatterns(((Filter) tupleExpr).getArg(), patterns);
425453
return;
426454
}
455+
if (tupleExpr instanceof Extension) {
456+
collectStatementPatterns(((Extension) tupleExpr).getArg(), patterns);
457+
return;
458+
}
459+
if (tupleExpr instanceof Union) {
460+
Union union = (Union) tupleExpr;
461+
collectStatementPatterns(union.getLeftArg(), patterns);
462+
collectStatementPatterns(union.getRightArg(), patterns);
463+
return;
464+
}
427465
if (tupleExpr instanceof Join) {
428466
Join join = (Join) tupleExpr;
429467
collectStatementPatterns(join.getLeftArg(), patterns);
@@ -486,6 +524,14 @@ private static boolean containsBindingSetAssignment(TupleExpr tupleExpr, String
486524
if (tupleExpr instanceof Filter) {
487525
return containsBindingSetAssignment(((Filter) tupleExpr).getArg(), bindingName);
488526
}
527+
if (tupleExpr instanceof Extension) {
528+
return containsBindingSetAssignment(((Extension) tupleExpr).getArg(), bindingName);
529+
}
530+
if (tupleExpr instanceof Union) {
531+
Union union = (Union) tupleExpr;
532+
return containsBindingSetAssignment(union.getLeftArg(), bindingName)
533+
|| containsBindingSetAssignment(union.getRightArg(), bindingName);
534+
}
489535
if (tupleExpr instanceof Join) {
490536
Join join = (Join) tupleExpr;
491537
return containsBindingSetAssignment(join.getLeftArg(), bindingName)
@@ -495,10 +541,11 @@ private static boolean containsBindingSetAssignment(TupleExpr tupleExpr, String
495541
}
496542

497543
private static void assertScopedBranchBeforeValues(TupleExpr tupleExpr) {
498-
Join join = assertInstanceOf(Join.class, tupleExpr);
544+
assertTrue(containsFilter(tupleExpr));
545+
TupleExpr joinRoot = tupleExpr instanceof Filter ? ((Filter) tupleExpr).getArg() : tupleExpr;
546+
Join join = assertInstanceOf(Join.class, joinRoot);
499547
assertTrue(assertInstanceOf(VariableScopeChange.class, join.getLeftArg()).isVariableScopeChange());
500548
assertTrue(containsBindingSetAssignment(join.getRightArg(), "target"));
501-
assertTrue(containsFilter(join.getRightArg()));
502549
}
503550

504551
private static final class PlanningStatistics extends EvaluationStatistics implements JoinOrderPlanner {

0 commit comments

Comments
 (0)