Skip to content

Commit 9791909

Browse files
diipak-bishtshuaitian-git
authored andcommitted
Merged PR 1768548: [Revert] Ignore collation spec in aggregate and find queries
### Does this PR have any customer impact? Yes ### Type (Feature, Refactoring, Bugfix, DevOps, Testing, Perf, etc) Revert ### Does it involve schema level changes? (Table, Column, Index, UDF, etc level changes) No ### Are you introducing any new config? If yes, do you have tests with and without them being set? No ### ChangeLog (Refer [Template](../oss/CHANGELOG.md)) - Ignore collation spec for find and aggregate queries ### Description Ignore collation spec for find and aggregate queries ---- #### AI description (iteration 1) #### PR Classification This PR reverts recent changes to collation handling in aggregate and find queries, ensuring that collation specifications are ignored when the feature is disabled. #### PR Summary The PR modifies the collation processing in aggregation and find queries by gating the handling of the "collation" field with the EnableCollation flag—removing the error path when the flag is off—and updates test expectations accordingly. - `oss/pg_documentdb/src/aggregation/bson_aggregation_pipeline.c`: Adjusted the collation handling to check `EnableCollation` before processing and removed the branch that raised errors. - Test expectation files in both `oss/internal/pg_documentdb_distributed` and `pgmongo/src/test/docdb_compat/expected`: Updated expected outputs (including subplan identifiers) and added commands to toggle the collation feature. - `oss/internal/pg_documentdb_distributed/src/test/regress/sql/bson_dollar_ops_collation_tests_runtime.sql`: Modified to set collation off during tests, ensuring that collation specs are ignored without throwing an error. <!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
1 parent c82c6ef commit 9791909

3 files changed

Lines changed: 37 additions & 28 deletions

File tree

internal/pg_documentdb_distributed/src/test/regress/expected/bson_dollar_ops_collation_tests_runtime.out

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ SET search_path to documentdb_core,documentdb_api,documentdb_api_catalog,pg_cata
22
SET citus.next_shard_id TO 7990000;
33
SET documentdb.next_collection_id TO 7990;
44
SET documentdb.next_collection_index_id TO 7990;
5-
SET documentdb_core.enableCollation TO on;
65
-- (1) insert some docs
76
SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 1, "a": "Cat" }');
87
NOTICE: creating collection
@@ -161,6 +160,24 @@ SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 26, "a": [{ "b": "
161160
{ "n" : { "$numberInt" : "1" }, "ok" : { "$numberDouble" : "1.0" } }
162161
(1 row)
163162

163+
SET documentdb_core.enableCollation TO off;
164+
-- With collation off and colation string in find and aggregate commands should not throw an error
165+
SELECT document FROM bson_aggregation_pipeline('db', '{ "aggregate": "ci_search", "pipeline": [ { "$sort": { "_id": 1 } }, { "$match": { "a": { "$eq": "cat" } } } ], "cursor": {}, "collation": { "locale": "en", "strength" : 1} }');
166+
document
167+
---------------------------------------------------------------------
168+
{ "_id" : { "$numberInt" : "3" }, "a" : "cat" }
169+
{ "_id" : { "$numberInt" : "18" }, "a" : [ "dog", "cat", "CAT" ] }
170+
{ "_id" : { "$numberInt" : "19" }, "a" : [ "cat", "rabbit", "bAt" ] }
171+
{ "_id" : { "$numberInt" : "22" }, "a" : [ "cat" ] }
172+
(4 rows)
173+
174+
SELECT document FROM bson_aggregation_find('db', '{ "find": "ci_search", "filter": { "b": { "$eq": "cat" } }, "sort": { "_id": 1 }, "skip": 0, "limit": 5, "collation": { "locale": "en", "strength" : 1} }');
175+
document
176+
---------------------------------------------------------------------
177+
{ "_id" : { "$numberInt" : "11" }, "b" : "cat" }
178+
(1 row)
179+
180+
SET documentdb_core.enableCollation TO on;
164181
-- (2) Find query unsharded collection
165182
SELECT document FROM bson_aggregation_find('db', '{ "find": "ci_search", "filter": { "a": { "$eq": "cat" } }, "sort": { "_id": 1 }, "skip": 0, "limit": 5, "collation": { "locale": "en", "strength" : 1} }');
166183
document
@@ -2041,7 +2058,7 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
20412058
---------------------------------------------------------------------
20422059
Custom Scan (Citus Adaptive)
20432060
Output: remote_scan.document
2044-
-> Distributed Subplan 376_1
2061+
-> Distributed Subplan 378_1
20452062
-> Custom Scan (Citus Adaptive)
20462063
Output: remote_scan.document, remote_scan.lookup_filter
20472064
Task Count: 8
@@ -2051,7 +2068,7 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
20512068
Node: host=localhost port=58070 dbname=regression
20522069
-> Seq Scan on documentdb_data.documents_7997_7990200 collection
20532070
Output: document, documentdb_api_internal.bson_dollar_lookup_extract_filter_expression(document, '{ "_id" : "_id", "collation" : "en-u-ks-level1" }'::documentdb_core.bson)
2054-
-> Distributed Subplan 376_2
2071+
-> Distributed Subplan 378_2
20552072
-> Custom Scan (Citus Adaptive)
20562073
Output: remote_scan.document
20572074
Task Count: 8
@@ -2065,18 +2082,18 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
20652082
Task Count: 1
20662083
Tasks Shown: All
20672084
-> Task
2068-
Query: SELECT documentdb_api_internal.bson_dollar_merge_documents(lookup_stage_1.document, "lookupRight_stage_1".document, true) AS document FROM ((SELECT intermediate_result.document, intermediate_result.lookup_filter FROM read_intermediate_result('376_1'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson, lookup_filter documentdb_core.bson)) lookup_stage_1 JOIN LATERAL (SELECT COALESCE(documentdb_api_catalog.bson_array_agg(lookup_right_query_stage_0.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson) AS document FROM (SELECT intermediate_result.document FROM read_intermediate_result('376_2'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson)) lookup_right_query_stage_0 WHERE documentdb_api_internal.bson_dollar_lookup_join_filter(lookup_right_query_stage_0.document, lookup_stage_1.lookup_filter, '_id'::text)) "lookupRight_stage_1" ON (true))
2085+
Query: SELECT documentdb_api_internal.bson_dollar_merge_documents(lookup_stage_1.document, "lookupRight_stage_1".document, true) AS document FROM ((SELECT intermediate_result.document, intermediate_result.lookup_filter FROM read_intermediate_result('378_1'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson, lookup_filter documentdb_core.bson)) lookup_stage_1 JOIN LATERAL (SELECT COALESCE(documentdb_api_catalog.bson_array_agg(lookup_right_query_stage_0.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson) AS document FROM (SELECT intermediate_result.document FROM read_intermediate_result('378_2'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson)) lookup_right_query_stage_0 WHERE documentdb_api_internal.bson_dollar_lookup_join_filter(lookup_right_query_stage_0.document, lookup_stage_1.lookup_filter, '_id'::text)) "lookupRight_stage_1" ON (true))
20692086
Node: host=localhost port=58070 dbname=regression
20702087
-> Nested Loop
20712088
Output: documentdb_api_internal.bson_dollar_merge_documents(intermediate_result.document, (COALESCE(documentdb_api_catalog.bson_array_agg(intermediate_result_1.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson)), true)
20722089
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
20732090
Output: intermediate_result.document, intermediate_result.lookup_filter
2074-
Function Call: read_intermediate_result('376_1'::text, 'binary'::citus_copy_format)
2091+
Function Call: read_intermediate_result('378_1'::text, 'binary'::citus_copy_format)
20752092
-> Aggregate
20762093
Output: COALESCE(documentdb_api_catalog.bson_array_agg(intermediate_result_1.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson)
20772094
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
20782095
Output: intermediate_result_1.document
2079-
Function Call: read_intermediate_result('376_2'::text, 'binary'::citus_copy_format)
2096+
Function Call: read_intermediate_result('378_2'::text, 'binary'::citus_copy_format)
20802097
Filter: documentdb_api_internal.bson_dollar_lookup_join_filter(intermediate_result_1.document, intermediate_result.lookup_filter, '_id'::text)
20812098
(39 rows)
20822099

@@ -2103,7 +2120,7 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
21032120
---------------------------------------------------------------------
21042121
Custom Scan (Citus Adaptive)
21052122
Output: remote_scan.document
2106-
-> Distributed Subplan 384_1
2123+
-> Distributed Subplan 386_1
21072124
-> Custom Scan (Citus Adaptive)
21082125
Output: remote_scan.document, remote_scan.lookup_filter
21092126
Task Count: 8
@@ -2113,7 +2130,7 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
21132130
Node: host=localhost port=58070 dbname=regression
21142131
-> Seq Scan on documentdb_data.documents_7997_7990200 collection
21152132
Output: document, documentdb_api_internal.bson_dollar_lookup_extract_filter_expression(document, '{ "a.b" : "a.b", "collation" : "en-u-ks-level1" }'::documentdb_core.bson)
2116-
-> Distributed Subplan 384_2
2133+
-> Distributed Subplan 386_2
21172134
-> Custom Scan (Citus Adaptive)
21182135
Output: remote_scan.document
21192136
Task Count: 8
@@ -2127,18 +2144,18 @@ EXPLAIN (VERBOSE ON, COSTS OFF) SELECT document FROM bson_aggregation_pipeline('
21272144
Task Count: 1
21282145
Tasks Shown: All
21292146
-> Task
2130-
Query: SELECT documentdb_api_internal.bson_dollar_merge_documents(lookup_stage_1.document, "lookupRight_stage_1".document, true) AS document FROM ((SELECT intermediate_result.document, intermediate_result.lookup_filter FROM read_intermediate_result('384_1'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson, lookup_filter documentdb_core.bson)) lookup_stage_1 JOIN LATERAL (SELECT COALESCE(documentdb_api_catalog.bson_array_agg(lookup_right_query_stage_0.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson) AS document FROM (SELECT intermediate_result.document FROM read_intermediate_result('384_2'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson)) lookup_right_query_stage_0 WHERE documentdb_api_internal.bson_dollar_lookup_join_filter(lookup_right_query_stage_0.document, lookup_stage_1.lookup_filter, 'a.b'::text)) "lookupRight_stage_1" ON (true))
2147+
Query: SELECT documentdb_api_internal.bson_dollar_merge_documents(lookup_stage_1.document, "lookupRight_stage_1".document, true) AS document FROM ((SELECT intermediate_result.document, intermediate_result.lookup_filter FROM read_intermediate_result('386_1'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson, lookup_filter documentdb_core.bson)) lookup_stage_1 JOIN LATERAL (SELECT COALESCE(documentdb_api_catalog.bson_array_agg(lookup_right_query_stage_0.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson) AS document FROM (SELECT intermediate_result.document FROM read_intermediate_result('386_2'::text, 'binary'::citus_copy_format) intermediate_result(document documentdb_core.bson)) lookup_right_query_stage_0 WHERE documentdb_api_internal.bson_dollar_lookup_join_filter(lookup_right_query_stage_0.document, lookup_stage_1.lookup_filter, 'a.b'::text)) "lookupRight_stage_1" ON (true))
21312148
Node: host=localhost port=58070 dbname=regression
21322149
-> Nested Loop
21332150
Output: documentdb_api_internal.bson_dollar_merge_documents(intermediate_result.document, (COALESCE(documentdb_api_catalog.bson_array_agg(intermediate_result_1.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson)), true)
21342151
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
21352152
Output: intermediate_result.document, intermediate_result.lookup_filter
2136-
Function Call: read_intermediate_result('384_1'::text, 'binary'::citus_copy_format)
2153+
Function Call: read_intermediate_result('386_1'::text, 'binary'::citus_copy_format)
21372154
-> Aggregate
21382155
Output: COALESCE(documentdb_api_catalog.bson_array_agg(intermediate_result_1.document, 'matched_docs'::text), '{ "matched_docs" : [ ] }'::documentdb_core.bson)
21392156
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
21402157
Output: intermediate_result_1.document
2141-
Function Call: read_intermediate_result('384_2'::text, 'binary'::citus_copy_format)
2158+
Function Call: read_intermediate_result('386_2'::text, 'binary'::citus_copy_format)
21422159
Filter: documentdb_api_internal.bson_dollar_lookup_join_filter(intermediate_result_1.document, intermediate_result.lookup_filter, 'a.b'::text)
21432160
(39 rows)
21442161

internal/pg_documentdb_distributed/src/test/regress/sql/bson_dollar_ops_collation_tests_runtime.sql

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ SET citus.next_shard_id TO 7990000;
33
SET documentdb.next_collection_id TO 7990;
44
SET documentdb.next_collection_index_id TO 7990;
55

6-
SET documentdb_core.enableCollation TO on;
7-
86
-- (1) insert some docs
97
SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 1, "a": "Cat" }');
108
SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 2, "a": "dog" }');
@@ -33,6 +31,13 @@ SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 24, "a": ["cAt"] }
3331
SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 25, "a": { "b" : "cAt"} }');
3432
SELECT documentdb_api.insert_one('db', 'ci_search', '{ "_id": 26, "a": [{ "b": "CAT"}] }');
3533

34+
SET documentdb_core.enableCollation TO off;
35+
36+
-- With collation off and colation string in find and aggregate commands should not throw an error
37+
SELECT document FROM bson_aggregation_pipeline('db', '{ "aggregate": "ci_search", "pipeline": [ { "$sort": { "_id": 1 } }, { "$match": { "a": { "$eq": "cat" } } } ], "cursor": {}, "collation": { "locale": "en", "strength" : 1} }');
38+
SELECT document FROM bson_aggregation_find('db', '{ "find": "ci_search", "filter": { "b": { "$eq": "cat" } }, "sort": { "_id": 1 }, "skip": 0, "limit": 5, "collation": { "locale": "en", "strength" : 1} }');
39+
40+
SET documentdb_core.enableCollation TO on;
3641

3742
-- (2) Find query unsharded collection
3843
SELECT document FROM bson_aggregation_find('db', '{ "find": "ci_search", "filter": { "a": { "$eq": "cat" } }, "sort": { "_id": 1 }, "skip": 0, "limit": 5, "collation": { "locale": "en", "strength" : 1} }');

pg_documentdb/src/aggregation/bson_aggregation_pipeline.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,19 +1266,13 @@ GenerateAggregationQuery(text *database, pgbson *aggregationSpec, QueryData *que
12661266
else if (StringViewEqualsCString(&keyView, "collation"))
12671267
{
12681268
ReportFeatureUsage(FEATURE_COLLATION);
1269-
12701269
if (EnableCollation)
12711270
{
1271+
/* Ignore collation until enabled for aggregate */
12721272
EnsureTopLevelFieldType("collation", &aggregationIterator,
12731273
BSON_TYPE_DOCUMENT);
12741274
ParseAndGetCollationString(value, context.collationString);
12751275
}
1276-
else
1277-
{
1278-
ereport(ERROR, (errcode(ERRCODE_DOCUMENTDB_COMMANDNOTSUPPORTED),
1279-
errmsg(
1280-
"aggregation with collation is not currently supported.")));
1281-
}
12821276
}
12831277
else if (setStatementTimeout &&
12841278
StringViewEqualsCString(&keyView, "maxTimeMS"))
@@ -1539,20 +1533,13 @@ GenerateFindQuery(text *databaseDatum, pgbson *findSpec, QueryData *queryData, b
15391533
if (StringViewEqualsCString(&keyView, "collation"))
15401534
{
15411535
ReportFeatureUsage(FEATURE_COLLATION);
1542-
15431536
if (EnableCollation)
15441537
{
1538+
/* Ignore collation until enabled for find */
15451539
EnsureTopLevelFieldType("collation", &findIterator,
15461540
BSON_TYPE_DOCUMENT);
15471541
ParseAndGetCollationString(value, context.collationString);
15481542
}
1549-
else
1550-
{
1551-
ereport(ERROR, (errcode(ERRCODE_DOCUMENTDB_COMMANDNOTSUPPORTED),
1552-
errmsg(
1553-
"Find with collation is not currently supported.")));
1554-
}
1555-
15561543
continue;
15571544
}
15581545

0 commit comments

Comments
 (0)