Skip to content

Commit a69a5d8

Browse files
Continue work on #2054: complete SortClause decoding for Lexical Sort support for Tables (but not actual Sort execution) (#2130)
1 parent 48857a2 commit a69a5d8

10 files changed

Lines changed: 312 additions & 98 deletions

File tree

src/main/java/io/stargate/sgv2/jsonapi/api/model/command/builders/CollectionSortClauseBuilder.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,39 @@ public SortClause buildClauseFromDefinition(ObjectNode sortNode) {
102102
return new SortClause(sortExpressions);
103103
}
104104

105+
/**
106+
* Helper method to build a "non-special" sort expression for given definition; validates
107+
* expression value and builds the {@link SortExpression} object.
108+
*
109+
* @param path Path to the field to sort by, already validated
110+
* @param innerValue JSON value of the sort expression to use
111+
* @return {@link SortExpression} for the regular sort
112+
*/
113+
private SortExpression buildRegularSortExpression(String path, JsonNode innerValue) {
114+
if (!innerValue.isInt()) {
115+
// Special checking for String and ArrayNode to give less confusing error messages
116+
if (innerValue.isArray()) {
117+
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
118+
"Sort ordering value can be Array only for Vector search");
119+
}
120+
if (innerValue.isTextual()) {
121+
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
122+
"Sort ordering value can be String only for Lexical or Vectorize search");
123+
}
124+
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
125+
"Sort ordering value should be integer `1` or `-1`; or Array (Vector); or String (Lexical or Vectorize), was: %s",
126+
JsonUtil.nodeTypeAsString(innerValue));
127+
}
128+
if (!(innerValue.intValue() == 1 || innerValue.intValue() == -1)) {
129+
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
130+
"Sort ordering value can only be `1` for ascending or `-1` for descending (not `%s`)",
131+
innerValue);
132+
}
133+
134+
boolean ascending = innerValue.intValue() == 1;
135+
return SortExpression.sort(path, ascending);
136+
}
137+
105138
private void validateSortExpressionPaths(ObjectNode sortNode) {
106139
Iterator<String> it = sortNode.fieldNames();
107140
while (it.hasNext()) {

src/main/java/io/stargate/sgv2/jsonapi/api/model/command/builders/SortClauseBuilder.java

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
import io.stargate.sgv2.jsonapi.api.model.command.clause.filter.EJSONWrapper;
66
import io.stargate.sgv2.jsonapi.api.model.command.clause.filter.SortDefinition;
77
import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause;
8-
import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression;
98
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
109
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.SchemaObject;
1110
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableSchemaObject;
1211
import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject;
13-
import io.stargate.sgv2.jsonapi.util.JsonUtil;
1412
import java.util.Objects;
1513

1614
/**
@@ -59,39 +57,6 @@ public SortClause build(JsonNode node) {
5957
*/
6058
protected abstract SortClause buildClauseFromDefinition(ObjectNode sortNode);
6159

62-
/**
63-
* Helper method to build a sort expression for given definition. Base implementation is for
64-
* regular sorts (not vector, vectorize or lexical).
65-
*
66-
* @param path Path to the field to sort by, already validated
67-
* @param innerValue JSON value of the sort expression to use
68-
* @return {@link SortExpression} for the regular sort
69-
*/
70-
protected SortExpression buildRegularSortExpression(String path, JsonNode innerValue) {
71-
if (!innerValue.isInt()) {
72-
// Special checking for String and ArrayNode to give less confusing error messages
73-
if (innerValue.isArray()) {
74-
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
75-
"Sort ordering value can be Array only for Vector search");
76-
}
77-
if (innerValue.isTextual()) {
78-
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
79-
"Sort ordering value can be String only for Lexical or Vectorize search");
80-
}
81-
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
82-
"Sort ordering value should be integer `1` or `-1`; or Array (Vector); or String (Lexical or Vectorize), was: %s",
83-
JsonUtil.nodeTypeAsString(innerValue));
84-
}
85-
if (!(innerValue.intValue() == 1 || innerValue.intValue() == -1)) {
86-
throw ErrorCodeV1.INVALID_SORT_CLAUSE_VALUE.toApiException(
87-
"Sort ordering value can only be `1` for ascending or `-1` for descending (not `%s`)",
88-
innerValue);
89-
}
90-
91-
boolean ascending = innerValue.intValue() == 1;
92-
return SortExpression.sort(path, ascending);
93-
}
94-
9560
protected float[] tryDecodeBinaryVector(String path, JsonNode innerValue) {
9661
if (innerValue instanceof ObjectNode innerObject) {
9762
var ejsonWrapped = EJSONWrapper.maybeFrom(innerObject);

src/main/java/io/stargate/sgv2/jsonapi/api/model/command/builders/TableSortClauseBuilder.java

Lines changed: 133 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmtApiColumnDef;
44
import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmtCqlIdentifier;
5+
import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmtJoin;
56
import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errVars;
67
import static io.stargate.sgv2.jsonapi.util.JsonUtil.arrayNodeToVector;
78

@@ -16,6 +17,7 @@
1617
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiColumnDef;
1718
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiColumnDefContainer;
1819
import io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil;
20+
import io.stargate.sgv2.jsonapi.util.JsonUtil;
1921
import java.util.ArrayList;
2022
import java.util.List;
2123
import java.util.Map;
@@ -31,10 +33,53 @@ public TableSortClauseBuilder(TableSchemaObject table) {
3133
@Override
3234
protected SortClause buildClauseFromDefinition(ObjectNode sortNode) {
3335
// First, resolve the paths to column definitions
34-
List<SortExpressionDefinition> sortExprDefs = resolveColumns(sortNode);
35-
final List<SortExpression> sortExpressions = new ArrayList<>();
36+
var sortExprDefs = resolveColumns(sortNode);
37+
38+
// Then split into "special" (vector/vectorize, lexical) and regular expressions
39+
var lexicalExprs = new ArrayList<SortExpressionDefinition>();
40+
var vectorExprs = new ArrayList<SortExpressionDefinition>();
41+
var regularExprs = new ArrayList<SortExpressionDefinition>();
42+
3643
for (SortExpressionDefinition sortExprDef : sortExprDefs) {
37-
sortExpressions.add(buildSortExpression(sortExprDef));
44+
var column = sortExprDef.column;
45+
switch (column.type().typeName()) {
46+
case VECTOR -> vectorExprs.add(sortExprDef);
47+
case ASCII, TEXT -> {
48+
if (sortExprDef.sortValue.isTextual()) {
49+
lexicalExprs.add(sortExprDef);
50+
} else {
51+
regularExprs.add(sortExprDef);
52+
}
53+
}
54+
default -> regularExprs.add(sortExprDef);
55+
}
56+
}
57+
58+
// Lexical sort present? Then it must be the only sort expression: it cannot be combined with other sorts
59+
// Ditto for vector/vectorize
60+
if (!lexicalExprs.isEmpty() || !vectorExprs.isEmpty()) {
61+
if (sortExprDefs.size() > 1) {
62+
throw SortException.Code.CANNOT_SORT_ON_SPECIAL_WITH_OTHERS.get(
63+
errVars(
64+
schema,
65+
map -> {
66+
map.put("lexicalSorts", columnsDesc(lexicalExprs));
67+
map.put("regularSorts", columnsDesc(regularExprs));
68+
map.put("vectorSorts", columnsDesc(vectorExprs));
69+
}));
70+
}
71+
72+
if (!lexicalExprs.isEmpty()) {
73+
return new SortClause(List.of(buildLexicalSortExpression(lexicalExprs.getFirst())));
74+
}
75+
76+
return new SortClause(List.of(buildVectorOrVectorizeSortExpression(vectorExprs.getFirst())));
77+
}
78+
79+
// Otherwise, we can build regular sort expression(s)
80+
final List<SortExpression> sortExpressions = new ArrayList<>();
81+
for (SortExpressionDefinition exprDef : regularExprs) {
82+
sortExpressions.add(buildRegularSortExpression(exprDef));
3883
}
3984
return new SortClause(sortExpressions);
4085
}
@@ -70,32 +115,98 @@ private List<SortExpressionDefinition> resolveColumns(ObjectNode sortNode) {
70115
return sortExprDefs;
71116
}
72117

73-
protected SortExpression buildSortExpression(SortExpressionDefinition exprDef) {
74-
final String path = exprDef.path();
75-
final JsonNode innerValue = exprDef.sortValue();
118+
protected SortExpression buildLexicalSortExpression(SortExpressionDefinition lexicalExpr) {
119+
// caller validated JsonNode is textual; for now nothing more to validate
120+
return SortExpression.tableLexicalSort(lexicalExpr.path(), lexicalExpr.sortValue.textValue());
121+
}
76122

77-
float[] vectorFloats = tryDecodeBinaryVector(path, innerValue);
123+
protected SortExpression buildVectorOrVectorizeSortExpression(
124+
SortExpressionDefinition vectorExpr) {
125+
final String path = vectorExpr.path();
126+
final JsonNode exprValue = vectorExpr.sortValue();
78127

79-
// handle table vector sort
128+
// So we know we have a Vector column; now can check if value is a binary vector or an array
129+
// of floats, or a string to vectorize.
130+
// For Vectorize, further checks are done in the TableSortClauseResolver.
131+
132+
// First: vector data either as EJSON binary or as JSON Array (of floats)
133+
float[] vectorFloats = tryDecodeBinaryVector(path, exprValue);
80134
if (vectorFloats != null) {
81135
return SortExpression.tableVectorSort(path, vectorFloats);
82136
}
83-
if (innerValue instanceof ArrayNode innerArray) {
84-
// TODO: HACK: quick support for tables, if the value is an array we will assume the
85-
// column is a vector then need to check on table pathway that the sort is correct.
86-
// NOTE: does not check if there are more than one sort expression, the
87-
// TableSortClauseResolver will take care of that so we can get proper ApiExceptions
137+
if (exprValue instanceof ArrayNode innerArray) {
88138
return SortExpression.tableVectorSort(path, arrayNodeToVector(innerArray));
89139
}
90-
if (innerValue.isTextual()) {
91-
// TODO: HACK: quick support for tables, if the value is an text we will assume the column
92-
// is a vector and the user wants to do vectorize then need to check on table pathway that
93-
// the sort is correct.
94-
// NOTE: does not check if there are more than one sort expression, the
95-
// TableSortClauseResolver will take care of that so we can get proper ApiExceptions
96-
// this is also why we do not break the look here
97-
return SortExpression.tableVectorizeSort(path, innerValue.textValue());
140+
141+
// Otherwise, check if it is a String to vectorize
142+
if (exprValue.isTextual()) {
143+
return SortExpression.tableVectorizeSort(path, exprValue.textValue());
98144
}
99-
return super.buildRegularSortExpression(path, innerValue);
145+
146+
// Otherwise, invalid (cannot be a regular sort as it is a Vector column)
147+
throw SortException.Code.INVALID_VECTOR_SORT_EXPRESSION.get(
148+
errVars(
149+
schema,
150+
map -> {
151+
map.put("jsonType", JsonUtil.nodeTypeAsString(exprValue));
152+
}));
153+
}
154+
155+
/**
156+
* Helper method to build a "non-special" sort expression for given definition; validates
157+
* expression value and builds the {@link SortExpression} object.
158+
*/
159+
private SortExpression buildRegularSortExpression(SortExpressionDefinition exprDef) {
160+
JsonNode sortValue = exprDef.sortValue();
161+
162+
// First: valid cases
163+
if (sortValue.isInt()) {
164+
// Yes, we have an integer value. But is it valid?
165+
if (sortValue.intValue() == 1) {
166+
return SortExpression.sort(exprDef.path(), true);
167+
}
168+
if (sortValue.intValue() == -1) {
169+
return SortExpression.sort(exprDef.path(), false);
170+
}
171+
} else if (sortValue.isArray()) {
172+
// Special checking for ArrayNode and String to give less confusing error messages
173+
174+
throw SortException.Code.CANNOT_VECTOR_SORT_NON_VECTOR_COLUMNS.get(
175+
errVars(
176+
schema,
177+
map -> {
178+
map.put(
179+
"vectorColumns",
180+
errFmtApiColumnDef(
181+
schema.apiTableDef().allColumns().filterVectorColumnsToList()));
182+
map.put("sortColumns", exprDef.path());
183+
}));
184+
} else if (sortValue.isTextual()) {
185+
// We only end up here for non-TEXT/ASCII/VECTOR columns (other cases are handled
186+
// by caller and further validated later on)
187+
throw SortException.Code.CANNOT_VECTORIZE_SORT_NON_VECTOR_COLUMN.get(
188+
errVars(
189+
schema,
190+
map -> {
191+
map.put(
192+
"vectorColumns",
193+
errFmtApiColumnDef(
194+
schema.apiTableDef().allColumns().filterVectorColumnsToList()));
195+
map.put("sortColumns", exprDef.path());
196+
}));
197+
}
198+
199+
// Otherwise general failure message wrt use of 1 or -1 for Regular sort expression
200+
throw SortException.Code.INVALID_REGULAR_SORT_EXPRESSION.get(
201+
errVars(
202+
schema,
203+
map -> {
204+
map.put("jsonExpr", sortValue.toString());
205+
map.put("jsonType", JsonUtil.nodeTypeAsString(sortValue));
206+
}));
207+
}
208+
209+
private String columnsDesc(List<SortExpressionDefinition> sortExprDefs) {
210+
return errFmtJoin(sortExprDefs.stream().map(SortExpressionDefinition::path).toList());
100211
}
101212
}

src/main/java/io/stargate/sgv2/jsonapi/exception/SortException.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ public SortException(ErrorInstance errorInstance) {
1616
public enum Code implements ErrorCode<SortException> {
1717
CANNOT_SORT_ON_MULTIPLE_VECTORIZE,
1818
CANNOT_SORT_ON_MULTIPLE_VECTORS,
19+
CANNOT_SORT_ON_SPECIAL_WITH_OTHERS,
1920
CANNOT_SORT_UNKNOWN_COLUMNS,
2021
CANNOT_SORT_VECTOR_AND_NON_VECTOR_COLUMNS,
2122
CANNOT_VECTORIZE_SORT_NON_VECTOR_COLUMN,
@@ -24,6 +25,8 @@ public enum Code implements ErrorCode<SortException> {
2425
CANNOT_VECTOR_SORT_NON_VECTOR_COLUMNS,
2526
CANNOT_VECTOR_SORT_WITH_LIMIT_EXCEEDS_MAX,
2627
CANNOT_VECTOR_SORT_WITH_SKIP_OPTION,
28+
INVALID_REGULAR_SORT_EXPRESSION,
29+
INVALID_VECTOR_SORT_EXPRESSION,
2730
OVERLOADED_SORT_ROW_LIMIT,
2831
UNSUPPORTED_PAGINATION_WITH_IN_MEMORY_SORTING,
2932
UNSUPPORTED_VECTOR_SORT_FOR_COLLECTION,

src/main/resources/errors.yaml

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,17 @@ snippets:
7676
body: |-
7777
Resend the command using only defined columns.
7878
79+
- name: REGULAR_SORT_EXPLANATION
80+
body: |-
81+
A regular sort expression in the sort clause identifies the column by name and then provides either:
82+
- `1` for ascending
83+
- `-1` for descending
84+
7985
- name: VECTOR_SORT_EXPLANATION
8086
body: |-
8187
A vector sort in the sort clause identifies the vector column by name and then provides either:
8288
- The vector as an array of decimal numbers.
83-
- The vector as a base64 encoded `{"$binary": "base64-encoded-vector"}` object.
89+
- The vector as a base64-encoded `{"$binary": "base64-encoded-vector"}` object.
8490
- A string to be vectorized if enabled for the column.
8591
8692
- name: CURRENTLY_UNSUPPORTED
@@ -1347,6 +1353,19 @@ request-errors:
13471353
13481354
Resend the command with only one vectorize sort.
13491355
1356+
- scope: SORT
1357+
code: CANNOT_SORT_ON_SPECIAL_WITH_OTHERS
1358+
title: Special sort combined with other sort expressions
1359+
body: |-
1360+
The command used a sort clause with a special (lexical/vector/vectorize) sort combined with one or more other sort expressions.
1361+
Special sorts can only be used on their own, and cannot be combined with other sort expressions.
1362+
1363+
The command attempted to use lexical sort on columns: ${lexicalSorts}.
1364+
The command attempted to use vector/vectorize sort on columns: ${vectorSorts}.
1365+
The command attempted to use regular sort on columns: ${regularSorts}.
1366+
1367+
Resend the command with only one special sort expression.
1368+
13501369
- scope: SORT
13511370
code: CANNOT_SORT_UNKNOWN_COLUMNS
13521371
title: Sorted columns are not defined in the table schema
@@ -1372,12 +1391,12 @@ request-errors:
13721391
code: CANNOT_VECTOR_SORT_NON_VECTOR_COLUMNS
13731392
title: Vector sort columns are not `vector` type
13741393
body: |-
1375-
The command attempted to vectorize sort on a column that is not of `vector` type.
1394+
The command attempted to vector sort on a column that is not of `vector` type.
13761395
13771396
${SNIPPET.VECTOR_SORT_EXPLANATION}
13781397
1379-
The table ${keyspace}.${table} defines the columns: ${vectorColumns}.
1380-
The command attempted to sort the non vector columns: ${sortColumns}.
1398+
The table ${keyspace}.${table} defines vector columns: ${vectorColumns}.
1399+
The command attempted to sort the non-vector columns: ${sortColumns}.
13811400
13821401
Resend the command using only `vector` columns.
13831402
@@ -1460,6 +1479,26 @@ request-errors:
14601479
14611480
Resend the command using either vector or non-vector sorting.
14621481
1482+
- scope: SORT
1483+
code: INVALID_REGULAR_SORT_EXPRESSION
1484+
title: Sort expression is not valid for regular sort
1485+
body: |-
1486+
The command attempted to use an unsupported JSON expression `${jsonExpr}` (type ${jsonType}) for sort clause.
1487+
1488+
${SNIPPET.REGULAR_SORT_EXPLANATION}
1489+
1490+
Resend the command with only valid sort expressions.
1491+
1492+
- scope: SORT
1493+
code: INVALID_VECTOR_SORT_EXPRESSION
1494+
title: Sort expression is not valid for vector/vectorize sort
1495+
body: |-
1496+
The command attempted to use an unsupported JSON type (${jsonType}) for vector or vectorize sort.
1497+
1498+
${SNIPPET.VECTOR_SORT_EXPLANATION}
1499+
1500+
Resend the command with a valid vector/vectorize sort expression.
1501+
14631502
- scope: SORT
14641503
code: UNSUPPORTED_SORT_FOR_TABLE_DELETE_COMMAND
14651504
title: Sorting not supported by delete command on Tables

0 commit comments

Comments
 (0)