From 48edfb9ee7baed85676a035a46eb7e157b17c540 Mon Sep 17 00:00:00 2001 From: Simon Meierhans Date: Fri, 3 Jul 2026 02:57:03 -0700 Subject: [PATCH] Add temporal support to graph schema. PiperOrigin-RevId: 942044600 --- dgf/src/data/BUILD | 1 + dgf/src/data/schema.cc | 10 +- dgf/src/data/schema.h | 2 + dgf/src/data/schema.py | 8 + dgf/src/data/schema_cc_test.py | 96 ++++++++ dgf/src/data/schema_nb.cc | 16 ++ dgf/src/io/hgraph_in_beam.py | 1 - dgf/src/validate/BUILD | 1 + dgf/src/validate/in_memory_graph.py | 95 ++++++++ dgf/src/validate/in_memory_graph_test.py | 269 ++++++++++++++++++++++- 10 files changed, 485 insertions(+), 14 deletions(-) diff --git a/dgf/src/data/BUILD b/dgf/src/data/BUILD index f357c3f..053bfe7 100644 --- a/dgf/src/data/BUILD +++ b/dgf/src/data/BUILD @@ -294,6 +294,7 @@ py_test( name = "schema_cc_test", srcs = ["schema_cc_test.py"], deps = [ + ":schema", ":schema_ext", # absl/testing:absltest dep, "//dgf/src/util:gen_test_graph", diff --git a/dgf/src/data/schema.cc b/dgf/src/data/schema.cc index b3e13d3..34622c1 100644 --- a/dgf/src/data/schema.cc +++ b/dgf/src/data/schema.cc @@ -41,9 +41,13 @@ std::string GraphSchema::Feature::to_string(int indent) const { auto shape_formatter = [](std::string* out, int dim) { absl::StrAppend(out, dim == -1 ? "None" : std::to_string(dim)); }; - return absl::StrCat(prefix, "Feature(name='", name, "', shape=[", - absl::StrJoin(shape, ", ", shape_formatter), - "], format=", FormatToString(format), ")"); + return absl::StrCat( + prefix, "Feature(name='", name, "', shape=[", + absl::StrJoin(shape, ", ", shape_formatter), + "], format=", FormatToString(format), + is_timeseries ? ", is_timeseries=true" : "", + timestamps.empty() ? "" : absl::StrCat(", timestamps='", timestamps, "'"), + ")"); } std::string GraphSchema::Nodeset::to_string(int indent) const { diff --git a/dgf/src/data/schema.h b/dgf/src/data/schema.h index 04efa70..4c05c35 100644 --- a/dgf/src/data/schema.h +++ b/dgf/src/data/schema.h @@ -28,6 +28,8 @@ struct GraphSchema { // Shape of the feature. -1 (in cc) is equivalent to None (in python). std::vector shape; eFormat format; + bool is_timeseries = false; + std::string timestamps; std::string to_string(int indent) const; diff --git a/dgf/src/data/schema.py b/dgf/src/data/schema.py index 8c60b89..b30f6a5 100644 --- a/dgf/src/data/schema.py +++ b/dgf/src/data/schema.py @@ -90,6 +90,12 @@ class FeatureSchema: is_utf8_string: Whether the feature is a UTF-8 string. This is only relevant when feature_format is BYTES, to distinguish between Spanner STRING (True) and Spanner BYTES (False). + is_timeseries: Whether the feature represents a temporal series / sequence. + timestamps: For temporal sequence features, the name of the feature + containing the corresponding timestamp sequence (e.g., "time"). The + length of the corresponding timestamps feature must equal the length of + the timeseries feature along the 0th dimension. Cannot be set for non + timeseries features. """ format: FeatureFormat @@ -97,6 +103,8 @@ class FeatureSchema: shape: Shape = None num_categorical_values: Optional[int] = None is_utf8_string: Optional[bool] = False + is_timeseries: Optional[bool] = False + timestamps: Optional[str] = None def is_static_shape(self) -> bool: """Returns true if the feature has a fully static shape.""" diff --git a/dgf/src/data/schema_cc_test.py b/dgf/src/data/schema_cc_test.py index 48dab6a..064b1ad 100644 --- a/dgf/src/data/schema_cc_test.py +++ b/dgf/src/data/schema_cc_test.py @@ -13,6 +13,7 @@ # limitations under the License. from absl.testing import absltest +from dgf.src.data import schema as schema_lib from dgf.src.data import schema_ext as lib from dgf.src.util import gen_test_graph @@ -42,6 +43,101 @@ def test_parse_schema(self): ])""", ) + def test_parse_temporal_schema(self): + schema = schema_lib.GraphSchema( + node_sets={ + "n1": schema_lib.NodeSchema( + features={ + "#id": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.BYTES, + semantic=schema_lib.FeatureSemantic.PRIMARY_ID, + ), + "#creation_time": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + ), + "time": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(None,), + is_timeseries=True, + ), + "f1_seq": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(None,), + is_timeseries=True, + timestamps="time", + ), + "f2_seq": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.EMBEDDING, + shape=(None, 4), + is_timeseries=True, + timestamps="time", + ), + } + ), + "n2": schema_lib.NodeSchema( + features={ + "#id": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.PRIMARY_ID, + ), + "sensor_ts": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.TIMESERIES, + shape=(20, 8), + is_timeseries=True, + ), + } + ), + }, + edge_sets={ + "e1": schema_lib.EdgeSchema( + source="n1", + target="n2", + features={ + "edge_time": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(None,), + is_timeseries=True, + ), + "edge_val": schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(None,), + is_timeseries=True, + timestamps="edge_time", + ), + }, + ) + }, + ) + self.assertEqual( + lib.ParseAndDebugPrintSchema(schema), + """\ +GraphSchema(nodesets=[ + Nodeset(name='n1', features=[ + Feature(name='#creation_time', shape=[], format=INTEGER_64), + Feature(name='#id', shape=[], format=BYTES), + Feature(name='f1_seq', shape=[None], format=FLOAT_32, is_timeseries=true, timestamps='time'), + Feature(name='f2_seq', shape=[None, 4], format=FLOAT_32, is_timeseries=true, timestamps='time'), + Feature(name='time', shape=[None], format=INTEGER_64, is_timeseries=true) + ]), + Nodeset(name='n2', features=[ + Feature(name='#id', shape=[], format=INTEGER_64), + Feature(name='sensor_ts', shape=[20, 8], format=FLOAT_32, is_timeseries=true) + ]) +], edgesets=[ + Edgeset(name='e1', source_nodeset=0, target_nodeset=1, features=[ + Feature(name='edge_time', shape=[None], format=INTEGER_64, is_timeseries=true), + Feature(name='edge_val', shape=[None], format=INTEGER_32, is_timeseries=true, timestamps='edge_time') + ]) +])""", + ) + if __name__ == "__main__": absltest.main() diff --git a/dgf/src/data/schema_nb.cc b/dgf/src/data/schema_nb.cc index 7b797a4..633db34 100644 --- a/dgf/src/data/schema_nb.cc +++ b/dgf/src/data/schema_nb.cc @@ -62,6 +62,22 @@ absl::StatusOr ParseFeatureSchema( return absl::InvalidArgumentError( absl::StrCat("Invalid shape type for feature '", feature_name, "'")); } + + if (nb::hasattr(py_feature_schema, "is_timeseries")) { + nb::object py_is_timeseries = py_feature_schema.attr("is_timeseries"); + if (!py_is_timeseries.is_none() && + nb::isinstance(py_is_timeseries)) { + feature.is_timeseries = nb::cast(py_is_timeseries); + } + } + + if (nb::hasattr(py_feature_schema, "timestamps")) { + nb::object py_timestamps = py_feature_schema.attr("timestamps"); + if (!py_timestamps.is_none() && nb::isinstance(py_timestamps)) { + feature.timestamps = nb::cast(py_timestamps); + } + } + return feature; } diff --git a/dgf/src/io/hgraph_in_beam.py b/dgf/src/io/hgraph_in_beam.py index 81f8326..5e741cd 100644 --- a/dgf/src/io/hgraph_in_beam.py +++ b/dgf/src/io/hgraph_in_beam.py @@ -83,7 +83,6 @@ def read_graphai_hgraph( node_id_column=node_id_column, edge_id_column=edge_id_column, override_schema=override_schema, - research_node_format=research_node_format, remove_dangling_edges=remove_dangling_edges, ) diff --git a/dgf/src/validate/BUILD b/dgf/src/validate/BUILD index bc8a283..8385316 100644 --- a/dgf/src/validate/BUILD +++ b/dgf/src/validate/BUILD @@ -54,5 +54,6 @@ py_test( "//dgf/src/data:schema", "//dgf/src/util:gen_test_graph", "//dgf/src/util:test_util", + # numpy dep, ], ) diff --git a/dgf/src/validate/in_memory_graph.py b/dgf/src/validate/in_memory_graph.py index 7c23a9d..dbadcb7 100644 --- a/dgf/src/validate/in_memory_graph.py +++ b/dgf/src/validate/in_memory_graph.py @@ -122,6 +122,101 @@ def feature_set_issues( ) ) + if feature_schema.timestamps is not None: + if not feature_schema.is_timeseries: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} has timestamps set" + f" to {feature_schema.timestamps!r}, but is_timeseries is" + " False." + ) + ) + ts_name = feature_schema.timestamps + if ts_name not in featureset_schema: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} references" + f" timestamps feature {ts_name!r} which is not defined in the" + " schema." + ) + ) + else: + ts_schema = featureset_schema[ts_name] + if ( + not ts_schema.is_timeseries + or ts_schema.semantic != schema_lib.FeatureSemantic.TIMESTAMP + ): + if not ts_schema.is_timeseries: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} references" + f" timestamps feature {ts_name!r}, but {ts_name!r} does not" + " have is_timeseries=True." + ) + ) + if ts_schema.semantic != schema_lib.FeatureSemantic.TIMESTAMP: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} references" + f" timestamps feature {ts_name!r}, but {ts_name!r} does not" + " have semantic=TIMESTAMP." + ) + ) + continue + ts_shape = ts_schema.shape or () + feat_shape = feature_schema.shape or () + if len(ts_shape) != 1: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} references" + f" timestamps feature {ts_name!r}, but {ts_name!r} must have" + " exactly 1 sequence dimension in schema shape." + ) + ) + if len(feat_shape) < 1: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} references" + f" timestamps feature {ts_name!r}, but {feature_name!r} must" + " have at least 1 sequence dimension in schema shape." + ) + ) + if ( + len(ts_shape) == 1 + and len(feat_shape) >= 1 + and ts_shape[0] != feat_shape[0] + ): + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} has schema shape" + f" {feat_shape} whose 0th dimension ({feat_shape[0]}) does not" + f" match timestamps feature {ts_name!r} schema shape 0th" + f" dimension ({ts_shape[0]})." + ) + ) + if feature_name in featureset_data and ts_name in featureset_data: + if feat_shape and feat_shape[0] is None: + feature_data = featureset_data[feature_name] + ts_data = featureset_data[ts_name] + if len(feature_data) == len(ts_data): + for i in range(len(feature_data)): + f_val = feature_data[i] + t_val = ts_data[i] + if f_val is not None and t_val is not None: + f_len = len(f_val) if hasattr(f_val, "__len__") else 1 + t_len = len(t_val) if hasattr(t_val, "__len__") else 1 + if f_len != t_len: + items.append( + Issue.error( + f"The feature {feature_name!r} in {source} has a" + f" variable-length timeseries at index {i} of" + f" length {f_len}, which does not match the" + f" timestamps sequence {ts_name!r} of length" + f" {t_len}." + ) + ) + break + return items diff --git a/dgf/src/validate/in_memory_graph_test.py b/dgf/src/validate/in_memory_graph_test.py index 6f73035..7dc1809 100644 --- a/dgf/src/validate/in_memory_graph_test.py +++ b/dgf/src/validate/in_memory_graph_test.py @@ -20,13 +20,14 @@ from dgf.src.util import test_util from dgf.src.validate import in_memory_graph as in_memory_graph_validate_lib from dgf.src.validate import validate as validate_lib +import numpy as np Issue = validate_lib.Issue test_util.disable_diff_truncation() -def good_graph() -> ( +def valid_graph() -> ( Tuple[in_memory_graph_lib.InMemoryGraph, schema_lib.GraphSchema] ): graph = gen_test_graph.generate_in_memory_graph( @@ -43,8 +44,8 @@ def good_graph() -> ( class InMemoryGraphTest(absltest.TestCase): - def test_good(self): - graph, schema = good_graph() + def test_valid(self): + graph, schema = valid_graph() issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual(issues, []) @@ -76,7 +77,7 @@ def test_warning(self): ) def test_missing_nodeset(self): - graph, schema = good_graph() + graph, schema = valid_graph() del graph.node_sets["n1"] issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -84,7 +85,7 @@ def test_missing_nodeset(self): ) def test_missing_feature(self): - graph, schema = good_graph() + graph, schema = valid_graph() del graph.node_sets["n1"].features["f1"] issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -92,7 +93,7 @@ def test_missing_feature(self): ) def test_wrong_feature_type(self): - graph, schema = good_graph() + graph, schema = valid_graph() schema.node_sets["n1"].features[ "f1" ].format = schema_lib.FeatureFormat.INTEGER_32 @@ -110,7 +111,7 @@ def test_wrong_feature_type(self): ) def test_wrong_shape(self): - graph, schema = good_graph() + graph, schema = valid_graph() schema.node_sets["n1"].features["f1"].shape = (2, 3, 4) issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -125,7 +126,7 @@ def test_wrong_shape(self): ) def test_non_existing_source(self): - graph, schema = good_graph() + graph, schema = valid_graph() schema.edge_sets["e1"].source = "non_existing" issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -139,7 +140,7 @@ def test_non_existing_source(self): ) def test_non_existing_target(self): - graph, schema = good_graph() + graph, schema = valid_graph() schema.edge_sets["e1"].target = "non_existing" issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -153,7 +154,7 @@ def test_non_existing_target(self): ) def test_out_bound_source(self): - graph, schema = good_graph() + graph, schema = valid_graph() graph.edge_sets["e1"].adjacency[:] += 1 issues = in_memory_graph_validate_lib.issues(graph, schema) self.assertEqual( @@ -167,6 +168,254 @@ def test_out_bound_source(self): ], ) + def test_timestamps_missing_reference(self): + graph, schema = valid_graph() + schema.node_sets["n1"].features["f1"].is_timeseries = True + schema.node_sets["n1"].features["f1"].timestamps = "missing_time" + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'f1' in nodeset 'n1' references timestamps feature" + " 'missing_time' which is not defined in the schema." + ) + ], + ) + + def test_timestamps_not_timeseries(self): + graph, schema = valid_graph() + schema.node_sets["n1"].features["f1"].is_timeseries = False + schema.node_sets["n1"].features["f1"].timestamps = "time" + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'f1' in nodeset 'n1' has timestamps set to 'time'," + " but is_timeseries is False." + ), + Issue.error( + "The feature 'f1' in nodeset 'n1' references timestamps feature" + " 'time' which is not defined in the schema." + ), + ], + ) + + def test_timeseries_length_mismatch(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(None,), + is_timeseries=True, + ) + schema.node_sets["n1"].features["val"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(None,), + is_timeseries=True, + timestamps="time", + ) + graph.node_sets["n1"].features["time"] = np.array( + [[10, 20]] + [[10]] * (num_nodes - 1), dtype=object + ) + graph.node_sets["n1"].features["val"] = np.array( + [[1.5]] + [[2.5]] * (num_nodes - 1), dtype=object + ) + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'val' in nodeset 'n1' has a variable-length" + " timeseries at index 0 of length 1, which does not match the" + " timestamps sequence 'time' of length 2." + ) + ], + ) + + def test_valid_temporal_graph(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(None,), + is_timeseries=True, + ) + schema.node_sets["n1"].features["val"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(None,), + is_timeseries=True, + timestamps="time", + ) + graph.node_sets["n1"].features["time"] = np.array( + [[10, 20]] + [[30]] * (num_nodes - 1), dtype=object + ) + graph.node_sets["n1"].features["val"] = np.array( + [[1.5, 2.5]] + [[3.5]] * (num_nodes - 1), dtype=object + ) + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual(issues, []) + + def test_timestamps_target_not_timeseries(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + is_timeseries=False, + ) + graph.node_sets["n1"].features["time"] = np.zeros( + num_nodes, dtype=np.int64 + ) + schema.node_sets["n1"].features["f1"].is_timeseries = True + schema.node_sets["n1"].features["f1"].timestamps = "time" + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'f1' in nodeset 'n1' references timestamps feature" + " 'time', but 'time' does not have is_timeseries=True." + ) + ], + ) + + def test_timestamps_target_wrong_semantic(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(None,), + is_timeseries=True, + ) + graph.node_sets["n1"].features["time"] = np.array( + [[10]] * num_nodes, dtype=object + ) + schema.node_sets["n1"].features["f1"].is_timeseries = True + schema.node_sets["n1"].features["f1"].timestamps = "time" + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'f1' in nodeset 'n1' references timestamps feature" + " 'time', but 'time' does not have semantic=TIMESTAMP." + ) + ], + ) + + def test_timeseries_ndarray_length_mismatch(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(5,), + is_timeseries=True, + ) + schema.node_sets["n1"].features["val"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(3,), + is_timeseries=True, + timestamps="time", + ) + graph.node_sets["n1"].features["time"] = np.zeros( + (num_nodes, 5), dtype=np.int64 + ) + graph.node_sets["n1"].features["val"] = np.zeros( + (num_nodes, 3), dtype=np.float32 + ) + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'val' in nodeset 'n1' has schema shape (3,) whose" + " 0th dimension (3) does not match timestamps feature 'time'" + " schema shape 0th dimension (5)." + ) + ], + ) + + def test_timestamps_schema_shape_incompatible(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(10, 2), + is_timeseries=True, + ) + schema.node_sets["n1"].features["val"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(), + is_timeseries=True, + timestamps="time", + ) + graph.node_sets["n1"].features["time"] = np.zeros( + (num_nodes, 10, 2), dtype=np.int64 + ) + graph.node_sets["n1"].features["val"] = np.zeros( + num_nodes, dtype=np.float32 + ) + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + "The feature 'val' in nodeset 'n1' references timestamps" + " feature 'time', but 'time' must have exactly 1 sequence" + " dimension in schema shape." + ), + Issue.error( + "The feature 'val' in nodeset 'n1' references timestamps" + " feature 'time', but 'val' must have at least 1 sequence" + " dimension in schema shape." + ), + ], + ) + + def test_static_timeseries_data_length_mismatch(self): + graph, schema = valid_graph() + num_nodes = graph.node_sets["n1"].num_nodes + schema.node_sets["n1"].features["time"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.INTEGER_64, + semantic=schema_lib.FeatureSemantic.TIMESTAMP, + shape=(5,), + is_timeseries=True, + ) + schema.node_sets["n1"].features["val"] = schema_lib.FeatureSchema( + format=schema_lib.FeatureFormat.FLOAT_32, + semantic=schema_lib.FeatureSemantic.NUMERICAL, + shape=(5,), + is_timeseries=True, + timestamps="time", + ) + graph.node_sets["n1"].features["time"] = np.zeros( + (num_nodes, 5), dtype=np.int64 + ) + graph.node_sets["n1"].features["val"] = np.zeros( + (num_nodes, 3), dtype=np.float32 + ) + issues = in_memory_graph_validate_lib.issues(graph, schema) + self.assertEqual( + issues, + [ + Issue.error( + f"The feature 'val' in nodeset 'n1' has shape ({num_nodes}, 3)," + " but the schema expects dimension 0 to be 5." + ) + ], + ) + if __name__ == "__main__": absltest.main()