Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,95 @@ jobs:
--metadata_type=${{ matrix.metadata_type }} \
--table_type=${{ matrix.table_type }} \
--profile=${{ matrix.profile }}

rest-tests:
  # End-to-end check of the REST Iceberg catalog: restores the build produced by
  # `build-tea`, starts a local Greenplum cluster, Minio and Lakekeeper, then
  # queries a pre-seeded Iceberg table through a tea foreign table.
  needs: build-tea
  runs-on: ubuntu-22.04
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Install runtime dependencies
      # Explicit `bash` makes Actions run the script with `-eo pipefail`, so a
      # failed key download in the `wget | apt-key` pipeline fails the step.
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y \
          libevent-2.1-7 \
          libipc-run-perl \
          libxerces-c3.2 \
          libxml2 \
          python2 \
          software-properties-common

        wget -qO- https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
        sudo add-apt-repository -y "deb http://apt.postgresql.org/pub/repos/apt jammy-pgdg main"
        # Set the variable inside the sudo'ed environment; a prefix placed before
        # `sudo` only reaches apt-get if the sudoers policy happens to keep it.
        sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y postgresql-16 postgresql-client-16
        sudo pg_dropcluster 14 main

        sudo ln -s -f python2 /usr/bin/python
        sudo locale-gen "en_US.UTF-8"

    - name: Download runtime artifacts
      uses: actions/download-artifact@v4
      with:
        name: tea-runtime
        path: ci-artifacts

    - name: Restore runtime files
      run: |
        mkdir -p "$HOME/local"
        tar -xzf ci-artifacts/gpdb-with-tea.tar.gz -C "$HOME/local"

    - name: Initialize Greenplum cluster
      run: |
        : # TODO(gmusya): consider using make create-demo-cluster
        sudo locale-gen "ru_RU.CP1251"
        sudo mkdir -p /gpdata
        sudo chown "$USER" /gpdata

        source "$HOME/local/gpdb/greenplum_path.sh"
        export MASTER_DATA_DIRECTORY=/gpdata/master/gpsne-1
        NUM_SEGS=2 bash test/start-gp.sh "$HOME/local/gpdb" /gpdata

    - name: Start Minio and upload test data
      run: |
        wget -q https://dl.min.io/server/minio/release/linux-amd64/minio -O /tmp/minio
        wget -q https://dl.min.io/client/mc/release/linux-amd64/mc -O /tmp/mc
        chmod +x /tmp/minio /tmp/mc

        export CI_PROJECT_DIR="$PWD"

        MINIO_EXECUTABLE=/tmp/minio MC_EXECUTABLE=/tmp/mc \
          MINIO_DATA_DIR=/tmp/minio-data \
          bash test/iceberg/gen/init_minio.sh

    - name: Deploy tea config
      run: |
        source "$HOME/local/gpdb/greenplum_path.sh"
        mkdir -p "$GPHOME/tea"
        cp test/config/tea-config-rest.json "$GPHOME/tea/tea-config.json"
        cp test/config/tea-config-schema.json "$GPHOME/tea/tea-config-schema.json"

    - name: Start Lakekeeper
      # `-eo pipefail`: a failed release download must not be hidden by `tar`.
      shell: bash
      run: |
        sudo pg_createcluster -p 5433 16 main
        sudo sed -i 's/scram-sha-256/trust/g' /etc/postgresql/16/main/pg_hba.conf
        sudo /etc/init.d/postgresql start

        wget -q https://github.com/lakekeeper/lakekeeper/releases/download/v0.12.1/lakekeeper-x86_64-unknown-linux-gnu.tar.gz -O - | tar -xzf -
        export LAKEKEEPER__PG_DATABASE_URL_READ="postgres://postgres@localhost:5433/postgres"
        export LAKEKEEPER__PG_DATABASE_URL_WRITE="postgres://postgres@localhost:5433/postgres"
        export LAKEKEEPER__METRICS__PORT=0
        ./lakekeeper migrate
        psql -h localhost -p 5433 -U postgres -f test/iceberg/gen/setup_lakekeeper.sql postgres
        ./lakekeeper serve &
        lakekeeper_pid=$!

        # Wait until the REST port from tea-config-rest.json (8181) accepts
        # connections instead of sleeping for a fixed time; give up after 60s
        # or as soon as the server process has exited.
        ready=0
        for _ in $(seq 1 60); do
          if ! kill -0 "$lakekeeper_pid" 2>/dev/null; then
            echo "Lakekeeper exited before it started listening" >&2
            exit 1
          fi
          if (exec 3<>/dev/tcp/127.0.0.1/8181) 2>/dev/null; then
            ready=1
            break
          fi
          sleep 1
        done
        if [[ "$ready" != 1 ]]; then
          echo "Lakekeeper did not start listening on 127.0.0.1:8181 within 60s" >&2
          exit 1
        fi

    - name: Check REST catalog support
      run: |
        source "$HOME/local/gpdb/greenplum_path.sh"
        export PGDATABASE=tea_ci
        psql -c 'CREATE EXTENSION tea'
        psql -c "CREATE FOREIGN TABLE rest_test (a bigint, b bigint) server tea_server options(location 'tea://iceberg://gperov.test')"
        OUT=$(psql -Atc "SELECT count(*), sum(a), sum(b) FROM rest_test")
        echo "$OUT"
        # Expected row: count | sum(a) | sum(b) for the seeded gperov.test table.
        if [[ "$OUT" != "9999|49992899|99985798" ]]; then
          echo "Unexpected query result: $OUT" >&2
          exit 1
        fi
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ option(TEA_BUILD_FDW "Build Foreign Data Wrapper" ON)
option(HAS_ARROW_CSV "Arrow built with csv" ON)
option(ICECXX_GENERATOR "" ON)
option(USE_NESSIE "Enable Nessie catalog" ON)
option(USE_REST "Enable REST catalog" ON)
option(TEA_USE_THREAD_SANITIZER "Enable running tests with ThreadSanitizer" ON)

cmake_minimum_required(VERSION 3.25)
Expand Down Expand Up @@ -59,6 +60,10 @@ if(USE_NESSIE)
add_compile_definitions(USE_NESSIE)
endif()

# When the USE_REST option is enabled, define the USE_REST preprocessor symbol
# for everything built from this directory down; the C++ sources wrap their
# REST catalog code in `#if USE_REST`.
if(USE_REST)
  add_compile_definitions(USE_REST)
endif()

enable_testing()
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND})

Expand Down
13 changes: 12 additions & 1 deletion tea/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,14 @@ bool Get(const rapidjson::Value* doc, std::string_view section_prefix, std::stri
*out = CatalogConfig::CatalogType::kNessie;
return true;
}
return false;
#if USE_REST
if (str == "rest") {
*out = CatalogConfig::CatalogType::kREST;
return true;
}
#endif

throw std::runtime_error("Catalog type '" + str + "' is not supported");
}

bool Get(const rapidjson::Value* doc, std::string_view section_prefix, std::string_view section, const std::string& key,
Expand Down Expand Up @@ -377,6 +384,10 @@ arrow::Status ReadValues(Source* src, Config* config, std::string_view section_p
Get(src, section_prefix, "catalog", "type", &config->catalog.type);
GetEndpoints(src, section_prefix, "catalog", "hms", &config->catalog.hms_endpoints);
GetEndpoints(src, section_prefix, "catalog", "nessie", &config->catalog.nessie_endpoints);
#if USE_REST
Get(src, section_prefix, "catalog", "rest_url", &config->catalog.rest_url);
Get(src, section_prefix, "catalog", "rest_warehouse_id", &config->catalog.rest_warehouse_id);
#endif

GetEndpoints(src, section_prefix, "hms", "hms", &config->hms_catalog.hms_endpoints);

Expand Down
7 changes: 7 additions & 0 deletions tea/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,17 @@ struct CatalogConfig {
// Catalog backend selected for this configuration. The member `type` declared
// together with the enum defaults to kHMS.
enum class CatalogType {
kNessie,
kHMS,
#if USE_REST
// Only exists in builds compiled with USE_REST defined to a non-zero value.
kREST,
#endif
} type = CatalogType::kHMS;

// Endpoint lists for the HMS and Nessie catalog backends.
std::vector<Endpoint> hms_endpoints;
std::vector<Endpoint> nessie_endpoints;
#if USE_REST
// REST catalog settings; compiled in only when USE_REST is defined non-zero.
// Filled from the "rest_url" / "rest_warehouse_id" keys of the "catalog"
// config section. GetCatalog throws if either is empty while type is kREST.
std::string rest_url;
std::string rest_warehouse_id;
#endif

bool operator==(const CatalogConfig&) const = default;
};
Expand Down
2 changes: 1 addition & 1 deletion tea/gpext/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ target_link_libraries(
tea PRIVATE reader tea_log teapot_file_filter Arrow::arrow_static
Parquet::parquet_static teapot_grpc_proto gp_filter_convert)

if (USE_NESSIE)
if (USE_NESSIE OR USE_REST)
target_link_libraries(tea PRIVATE cpr)
endif()

Expand Down
14 changes: 14 additions & 0 deletions tea/metadata/access_iceberg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "iceberg/schema.h"
#include "iceberg/tea_hive_catalog.h"
#include "iceberg/tea_nessie_catalog.h"
#include "iceberg/tea_rest_catalog.h"
#include "iceberg/tea_scan.h"

#include "tea/common/cancel.h"
Expand Down Expand Up @@ -73,6 +74,19 @@ std::shared_ptr<iceberg::ice_tea::RemoteCatalog> GetCatalog(const Config& config
#endif
throw std::runtime_error("No correct Nessie endpoints for iceberg catalog were provided");
}
#if USE_REST
case CatalogConfig::CatalogType::kREST: {
if (config.catalog.rest_url.empty()) {
throw std::runtime_error("REST URL for iceberg catalog is not provided");
}

if (config.catalog.rest_warehouse_id.empty()) {
throw std::runtime_error("Warehouse id for iceberg catalog is not provided");
}

return std::make_shared<iceberg::ice_tea::RESTCatalog>(config.catalog.rest_url, config.catalog.rest_warehouse_id);
}
#endif
}
throw std::runtime_error("No any correct endpoint for iceberg catalog were provided");
}
Expand Down
15 changes: 15 additions & 0 deletions test/config/tea-config-rest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"common": {
"s3": {
"access_key": "minioadmin",
"secret_key": "minioadmin",
"endpoint_override": "127.0.0.1:9000",
"scheme": "http"
},
"catalog": {
"type": "rest",
"rest_url": "http://127.0.0.1:8181/catalog",
"rest_warehouse_id": "b498836e-6ecd-11f1-9c23-533b70a81474"
}
}
}
6 changes: 4 additions & 2 deletions test/config/tea-config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
"catalog": {
"type": "object",
"properties": {
"type": { "type": "string", "enum": ["hms", "nessie"] },
"type": { "type": "string", "enum": ["hms", "nessie", "rest"] },
"hms": { "type": "string" },
"nessie": { "type": "string" }
"nessie": { "type": "string" },
"rest_url": { "type": "string" },
"rest_warehouse_id": { "type": "string" }
},
"required": ["type", "hms", "nessie"]
},
Expand Down
13 changes: 13 additions & 0 deletions test/iceberg/gen/setup_lakekeeper.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Seed data for the Lakekeeper catalog database used by the CI REST catalog check.
-- The CI job loads this file with psql right after `lakekeeper migrate`. The
-- statements insert rows directly into Lakekeeper's tables: one project, one
-- warehouse ('wh1'), the namespace {gperov} and the table 'test' with its
-- schema, snapshots and refs. CI then queries it as 'tea://iceberg://gperov.test'.
-- NOTE(review): the column lists must match the schema created by the Lakekeeper
-- version used in CI — re-check this file when that version is bumped.
insert into project(project_name, project_id) values ('Default Project', '00000000-0000-0000-0000-000000000000');
insert into warehouse(warehouse_id, warehouse_name, storage_profile, status, tabular_delete_mode, project_id) values ('b498836e-6ecd-11f1-9c23-533b70a81474', 'wh1', '{"type": "s3", "bucket": "warehouse", "flavor": "s3-compat", "region": "local", "endpoint": "http://127.0.0.1:9000/", "key-prefix": null, "sts-enabled": false, "sts-endpoint": null, "sts-role-arn": null, "storage-layout": null, "assume-role-arn": null, "aws-kms-key-arn": null, "sts-session-tags": {}, "path-style-access": null, "legacy-md5-behavior": null, "remote-signing-enabled": true, "push-s3-delete-disabled": true, "remote-signing-url-style": "auto", "sts-token-validity-seconds": 3600, "allow-alternative-protocols": true}', 'active', 'hard', '00000000-0000-0000-0000-000000000000');
insert into namespace(namespace_id, warehouse_id, namespace_name, namespace_properties) values ('019ef331-4f46-7c02-a050-c2c81b9b6888', 'b498836e-6ecd-11f1-9c23-533b70a81474', '{gperov}', '{"location": "s3://warehouse/019ef331-4f46-7c02-a050-c2c81b9b6888"}');
insert into tabular(tabular_id, namespace_id, name, typ, metadata_location, fs_protocol, fs_location, warehouse_id, tabular_namespace_name) values ('4412d001-c6df-4adb-8854-d3b9e762440c', '019ef331-4f46-7c02-a050-c2c81b9b6888', 'test', 'table', 's3://warehouse/gperov/test/metadata/00003-ca406d8e-6c7b-4672-87ff-bfd76f84f949.metadata.json', 's3', 'warehouse/gperov/test', 'b498836e-6ecd-11f1-9c23-533b70a81474', '{gperov}');
insert into "table"(table_id, table_format_version, last_column_id, last_sequence_number, last_updated_ms, last_partition_id, warehouse_id, next_row_id) values ('4412d001-c6df-4adb-8854-d3b9e762440c', '2', 2, 3, 1713951998102, 999, 'b498836e-6ecd-11f1-9c23-533b70a81474', 0);
insert into table_partition_spec(partition_spec_id, table_id, partition_spec, warehouse_id) values (0, '4412d001-c6df-4adb-8854-d3b9e762440c', '{"fields": [], "spec-id": 0}', 'b498836e-6ecd-11f1-9c23-533b70a81474');
insert into table_schema (schema_id, table_id, schema, warehouse_id) values (0, '4412d001-c6df-4adb-8854-d3b9e762440c', '{"type": "struct", "fields": [{"id": 1, "name": "a", "type": "long", "required": false}, {"id": 2, "name": "b", "type": "long", "required": false}], "schema-id": 0}', 'b498836e-6ecd-11f1-9c23-533b70a81474');
insert into table_snapshot (snapshot_id, table_id, parent_snapshot_id, sequence_number, manifest_list, summary, schema_id, timestamp_ms, warehouse_id) values (5231658854638766100, '4412d001-c6df-4adb-8854-d3b9e762440c', 1638951453256129678, 2, 's3://warehouse/gperov/test/metadata/snap-5231658854638766100-1-7e6e13cb-31fd-4de7-8811-02ce7cec44a9.avro', '{"operation": "append", "spark.app.id": "local-1713951981838", "added-records": "10000", "total-records": "10000", "added-data-files": "6", "added-files-size": "25206", "total-data-files": "6", "total-files-size": "25206", "total-delete-files": "0", "total-equality-deletes": "0", "total-position-deletes": "0", "changed-partition-count": "1"}', 0, 1713951995410, 'b498836e-6ecd-11f1-9c23-533b70a81474'), (7558608030923099867, '4412d001-c6df-4adb-8854-d3b9e762440c', 5231658854638766100, 3, 's3://warehouse/gperov/test/metadata/snap-7558608030923099867-1-41f34bc8-eedf-4573-96b0-10c04e7c84c4.avro', '{"operation": "overwrite", "spark.app.id": "local-1713951981838", "total-records": "10000", "added-files-size": "1391", "total-data-files": "6", "total-files-size": "26597", "added-delete-files": "1", "total-delete-files": "1", "added-position-deletes": "1", "total-equality-deletes": "0", "total-position-deletes": "1", "changed-partition-count": "1", "added-position-delete-files": "1"}', 0, 1713951998102, 'b498836e-6ecd-11f1-9c23-533b70a81474'), (1638951453256129678, '4412d001-c6df-4adb-8854-d3b9e762440c', NULL, 1, 's3://warehouse/gperov/test/metadata/snap-1638951453256129678-1-eea762e4-1b7a-4717-b361-eae34da54fd4.avro', '{"operation": "append", "spark.app.id": "local-1713951981838", "total-records": "0", "total-data-files": "0", "total-files-size": "0", "total-delete-files": "0", "total-equality-deletes": "0", "total-position-deletes": "0", "changed-partition-count": "0"}', 0, 1713951992417, 'b498836e-6ecd-11f1-9c23-533b70a81474');
-- Sort order 0 with no fields, registered as the table's default sort order.
insert into table_sort_order (sort_order_id, table_id, sort_order, warehouse_id) values (0, '4412d001-c6df-4adb-8854-d3b9e762440c', '{"fields": [], "order-id": 0}', 'b498836e-6ecd-11f1-9c23-533b70a81474');
insert into table_default_sort_order (table_id, sort_order_id, warehouse_id) values ('4412d001-c6df-4adb-8854-d3b9e762440c', 0, 'b498836e-6ecd-11f1-9c23-533b70a81474');
-- Branch 'main' points at snapshot 7558608030923099867, the sequence-number-3
-- "overwrite" snapshot inserted into table_snapshot.
insert into table_refs (table_id, table_ref_name, snapshot_id, retention, warehouse_id) values ('4412d001-c6df-4adb-8854-d3b9e762440c', 'main', '7558608030923099867', '{"type": "branch"}','b498836e-6ecd-11f1-9c23-533b70a81474');
-- Partition spec 0 and schema 0 are the table's default spec and current schema.
insert into table_default_partition_spec (table_id, partition_spec_id, warehouse_id) values ('4412d001-c6df-4adb-8854-d3b9e762440c', 0, 'b498836e-6ecd-11f1-9c23-533b70a81474');
insert into table_current_schema (table_id, schema_id, warehouse_id) values ('4412d001-c6df-4adb-8854-d3b9e762440c', 0, 'b498836e-6ecd-11f1-9c23-533b70a81474');
3 changes: 2 additions & 1 deletion vendor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ FetchContent_Declare(
iceberg-cxx
EXCLUDE_FROM_ALL
GIT_REPOSITORY ${GITHUB}/lithium-tech/iceberg-cxx.git
GIT_TAG 614d392831675cdae9569ca70a1e2e281ef60509
GIT_TAG 3e148ac96d07e9bd3a07dbe9a6ae2efdc6559f2a
)

FetchContent_MakeAvailable(googletest hiredis)
Expand All @@ -40,4 +40,5 @@ set(ICECXX_BUILD_ABSEIL OFF CACHE BOOL "")
# ICECXX_* cache entries are set before FetchContent_MakeAvailable(iceberg-cxx),
# presumably so the iceberg-cxx sub-build picks them up as its options.
set(ICECXX_BUILD_TOOLS ON CACHE BOOL "")
# Mirror this project's ICECXX_GENERATOR / USE_NESSIE / USE_REST switches.
# NOTE(review): set(... CACHE ...) without FORCE does not overwrite an entry
# that is already in the cache, so flipping USE_NESSIE or USE_REST in an
# existing build tree may leave the ICECXX_* values stale — confirm whether a
# fresh configure is expected in that case.
set(ICECXX_GENERATOR ${ICECXX_GENERATOR} CACHE BOOL "")
set(ICECXX_USE_NESSIE ${USE_NESSIE} CACHE BOOL "")
set(ICECXX_USE_REST ${USE_REST} CACHE BOOL "")
FetchContent_MakeAvailable(iceberg-cxx)
Loading