|
28 | 28 | from avro.io import DatumReader |
29 | 29 | import json |
30 | 30 |
|
| 31 | +from tests.beeswax.impala_beeswax import ImpalaBeeswaxException |
31 | 32 | from tests.common.impala_test_suite import ImpalaTestSuite, LOG |
32 | 33 | from tests.common.skip import SkipIf |
33 | 34 |
|
@@ -71,18 +72,54 @@ def test_drop_incomplete_table(self, vector, unique_database): |
71 | 72 | self.hdfs_client.delete_file_dir(cat_location, True) |
72 | 73 | self.execute_query_expect_success(self.client, """drop table {0}""".format(tbl_name)) |
73 | 74 |
|
74 | | - @SkipIf.not_hdfs |
75 | 75 | def test_insert(self, vector, unique_database): |
76 | 76 | self.run_test_case('QueryTest/iceberg-insert', vector, use_db=unique_database) |
77 | 77 |
|
78 | 78 | def test_partitioned_insert(self, vector, unique_database): |
79 | 79 | self.run_test_case('QueryTest/iceberg-partitioned-insert', vector, |
80 | 80 | use_db=unique_database) |
81 | 81 |
|
82 | | - @SkipIf.not_hdfs |
83 | 82 | def test_insert_overwrite(self, vector, unique_database): |
| 83 | + """Run iceberg-overwrite tests, then test that INSERT INTO/OVERWRITE queries running |
| 84 | + concurrently with a long running INSERT OVERWRITE are handled gracefully. query_a is |
| 85 | + started before query_b/query_c, but query_b/query_c are supposed to finish first. |
| 86 | + query_a should fail because the overwrite should not erase query_b/query_c's result. |
| 87 | + """ |
| 88 | + # Run iceberg-overwrite.test |
84 | 89 | self.run_test_case('QueryTest/iceberg-overwrite', vector, use_db=unique_database) |
85 | 90 |
|
| 91 | + # Create test dataset for concurrency tests and warm-up the test table |
| 92 | + tbl_name = unique_database + ".overwrite_tbl" |
| 93 | + self.client.execute("""create table {0} (i int) |
| 94 | + partitioned by spec (truncate(3, i)) |
| 95 | + stored as iceberg""".format(tbl_name)) |
| 96 | + self.client.execute("insert into {0} values (1), (2), (3);".format(tbl_name)) |
| 97 | + |
| 98 | + # Test queries: 'a' is the long running query while 'b' and 'c' are the short ones |
| 99 | + query_a = """insert overwrite {0} select sleep(5000);""".format(tbl_name) |
| 100 | + query_b = """insert overwrite {0} select * from {0};""".format(tbl_name) |
| 101 | + query_c = """insert into {0} select * from {0};""".format(tbl_name) |
| 102 | + |
| 103 | + # Test concurrent INSERT OVERWRITEs; the exception closes the query handle. |
| 104 | + handle = self.client.execute_async(query_a) |
| 105 | + time.sleep(1) |
| 106 | + self.client.execute(query_b) |
| 107 | + try: |
| 108 | + self.client.wait_for_finished_timeout(handle, 30) |
| 109 | + assert False |
| 110 | + except ImpalaBeeswaxException as e: |
| 111 | + assert "Found conflicting files" in str(e) |
| 112 | + |
| 113 | + # Test INSERT INTO during INSERT OVERWRITE; the exception closes the query handle. |
| 114 | + handle = self.client.execute_async(query_a) |
| 115 | + time.sleep(1) |
| 116 | + self.client.execute(query_c) |
| 117 | + try: |
| 118 | + self.client.wait_for_finished_timeout(handle, 30) |
| 119 | + assert False |
| 120 | + except ImpalaBeeswaxException as e: |
| 121 | + assert "Found conflicting files" in str(e) |
| 122 | + |
86 | 123 | def test_ctas(self, vector, unique_database): |
87 | 124 | self.run_test_case('QueryTest/iceberg-ctas', vector, use_db=unique_database) |
88 | 125 |
|
|
0 commit comments