Skip to content

Commit 798368c

Browse files
author
David Stirling
committed
Check_tables improvements. Exclude empty columns before removing rows.
1 parent 1eeaef7 commit 798368c

1 file changed

Lines changed: 30 additions & 4 deletions

File tree

cpa/dbconnect.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,7 +1618,31 @@ def CreateObjectCheckedTable(self):
16181618
object_table = p.object_table
16191619
object_table = object_table.split('_checked')[0]
16201620

1621+
# Try to get a quick count of how many table rows we started with.
1622+
try:
1623+
query = f"SELECT COUNT(*) FROM {object_table}"
1624+
initial_count = self.execute(query)[0][0]
1625+
except:
1626+
logging.error("Unable to count table rows")
1627+
initial_count = 0
1628+
16211629
all_cols = [str(x) for x in self.GetColumnNames(object_table)]
1630+
1631+
# We don't want to obliterate the table if there's an entirely empty column. Let's exclude those.
1632+
# Checking entire columns is expensive, let's restrict our search.
1633+
# First we sample 1 row and identify columns with missing values.
1634+
query = f"SELECT * FROM {object_table} LIMIT 1"
1635+
res = self.execute(query)[0]
1636+
maybe_empty_cols = [col for col, val in zip(all_cols, res) if val is None]
1637+
if len(maybe_empty_cols) > 0:
1638+
# Now let's check whether those missing columns are entirely empty (count will be 0).
1639+
query = f"SELECT {', '.join([f'count({col})' for col in maybe_empty_cols])} from {object_table}"
1640+
res = self.execute(query)[0]
1641+
empty_cols = set([col for col, count in zip(maybe_empty_cols, res) if count == 0])
1642+
# Now we rebuild our table column list without the empty columns
1643+
all_cols = [col for col in all_cols if col not in empty_cols]
1644+
logging.info(f"Table checking dropped {len(empty_cols)} blank columns")
1645+
16221646
AreaShape_Area = [x for x in all_cols if 'AreaShape_Area' in x]
16231647
if DB_TYPE == 'mysql':
16241648
if len(AreaShape_Area) > 0:
@@ -1637,13 +1661,15 @@ def CreateObjectCheckedTable(self):
16371661
query = 'CREATE TABLE %s AS SELECT * FROM %s WHERE (%s) AND (%s)'%(p.object_table, object_table, " IS NOT NULL AND ".join(all_cols), " != '' AND ".join(all_cols))
16381662
self.execute(query)
16391663

1640-
# Check whether we nuked the table.
1664+
# Inform user of what we did. Also check whether we nuked the table.
16411665
try:
1642-
query = f"SELECT COUNT(*) FROM {p.object_table}"
1643-
res = self.execute(query)
1644-
if res[0][0] == 0:
1666+
query = f"SELECT COUNT(*) FROM {p.object_table}"
1667+
res = self.execute(query)[0][0]
1668+
if res == 0:
16451669
logging.error("Table checking removed all rows, you may have an empty column in your database. "
16461670
"Disable check_tables in your properties file if this is expected.")
1671+
else:
1672+
logging.info(f"Table checking removed {initial_count - res} rows with missing values")
16471673
except:
16481674
logging.error("Unable to validate checked object table")
16491675

0 commit comments

Comments
 (0)