Skip to content

Commit 593a5c9

Browse files
authored
Merge pull request #14 from zbrookle/documentation
ENH: Add support for other sql datatypes
2 parents 4b173d9 + 277905f commit 593a5c9

5 files changed

Lines changed: 184 additions & 12 deletions

File tree

dataframe_sql/data/avocado.csv

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
avocado_id,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
2+
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
3+
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
4+
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
5+
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
6+
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany
7+
5,5,2015-11-22,1.26,55979.78,1184.27,48067.99,43.61,6683.91,6556.47,127.44,0.0,conventional,2015,Albany
8+
6,6,2015-11-15,0.99,83453.76,1368.92,73672.72,93.26,8318.86,8196.81,122.05,0.0,conventional,2015,Albany
9+
7,7,2015-11-08,0.98,109428.33,703.75,101815.36,80.0,6829.22,6266.85,562.37,0.0,conventional,2015,Albany
10+
8,8,2015-11-01,1.02,99811.42,1022.15,87315.57,85.34,11388.36,11104.53,283.83,0.0,conventional,2015,Albany
11+
9,9,2015-10-25,1.07,74338.76,842.4,64757.44,113.0,8625.92,8061.47,564.45,0.0,conventional,2015,Albany
12+
10,10,2015-10-18,1.12,84843.44,924.86,75595.85,117.07,8205.66,7877.86,327.8,0.0,conventional,2015,Albany
13+
11,11,2015-10-11,1.28,64489.17,1582.03,52677.92,105.32,10123.9,9866.27,257.63,0.0,conventional,2015,Albany
14+
12,12,2015-10-04,1.31,61007.1,2268.32,49880.67,101.36,8756.75,8379.98,376.77,0.0,conventional,2015,Albany
15+
13,13,2015-09-27,0.99,106803.39,1204.88,99409.21,154.84,6034.46,5888.87,145.59,0.0,conventional,2015,Albany
16+
14,14,2015-09-20,1.33,69759.01,1028.03,59313.12,150.5,9267.36,8489.1,778.26,0.0,conventional,2015,Albany
17+
15,15,2015-09-13,1.28,76111.27,985.73,65696.86,142.0,9286.68,8665.19,621.49,0.0,conventional,2015,Albany
18+
16,16,2015-09-06,1.11,99172.96,879.45,90062.62,240.79,7990.1,7762.87,227.23,0.0,conventional,2015,Albany
19+
17,17,2015-08-30,1.07,105693.84,689.01,94362.67,335.43,10306.73,10218.93,87.8,0.0,conventional,2015,Albany
20+
18,18,2015-08-23,1.34,79992.09,733.16,67933.79,444.78,10880.36,10745.79,134.57,0.0,conventional,2015,Albany
21+
19,19,2015-08-16,1.33,80043.78,539.65,68666.01,394.9,10443.22,10297.68,145.54,0.0,conventional,2015,Albany
22+
20,20,2015-08-09,1.12,111140.93,584.63,100961.46,368.95,9225.89,9116.34,109.55,0.0,conventional,2015,Albany
23+
21,21,2015-08-02,1.45,75133.1,509.94,62035.06,741.08,11847.02,11768.52,78.5,0.0,conventional,2015,Albany
24+
22,22,2015-07-26,1.11,106757.1,648.75,91949.05,966.61,13192.69,13061.53,131.16,0.0,conventional,2015,Albany
25+
23,23,2015-07-19,1.26,96617.0,1042.1,82049.4,2238.02,11287.48,11103.49,183.99,0.0,conventional,2015,Albany
26+
24,24,2015-07-12,1.05,124055.31,672.25,94693.52,4257.64,24431.9,24290.08,108.49,33.33,conventional,2015,Albany
27+
25,25,2015-07-05,1.35,109252.12,869.45,72600.55,5883.16,29898.96,29663.19,235.77,0.0,conventional,2015,Albany
28+
26,26,2015-06-28,1.37,89534.81,664.23,57545.79,4662.71,26662.08,26311.76,350.32,0.0,conventional,2015,Albany
29+
27,27,2015-06-21,1.27,104849.39,804.01,76688.55,5481.18,21875.65,21662.0,213.65,0.0,conventional,2015,Albany
30+
28,28,2015-06-14,1.32,89631.3,850.58,55400.94,4377.19,29002.59,28343.14,659.45,0.0,conventional,2015,Albany
31+
29,29,2015-06-07,1.07,122743.06,656.71,99220.82,90.32,22775.21,22314.99,460.22,0.0,conventional,2015,Albany
32+
30,30,2015-05-31,1.23,95123.62,922.37,70469.69,50.55,23681.01,23222.49,458.52,0.0,conventional,2015,Albany
33+
31,31,2015-05-24,1.19,101470.91,680.27,71376.81,58.7,29355.13,28761.81,593.32,0.0,conventional,2015,Albany
34+
32,32,2015-05-17,1.43,109857.47,1150.55,81955.16,94.32,26657.44,26285.43,372.01,0.0,conventional,2015,Albany
35+
33,33,2015-05-10,1.26,120427.91,1420.43,102000.52,185.66,16821.3,16535.55,285.75,0.0,conventional,2015,Albany
36+
34,34,2015-05-03,1.2,59197.67,919.87,45490.05,217.24,12570.51,12201.95,368.56,0.0,conventional,2015,Albany
37+
35,35,2015-04-26,1.22,49585.46,875.65,35841.75,89.62,12778.44,12076.83,701.61,0.0,conventional,2015,Albany
38+
36,36,2015-04-19,1.19,49064.73,774.15,33941.51,47.15,14301.92,13602.97,698.95,0.0,conventional,2015,Albany
39+
37,37,2015-04-12,1.13,48364.29,864.27,30374.15,21.5,17104.37,16438.49,665.88,0.0,conventional,2015,Albany
40+
38,38,2015-04-05,1.16,47362.13,961.77,35577.66,93.76,10728.94,9869.16,755.61,104.17,conventional,2015,Albany
41+
39,39,2015-03-29,1.02,67799.08,1402.28,58623.22,89.5,7684.08,7208.49,475.59,0.0,conventional,2015,Albany
42+
40,40,2015-03-22,1.12,46346.85,2141.83,34313.56,141.8,9749.66,9252.6,497.06,0.0,conventional,2015,Albany
43+
41,41,2015-03-15,1.11,43045.79,2128.26,30447.17,99.67,10370.69,9989.59,381.1,0.0,conventional,2015,Albany
44+
42,42,2015-03-08,1.07,40507.36,795.68,30370.64,159.05,9181.99,8827.55,354.44,0.0,conventional,2015,Albany
45+
43,43,2015-03-01,0.99,55595.74,629.46,45633.34,181.49,9151.45,8986.06,165.39,0.0,conventional,2015,Albany
46+
44,44,2015-02-22,1.07,45675.05,1088.38,35056.13,151.0,9379.54,9000.16,379.38,0.0,conventional,2015,Albany
47+
45,45,2015-02-15,1.06,41567.62,986.66,30045.51,222.42,10313.03,9979.87,333.16,0.0,conventional,2015,Albany
48+
46,46,2015-02-08,0.99,51253.97,1357.37,39111.81,163.25,10621.54,10113.1,508.44,0.0,conventional,2015,Albany
49+
47,47,2015-02-01,0.99,70873.6,1353.9,60017.2,179.32,9323.18,9170.82,152.36,0.0,conventional,2015,Albany
50+
48,48,2015-01-25,1.06,45147.5,941.38,33196.16,164.14,10845.82,10103.35,742.47,0.0,conventional,2015,Albany
51+
49,49,2015-01-18,1.17,44511.28,914.14,31540.32,135.77,11921.05,11651.09,269.96,0.0,conventional,2015,Albany

dataframe_sql/grammar/sql.grammar

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,22 @@ column_name: [NAME "."] NAME
6363

6464

6565
SELECT_CONSTRAINT.9: "ALL"i | "DISTINCT"i
66-
TYPENAME: "object"i | "int64"i | "float64"i | "bool"i | "datetime64"i | "timedelta[ns]"i | "category"i
66+
TYPENAME: "object"i
67+
| "varchar"i
68+
| "int16"i
69+
| "smallint"i
70+
| "int32"i
71+
| "int64"i
72+
| "int"i
73+
| "bigint"i
74+
| "float16"i
75+
| "float32"i
76+
| "float64"i
77+
| "float"i
78+
| "bool"i
79+
| "datetime64"i
80+
| "timestamp"i
81+
| "category"i
6782
?aggregation: NAME -> aggregation_name
6883
alias: NAME -> alias_string
6984
_window_name: NAME

dataframe_sql/parsing/sql_parser.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,34 @@
4242
)
4343
PANDAS_TYPE_PYTHON_TYPE_FUNCTION = {
4444
"object": str,
45+
"string": str,
46+
"int16": int,
47+
"int32": int,
4548
"int64": int,
49+
"float16": float,
50+
"float32": float,
4651
"float64": float,
4752
"bool": bool,
4853
}
54+
55+
TYPE_TO_PANDAS_TYPE = {
56+
"varchar": "string",
57+
"smallint": "int16",
58+
"int": "int32",
59+
"bigint": "int64",
60+
"float": "float64",
61+
"timestamp": "datetime64",
62+
"datetime64": "datetime64",
63+
"timedelta[ns]": "timedelta[ns]",
64+
"category": "category",
65+
}
66+
67+
for TYPE in PANDAS_TYPE_PYTHON_TYPE_FUNCTION:
68+
TYPE_TO_PANDAS_TYPE[TYPE] = TYPE
69+
4970
PANDAS_TYPE_TO_SQL_TYPE = {
5071
"object": String,
72+
"string": String,
5173
"int64": Number,
5274
"float64": Number,
5375
"bool": Bool,
@@ -748,8 +770,6 @@ def rank(self, tokens, rank_function):
748770
:param rank_function: Function to be used in rank evaluation
749771
:return:
750772
"""
751-
print("yes")
752-
753773
expressions = tokens[0]
754774
series_list = []
755775
order_list = []
@@ -846,7 +866,7 @@ def as_type(self, column_and_type):
846866
"""
847867
column = column_and_type[0]
848868
typename = column_and_type[1]
849-
column.typename = typename.value
869+
column.typename = TYPE_TO_PANDAS_TYPE[typename.value]
850870
return column
851871

852872
def literal_cast(self, value_and_type: list):
@@ -1218,7 +1238,10 @@ def handle_non_token_non_tree(query_info: QueryInfo, token, token_pos):
12181238
if isinstance(token, Column):
12191239
query_info.columns.append(token)
12201240
query_info.column_selected[token.name] = True
1241+
# TODO Get rid of collecting this alias information since it's part of the
1242+
# column object
12211243
if token.alias:
1244+
print(query_info.aliases)
12221245
query_info.aliases[token.name] = token.alias
12231246

12241247
if isinstance(token, Expression):
@@ -1398,6 +1421,7 @@ def handle_columns(
13981421
:param internal_transformer: Transformer to transform the where clauses
13991422
:return:
14001423
"""
1424+
print(columns)
14011425
where_value = None
14021426
where_plan = ":"
14031427
if where_expr is not None:
@@ -1415,6 +1439,7 @@ def handle_columns(
14151439
new_frame = first_frame.copy()
14161440
else:
14171441
column_names = []
1442+
final_names = []
14181443
for column in columns:
14191444
true_column_name = self.column_name_map[column.table][
14201445
column.name.lower()
@@ -1426,13 +1451,20 @@ def handle_columns(
14261451
):
14271452
aliases[true_column_name] = column.name
14281453

1454+
if column.alias:
1455+
final_names.append(column.alias)
1456+
else:
1457+
final_names.append(column.name)
1458+
1459+
print(final_names)
14291460
if where_value is not None:
14301461
new_frame = first_frame.loc[where_value, column_names]
14311462
else:
14321463
new_frame = first_frame.loc[:, column_names]
14331464
execution_plan += f".loc[{where_plan}, {column_names}]"
14341465
if aliases:
1435-
new_frame = new_frame.rename(columns=aliases)
1466+
# new_frame = new_frame.rename(columns=aliases)
1467+
new_frame.columns = final_names
14361468
execution_plan += f".rename(columns={aliases})"
14371469

14381470
return new_frame, execution_plan

dataframe_sql/tests/pandas_sql_functionality_test.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from dataframe_sql.sql_objects import AmbiguousColumn
2222
from dataframe_sql.sql_select_query import TableInfo
2323
from dataframe_sql.tests.utils import (
24+
AVOCADO,
2425
DIGIMON_MON_LIST,
2526
DIGIMON_MOVE_LIST,
2627
FOREST_FIRES,
@@ -1320,9 +1321,82 @@ def test_case_statement_with_same_conditions():
13201321
tm.assert_frame_equal(pandas_frame, my_frame)
13211322

13221323

1324+
@assert_state_not_change
1325+
def test_multiple_aliases_same_column():
1326+
"""
1327+
Test multiple aliases on the same column
1328+
:return:
1329+
"""
1330+
my_frame = query(
1331+
"""
1332+
select wind as my_wind, wind as also_the_wind, wind as yes_wind
1333+
from
1334+
forest_fires
1335+
"""
1336+
)
1337+
1338+
pandas_frame = FOREST_FIRES[["wind"]].copy()
1339+
pandas_frame.loc[:, "my_wind"] = FOREST_FIRES["wind"].copy()
1340+
pandas_frame.loc[:, "also_the_wind"] = FOREST_FIRES["wind"]
1341+
pandas_frame.loc[:, "yes_wind"] = FOREST_FIRES["wind"]
1342+
pandas_frame = pandas_frame.drop(columns=["wind"])
1343+
tm.assert_frame_equal(pandas_frame, my_frame)
1344+
1345+
1346+
@assert_state_not_change
1347+
def test_sql_data_types():
1348+
"""
1349+
Tests SQL data types
1350+
:return:
1351+
"""
1352+
my_frame = query(
1353+
"""
1354+
select
1355+
cast(avocado_id as object) as avocado_id_object,
1356+
cast(avocado_id as int16) as avocado_id_int16,
1357+
cast(avocado_id as smallint) as avocado_id_smallint,
1358+
cast(avocado_id as int32) as avocado_id_int32,
1359+
cast(avocado_id as int) as avocado_id_int,
1360+
cast(avocado_id as int64) as avocado_id_int64,
1361+
cast(avocado_id as bigint) as avocado_id_bigint,
1362+
cast(avocado_id as float) as avocado_id_float,
1363+
cast(avocado_id as float16) as avocado_id_float16,
1364+
cast(avocado_id as float32) as avocado_id_float32,
1365+
cast(avocado_id as float64) as avocado_id_float64,
1366+
cast(avocado_id as bool) as avocado_id_bool,
1367+
cast(avocado_id as category) as avocado_id_category,
1368+
cast(date as datetime64) as date,
1369+
cast(date as timestamp) as time,
1370+
cast(region as varchar) as region_varchar
1371+
from avocado
1372+
"""
1373+
)
1374+
1375+
pandas_frame = AVOCADO.copy()[["avocado_id", "Date", "region"]]
1376+
pandas_frame["avocado_id_object"] = pandas_frame["avocado_id"].astype("object")
1377+
pandas_frame["avocado_id_int16"] = pandas_frame["avocado_id"].astype("int16")
1378+
pandas_frame["avocado_id_smallint"] = pandas_frame["avocado_id"].astype("int16")
1379+
pandas_frame["avocado_id_int32"] = pandas_frame["avocado_id"].astype("int32")
1380+
pandas_frame["avocado_id_int"] = pandas_frame["avocado_id"].astype("int32")
1381+
pandas_frame["avocado_id_int64"] = pandas_frame["avocado_id"].astype("int64")
1382+
pandas_frame["avocado_id_bigint"] = pandas_frame["avocado_id"].astype("int64")
1383+
pandas_frame["avocado_id_float"] = pandas_frame["avocado_id"].astype("float")
1384+
pandas_frame["avocado_id_float16"] = pandas_frame["avocado_id"].astype("float16")
1385+
pandas_frame["avocado_id_float32"] = pandas_frame["avocado_id"].astype("float32")
1386+
pandas_frame["avocado_id_float64"] = pandas_frame["avocado_id"].astype("float64")
1387+
pandas_frame["avocado_id_bool"] = pandas_frame["avocado_id"].astype("bool")
1388+
pandas_frame["avocado_id_category"] = pandas_frame["avocado_id"].astype("category")
1389+
pandas_frame["date"] = pandas_frame["Date"].astype("datetime64")
1390+
pandas_frame["time"] = pandas_frame["Date"].astype("datetime64")
1391+
pandas_frame["region_varchar"] = pandas_frame["region"].astype("string")
1392+
pandas_frame = pandas_frame.drop(columns=["avocado_id", "Date", "region"])
1393+
1394+
tm.assert_frame_equal(pandas_frame, my_frame)
1395+
1396+
13231397
if __name__ == "__main__":
13241398
register_env_tables()
13251399

1326-
test_in_operator()
1400+
test_sql_data_types()
13271401

13281402
remove_env_tables()

dataframe_sql/tests/utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
"""
22
Shared functions among the tests, such as setting up the test environment
33
"""
4-
import os
54
from pathlib import Path
65

76
from pandas import DataFrame, read_csv
87

98
from dataframe_sql import register_temp_table, remove_temp_table
109

11-
DATA_PATH = os.path.join(Path(__file__).parent.parent, "data")
10+
DATA_PATH = Path(__file__).parent.parent / "data"
1211

1312

1413
# Import the data for testing
15-
FOREST_FIRES = read_csv(os.path.join(DATA_PATH, "forestfires.csv"))
16-
DIGIMON_MON_LIST = read_csv(os.path.join(DATA_PATH, "DigiDB_digimonlist.csv"))
17-
DIGIMON_MOVE_LIST = read_csv(os.path.join(DATA_PATH, "DigiDB_movelist.csv"))
18-
DIGIMON_SUPPORT_LIST = read_csv(os.path.join(DATA_PATH, "DigiDB_supportlist.csv"))
14+
FOREST_FIRES = read_csv(DATA_PATH / "forestfires.csv")
15+
DIGIMON_MON_LIST = read_csv(DATA_PATH / "DigiDB_digimonlist.csv")
16+
DIGIMON_MOVE_LIST = read_csv(DATA_PATH / "DigiDB_movelist.csv")
17+
DIGIMON_SUPPORT_LIST = read_csv(DATA_PATH / "DigiDB_supportlist.csv")
18+
AVOCADO = read_csv(DATA_PATH / "avocado.csv")
1919

2020
# Name change is for name interference
2121
DIGIMON_MON_LIST["mon_attribute"] = DIGIMON_MON_LIST["Attribute"]

0 commit comments

Comments
 (0)