-- Add the working columns used by the cleaning steps to public." Cafe_Sales":
--   " Item_Cleaned", " PaymentMethod_Cleaned", " Location_Cleaned"   (TEXT)
--   " Quantity_Cleaned", " PricePerUnit_Cleaned", " TotalSpent_Cleaned" (NUMERIC)
--   " TransactionDate_Cleaned"                                        (DATE)
--   " Day", " Month"                                                  (TEXT)
--
-- All nine columns are added by ONE ALTER TABLE statement, so every ADD COLUMN
-- clause except the last ends with a comma. A semicolon after the DATE column
-- would end the statement early and leave the " Day" / " Month" clauses as
-- stand-alone (invalid) statements.
--
-- NOTE(review): the quoted identifiers carry a leading space exactly as they
-- appear in the source; if that is an artifact of how the script was copied,
-- strip it consistently across the whole script, not just here.
ALTER TABLE public." Cafe_Sales"
    ADD COLUMN " Item_Cleaned" TEXT,
    ADD COLUMN " Quantity_Cleaned" NUMERIC,
    ADD COLUMN " PricePerUnit_Cleaned" NUMERIC,
    ADD COLUMN " TotalSpent_Cleaned" NUMERIC,
    ADD COLUMN " PaymentMethod_Cleaned" TEXT,
    ADD COLUMN " Location_Cleaned" TEXT,
    ADD COLUMN " TransactionDate_Cleaned" DATE,
    ADD COLUMN " Day" TEXT,
    ADD COLUMN " Month" TEXT;

-- Populate every *_Cleaned column from its raw counterpart.
-- The placeholder values ' ERROR' and ' UNKNOWN' become NULL; any other value
-- is copied, converted to the type of the target column:
--   quantity / price per unit / total spent -> NUMERIC
--   transaction date                        -> DATE
--   item / payment method / location        -> copied unchanged (TEXT)
-- The statement has no WHERE clause on purpose: every row is rewritten.
--
-- NOTE(review): the placeholder literals are kept exactly as written in the
-- source (with a leading space) — confirm they match the stored raw values.
UPDATE public." Cafe_Sales"
SET
    " Item_Cleaned" =
        CASE
            WHEN " Item" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE " Item"
        END,

    " Quantity_Cleaned" =
        CASE
            WHEN " Quantity" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE CAST(" Quantity" AS NUMERIC)
        END,

    " PricePerUnit_Cleaned" =
        CASE
            WHEN " Price Per Unit" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE CAST(" Price Per Unit" AS NUMERIC)
        END,

    " TotalSpent_Cleaned" =
        CASE
            WHEN " Total Spent" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE CAST(" Total Spent" AS NUMERIC)
        END,

    -- Payment method is descriptive text and the target column is TEXT, so
    -- the value is copied as-is; casting it to NUMERIC would raise an error
    -- for any non-numeric value.
    " PaymentMethod_Cleaned" =
        CASE
            WHEN " Payment Method" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE " Payment Method"
        END,

    " Location_Cleaned" =
        CASE
            WHEN " Location" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE " Location"
        END,

    -- The target column is DATE, so the raw value is cast to DATE rather than
    -- NUMERIC. Assumes the raw text is in a format the server's DateStyle
    -- accepts (e.g. ISO 8601 YYYY-MM-DD) — TODO confirm.
    " TransactionDate_Cleaned" =
        CASE
            WHEN " Transaction Date" IN (' ERROR', ' UNKNOWN') THEN NULL
            ELSE CAST(" Transaction Date" AS DATE)
        END;

-- Backfill missing unit prices from other rows of the same item.
-- The derived table "prices" holds exactly ONE price per item: the most
-- frequent non-NULL price seen for it, via the mode() ordered-set aggregate.
-- A plain SELECT DISTINCT item, price would return several rows for an item
-- recorded with more than one price; the UPDATE would then match a target row
-- more than once and PostgreSQL would apply one of the candidates
-- unpredictably. When every item has a single price, the result is the same
-- as with DISTINCT.
-- Only rows whose cleaned price is still NULL and whose item is known are
-- touched.
UPDATE public." Cafe_Sales" AS cs
SET " PricePerUnit_Cleaned" = prices." PricePerUnit_Cleaned"
FROM (
    SELECT
        " Item_Cleaned",
        mode() WITHIN GROUP (ORDER BY " PricePerUnit_Cleaned") AS " PricePerUnit_Cleaned"
    FROM public." Cafe_Sales"
    WHERE " Item_Cleaned" IS NOT NULL
      AND " PricePerUnit_Cleaned" IS NOT NULL
    GROUP BY " Item_Cleaned"
) AS prices
WHERE cs." Item_Cleaned" = prices." Item_Cleaned"
  AND cs." PricePerUnit_Cleaned" IS NULL;

-- Derive any unit price that is still missing as total spent / quantity,
-- rounded to one decimal place.
-- NULLIF turns a zero quantity into NULL, so such a row keeps a NULL price
-- instead of aborting the whole UPDATE with a division-by-zero error.
-- Rows where either operand is NULL also stay NULL.
UPDATE public." Cafe_Sales"
SET " PricePerUnit_Cleaned" =
        ROUND(" TotalSpent_Cleaned" / NULLIF(" Quantity_Cleaned", 0), 1)
WHERE " PricePerUnit_Cleaned" IS NULL;

-- Pin the cleaned unit price to NUMERIC(10, 1). Existing values are rounded
-- to one decimal place by the USING expression as part of the conversion.
ALTER TABLE public." Cafe_Sales"
    ALTER COLUMN " PricePerUnit_Cleaned"
        SET DATA TYPE NUMERIC(10, 1)
        USING ROUND(" PricePerUnit_Cleaned", 1);

-- Derive any quantity that is still missing as total spent / unit price.
-- NULLIF turns a zero unit price into NULL, so such a row keeps a NULL
-- quantity instead of aborting the whole UPDATE with a division-by-zero
-- error. Rows where either operand is NULL also stay NULL.
UPDATE public." Cafe_Sales"
SET " Quantity_Cleaned" =
        " TotalSpent_Cleaned" / NULLIF(" PricePerUnit_Cleaned", 0)
WHERE " Quantity_Cleaned" IS NULL;

-- Fill in any total that is still missing as unit price x quantity.
-- The result remains NULL when either factor is NULL.
UPDATE public." Cafe_Sales" AS sales
SET " TotalSpent_Cleaned" = sales." PricePerUnit_Cleaned" * sales." Quantity_Cleaned"
WHERE sales." TotalSpent_Cleaned" IS NULL;

-- Derive the weekday name and month name from the cleaned transaction date.
-- TRIM strips the surrounding whitespace from the TO_CHAR output.
-- No WHERE clause on purpose: every row is rewritten, and a NULL date
-- yields NULL for both columns.
UPDATE public." Cafe_Sales"
SET (" Day", " Month") = (
    TRIM(BOTH FROM TO_CHAR(" TransactionDate_Cleaned", ' Day')),
    TRIM(BOTH FROM TO_CHAR(" TransactionDate_Cleaned", ' Month'))
);

-- Infer a missing item name from its unit price.
-- Only the four prices listed below are mapped; presumably these are the
-- prices that identify a single menu item — verify against the menu.
-- Any other price falls through to ELSE and the item stays NULL.
-- The Tea price must be written as the single numeric literal 1.5: a space
-- between "1" and ".5" splits it into two tokens and fails to parse.
UPDATE public." Cafe_Sales"
SET " Item_Cleaned" =
        CASE
            WHEN " PricePerUnit_Cleaned" = 1   THEN ' Cookie'
            WHEN " PricePerUnit_Cleaned" = 1.5 THEN ' Tea'
            WHEN " PricePerUnit_Cleaned" = 2   THEN ' Coffee'
            WHEN " PricePerUnit_Cleaned" = 5   THEN ' Salad'
            ELSE " Item_Cleaned"
        END
WHERE " Item_Cleaned" IS NULL;

-- OPTIONAL: list the cleaned rows in which item, quantity, unit price and
-- total spent are all present, ordered by transaction id.
-- Only those four columns are filtered; payment method, location, date,
-- day and month may still be NULL in the result.
-- The price filter uses " PricePerUnit_Cleaned", the column this script
-- creates and selects; no " PricePerUnit_Complete" column is ever added.
SELECT
    " Transaction ID",
    " Item_Cleaned",
    " Quantity_Cleaned",
    " PricePerUnit_Cleaned",
    " TotalSpent_Cleaned",
    " PaymentMethod_Cleaned",
    " Location_Cleaned",
    " TransactionDate_Cleaned",
    " Day",
    " Month"
FROM public." Cafe_Sales"
WHERE " Item_Cleaned" IS NOT NULL
  AND " Quantity_Cleaned" IS NOT NULL
  AND " PricePerUnit_Cleaned" IS NOT NULL
  AND " TotalSpent_Cleaned" IS NOT NULL
ORDER BY " Transaction ID" ASC;
-- (web-page residue, not part of the script: "0 commit comments")