-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3554-FindCategoryRecommendationPairs.sql
More file actions
179 lines (173 loc) · 8.36 KB
/
3554-FindCategoryRecommendationPairs.sql
File metadata and controls
179 lines (173 loc) · 8.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
-- 3554. Find Category Recommendation Pairs
-- Table: ProductPurchases
-- +-------------+------+
-- | Column Name | Type |
-- +-------------+------+
-- | user_id | int |
-- | product_id | int |
-- | quantity | int |
-- +-------------+------+
-- (user_id, product_id) is the unique identifier for this table.
-- Each row represents a purchase of a product by a user in a specific quantity.
-- Table: ProductInfo
-- +-------------+---------+
-- | Column Name | Type |
-- +-------------+---------+
-- | product_id | int |
-- | category | varchar |
-- | price | decimal |
-- +-------------+---------+
-- product_id is the unique identifier for this table.
-- Each row assigns a category and price to a product.
-- Amazon wants to understand shopping patterns across product categories. Write a solution to:
-- Find all category pairs (where category1 < category2)
-- For each category pair, determine the number of unique customers who purchased products from both categories
-- A category pair is considered reportable if at least 3 different customers have purchased products from both categories.
-- Return the result table of reportable category pairs ordered by customer_count in descending order, and in case of a tie, by category1 in ascending order lexicographically, and then by category2 in ascending order.
-- The result format is in the following example.
-- Example:
-- Input:
-- ProductPurchases table:
-- +---------+------------+----------+
-- | user_id | product_id | quantity |
-- +---------+------------+----------+
-- | 1 | 101 | 2 |
-- | 1 | 102 | 1 |
-- | 1 | 201 | 3 |
-- | 1 | 301 | 1 |
-- | 2 | 101 | 1 |
-- | 2 | 102 | 2 |
-- | 2 | 103 | 1 |
-- | 2 | 201 | 5 |
-- | 3 | 101 | 2 |
-- | 3 | 103 | 1 |
-- | 3 | 301 | 4 |
-- | 3 | 401 | 2 |
-- | 4 | 101 | 1 |
-- | 4 | 201 | 3 |
-- | 4 | 301 | 1 |
-- | 4 | 401 | 2 |
-- | 5 | 102 | 2 |
-- | 5 | 103 | 1 |
-- | 5 | 201 | 2 |
-- | 5 | 202 | 3 |
-- +---------+------------+----------+
-- ProductInfo table:
-- +------------+-------------+-------+
-- | product_id | category | price |
-- +------------+-------------+-------+
-- | 101 | Electronics | 100 |
-- | 102 | Books | 20 |
-- | 103 | Books | 35 |
-- | 201 | Clothing | 45 |
-- | 202 | Clothing | 60 |
-- | 301 | Sports | 75 |
-- | 401 | Kitchen | 50 |
-- +------------+-------------+-------+
-- Output:
-- +-------------+-------------+----------------+
-- | category1 | category2 | customer_count |
-- +-------------+-------------+----------------+
-- | Books | Clothing | 3 |
-- | Books | Electronics | 3 |
-- | Clothing | Electronics | 3 |
-- | Electronics | Sports | 3 |
-- +-------------+-------------+----------------+
-- Explanation:
-- Books-Clothing:
-- User 1 purchased products from Books (102) and Clothing (201)
-- User 2 purchased products from Books (102, 103) and Clothing (201)
-- User 5 purchased products from Books (102, 103) and Clothing (201, 202)
-- Total: 3 customers purchased from both categories
-- Books-Electronics:
-- User 1 purchased products from Books (102) and Electronics (101)
-- User 2 purchased products from Books (102, 103) and Electronics (101)
-- User 3 purchased products from Books (103) and Electronics (101)
-- Total: 3 customers purchased from both categories
-- Clothing-Electronics:
-- User 1 purchased products from Clothing (201) and Electronics (101)
-- User 2 purchased products from Clothing (201) and Electronics (101)
-- User 4 purchased products from Clothing (201) and Electronics (101)
-- Total: 3 customers purchased from both categories
-- Electronics-Sports:
-- User 1 purchased products from Electronics (101) and Sports (301)
-- User 3 purchased products from Electronics (101) and Sports (301)
-- User 4 purchased products from Electronics (101) and Sports (301)
-- Total: 3 customers purchased from both categories
-- Other category pairs like Clothing-Sports (only 2 customers: Users 1 and 4) and Books-Kitchen (only 1 customer: User 3) have fewer than 3 shared customers and are not included in the result.
-- The result is ordered by customer_count in descending order. Since all pairs have the same customer_count of 3, they are ordered by category1 (then category2) in ascending order.
-- CREATE TABLE if not exists ProductPurchases (
-- user_id INT,
-- product_id INT,
-- quantity INT
-- )
-- CREATE TABLE if not exists ProductInfo (
-- product_id INT,
-- category VARCHAR(100),
-- price DECIMAL(10, 2)
-- )
-- Truncate table ProductPurchases
-- insert into ProductPurchases (user_id, product_id, quantity) values ('1', '101', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('1', '102', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('1', '201', '3')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('1', '301', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('2', '101', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('2', '102', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('2', '103', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('2', '201', '5')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('3', '101', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('3', '103', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('3', '301', '4')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('3', '401', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('4', '101', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('4', '201', '3')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('4', '301', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('4', '401', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('5', '102', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('5', '103', '1')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('5', '201', '2')
-- insert into ProductPurchases (user_id, product_id, quantity) values ('5', '202', '3')
-- Truncate table ProductInfo
-- insert into ProductInfo (product_id, category, price) values ('101', 'Electronics', '100')
-- insert into ProductInfo (product_id, category, price) values ('102', 'Books', '20')
-- insert into ProductInfo (product_id, category, price) values ('103', 'Books', '35')
-- insert into ProductInfo (product_id, category, price) values ('201', 'Clothing', '45')
-- insert into ProductInfo (product_id, category, price) values ('202', 'Clothing', '60')
-- insert into ProductInfo (product_id, category, price) values ('301', 'Sports', '75')
-- insert into ProductInfo (product_id, category, price) values ('401', 'Kitchen', '50')
-- Write your PostgreSQL query statement below
WITH tb1 AS (
SELECT
user_id,
category
FROM
ProductPurchases AS p
INNER JOIN
ProductInfo AS i
ON
p.product_id = i.product_id
),
tb2 AS (
SELECT
t1.category AS category1,
t2.category AS category2,
COUNT( DISTINCT t1.user_id ) AS customer_count
FROM
tb1 AS t1
INNER JOIN
tb1 AS t2
ON
t1.category < t2.category AND -- 查找所有 类别对(其中 category1 < category2)
t1.user_id = t2.user_id
GROUP BY
t1.category, t2.category
HAVING(COUNT(DISTINCT t1.user_id)) > 2 -- 对于 每个类别对,确定 同时 购买了两类别产品的 不同用户 数量
)
SELECT
category1,
category2,
customer_count
FROM
tb2
ORDER BY
customer_count DESC, category1, category2 -- 结果表以 customer_count 降序 排序,并且为了防止排序持平,以 category1 字典序 升序 排序,然后以 category2 升序 排序