-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2356-NumberofUniqueSubjectsTaughtbyEachTeacher.py
More file actions
89 lines (79 loc) · 3.48 KB
/
2356-NumberofUniqueSubjectsTaughtbyEachTeacher.py
File metadata and controls
89 lines (79 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# 2356. Number of Unique Subjects Taught by Each Teacher
# Table: Teacher
# +-------------+------+
# | Column Name | Type |
# +-------------+------+
# | teacher_id | int |
# | subject_id | int |
# | dept_id | int |
# +-------------+------+
# (subject_id, dept_id) is the primary key (combinations of columns with unique values) of this table.
# Each row in this table indicates that the teacher with teacher_id teaches the subject subject_id in the department dept_id.
# Write a solution to calculate the number of unique subjects each teacher teaches in the university.
# Return the result table in any order.
# The result format is shown in the following example.
# Example 1:
# Input:
# Teacher table:
# +------------+------------+---------+
# | teacher_id | subject_id | dept_id |
# +------------+------------+---------+
# | 1 | 2 | 3 |
# | 1 | 2 | 4 |
# | 1 | 3 | 3 |
# | 2 | 1 | 1 |
# | 2 | 2 | 1 |
# | 2 | 3 | 1 |
# | 2 | 4 | 1 |
# +------------+------------+---------+
# Output:
# +------------+-----+
# | teacher_id | cnt |
# +------------+-----+
# | 1 | 2 |
# | 2 | 4 |
# +------------+-----+
# Explanation:
# Teacher 1:
# - They teach subject 2 in departments 3 and 4.
# - They teach subject 3 in department 3.
# Teacher 2:
# - They teach subject 1 in department 1.
# - They teach subject 2 in department 1.
# - They teach subject 3 in department 1.
# - They teach subject 4 in department 1.
import pandas as pd
# def count_unique_subjects(teacher: pd.DataFrame) -> pd.DataFrame:
# # 取值
# teacher = teacher[["teacher_id","subject_id"]]
# # 去重
# teacher = teacher.drop_duplicates()
# # 统计
# teacher = teacher.groupby('teacher_id').count().reset_index('cnt')
# return teacher
def count_unique_subjects(teacher: pd.DataFrame) -> pd.DataFrame:
# 取值
teacher = teacher[['teacher_id', 'subject_id']]
# 去重
teacher = teacher.drop_duplicates()
# 按 teacher_id 分组统计并设定名称
return teacher.groupby('teacher_id').size().reset_index(name='cnt')
# nunique
def count_unique_subjects1(teacher: pd.DataFrame) -> pd.DataFrame:
group = teacher.groupby("teacher_id")['subject_id'].nunique().reset_index()
return group.rename(columns={'subject_id': 'cnt'})
if __name__ == "__main__":
data = [[1, 2, 3], [1, 2, 4], [1, 3, 3], [2, 1, 1], [2, 2, 1], [2, 3, 1], [2, 4, 1]]
teacher = pd.DataFrame(data, columns=['teacher_id', 'subject_id', 'dept_id']).astype({'teacher_id':'Int64', 'subject_id':'Int64', 'dept_id':'Int64'})
print(count_unique_subjects(teacher))
print(count_unique_subjects1(teacher))
# value_counts( )函数
# 在pandas中,value_counts常用于数据表的计数及排序,它可以用来查看数据表中,指定列里有多少个不同的数据值,并计算每个不同值有在该列中的个数,同时还能根据需要进行排序。
# 函数体
# value_counts(values,sort=True, ascending=False, normalize=False,bins=None,dropna=True)
# 主要参数:
# sort=True: 是否要进行排序;默认进行排序
# ascending=False: 默认降序排列;
# normalize=False: 是否要对计算结果进行标准化并显示标准化后的结果,默认是False。
# bins=None: 可以自定义分组区间,默认是否;
# dropna=True:是否删除缺失值nan,默认删除