-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathviews_db.py
More file actions
140 lines (133 loc) · 5.6 KB
/
views_db.py
File metadata and controls
140 lines (133 loc) · 5.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
### Import necessary packages and functions
import sqlite3
from sql_funcs import *
database = 'movies.db'
table_movies = 'joint'
table_dir = 'director'
table_rat = 'rating'
table_link = 'joint_genres'
table_gen = 'genres'
view_core = 'Netflix_Movies'
view_dirs = 'Top_Directors'
view_gross = 'Gross_by_Dir'
view_bestdir = 'Best_Director'
### ----------------------------------------------------
### Connect to the database
try:
conn = sqlite3.connect(database)
except:
print(f'Could not connect to the database to drop table')
### ----------------------------------------------------
### ----------------------------------------------------
### This query will pull out all of the relevant information
### from the table we added to the database, which was the
### union between the netflix and imdb base tables. This
### query also pulls out the information from the tables
### created for the names of the directors, genres, and
### ratings associated with each movie.
full_query = f"""SELECT mov.Title AS 'Title',
mov.Year_Released AS 'Year',
dir.name AS 'Director',
rat.rating AS 'Rating',
GROUP_CONCAT(gen.Genre,', ') AS 'Genres',
mov.Runtime AS 'Run Time',
mov.IMDB_Score AS 'IMDB Score',
mov.Meta_Score AS 'Meta Score',
mov.Votes AS 'Votes',
mov.Gross AS 'Box Office (Gross)'
FROM {table_movies} AS mov
LEFT OUTER JOIN {table_dir} AS dir
ON mov.Director = dir.id
LEFT OUTER JOIN {table_rat} AS rat
ON mov.Rating = rat.id
INNER JOIN {table_link} AS link
ON mov.id = link.joint_id
JOIN {table_gen} AS gen
ON gen.id = link.genre_id
GROUP BY mov.Title
ORDER BY mov.Title;"""
### Drop and create/save a view based on the query
drop_view(conn, view_core)
command = f'CREATE VIEW {view_core} AS ' + full_query
create_view(conn, command)
# query = f'SELECT * FROM {view_core};'
# print(get_query(conn, query))
### ----------------------------------------------------
### ----------------------------------------------------
### This query pulls out the top directors, their movie
### count, the names of their movies, the average runtime
### of their movies, and the average "scores" of their
### movies in terms of website score, votes, and average/
### total gross at the box office. It uses GROUP BY
### to look at these values per director.
topdir_query = f"""SELECT dir.name AS 'Director',
COUNT(mov.Title) AS 'Total Movies',
GROUP_CONCAT(mov.Title,', ') AS 'Movies',
AVG(mov.Runtime) AS 'Average Run Time',
AVG(mov.IMDB_Score) AS 'Average IMDB Score',
AVG(mov.Meta_Score) AS 'Average Meta Score',
AVG(mov.Votes) AS 'Average Votes',
AVG(mov.Gross) AS 'Average Film Gross',
SUM(mov.Gross) AS 'Total Films Gross'
FROM {table_movies} AS mov
LEFT OUTER JOIN {table_dir} AS dir
ON mov.Director = dir.id
GROUP BY mov.Director
ORDER BY AVG(mov.Gross) DESC"""
### Drop and create/save a view based on the query
drop_view(conn, view_dirs)
command = f'CREATE VIEW {view_dirs} AS ' + topdir_query+';'
create_view(conn, command)
# query = f'SELECT * FROM {view_dirs};'
# print(get_query(conn, query))
### ----------------------------------------------------
### ----------------------------------------------------
### This query uses window functions to look at each film
### in the database and partition them by director, creating
### running totals for the count of movies they've done.
### We also track a running total of gross at the box office
### As well as their updated average they earned at the box
### per film. Entries (movies) are ordered by release year.
gross_query = f"""SELECT mov.Title as 'Film',
mov.Year_Released as 'Year',
dir.name AS 'Director',
mov.Gross AS 'Film Gross',
COUNT(mov.Title) OVER
(PARTITION BY dir.name ORDER BY mov.Year_Released)
AS 'No. Films',
AVG(mov.Gross) OVER
(PARTITION BY dir.name ORDER BY mov.Year_Released)
AS 'Average Gross',
SUM(mov.Gross) OVER
(PARTITION BY dir.name ORDER BY mov.Year_Released)
AS 'Total Films Gross'
FROM {table_movies} AS mov
LEFT OUTER JOIN {table_dir} AS dir
ON mov.Director = dir.id
ORDER BY mov.Year_Released;"""
### Drop and create/save a view based on the query
drop_view(conn, view_gross)
command = f'CREATE VIEW {view_gross} AS ' + gross_query
create_view(conn, command)
# query = f'SELECT * FROM {view_gross};'
# print(get_query(conn, query))
### ----------------------------------------------------
### ----------------------------------------------------
### THis query takes the previous "top directors" view
### and creates a CTE from this, then selecting the
### Director with the highest average box office pull
### per movie for directors with more than 1 movie in
### the database.
bestdir_query = f"""WITH topdirs AS ({topdir_query})
SELECT topdirs.Director
FROM topdirs
WHERE topdirs.'Total Movies' > 1
LIMIT 1;"""
### Drop and create/save a view based on the query
drop_view(conn, view_bestdir)
command = f'CREATE VIEW {view_bestdir} AS ' + bestdir_query
create_view(conn, command)
# query = f'SELECT * FROM {view_bestdir};'
# print(get_query(conn, query)[0][0])
### ----------------------------------------------------
conn.close()