Skip to content

Commit 53414dd

Browse files
committed
add apache
1 parent 2a726be commit 53414dd

4 files changed

Lines changed: 228 additions & 0 deletions

File tree

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"%%pyspark\n",
10+
"df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\n",
11+
"format='csv',\n",
12+
"header=True\n",
13+
")\n",
14+
"display(df.limit(10))"
15+
]
16+
}
17+
],
18+
"metadata": {
19+
"language_info": {
20+
"name": "python"
21+
},
22+
"orig_nbformat": 4
23+
},
24+
"nbformat": 4,
25+
"nbformat_minor": 2
26+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from pyspark.sql.types import *\n",
10+
"from pyspark.sql.functions import *\n",
11+
"\n",
12+
"salesSchema = StructType(\n",
13+
" [\n",
14+
" StructField(\"SalesOrderID\", IntegerType()),\n",
15+
" StructField(\"SalesOrderDetailID\",IntegerType()),\n",
16+
" StructField(\"CarrierTrackingNumber\",StringType()),\n",
17+
" StructField(\"OrderQty\",IntegerType()),\n",
18+
" StructField(\"ProductID\",IntegerType()),\n",
19+
" StructField(\"SpecialOfferID\",IntegerType()),\n",
20+
" StructField(\"UnitPrice\",FloatType()),\n",
21+
" StructField(\"UnitPriceDiscount\",FloatType()),\n",
22+
" StructField(\"LineTotal\",FloatType()),\n",
23+
" StructField(\"rowguid\",StringType()),\n",
24+
" StructField(\"ModifiedDate\",TimestampType())\n",
25+
" ]\n",
26+
")\n",
27+
"\n",
28+
"df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\n",
29+
"format='csv',\n",
30+
"header=True,\n",
31+
"schema=salesSchema\n",
32+
")\n",
33+
"display(df)"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"df.createOrReplaceTempView(\"vw_sales\")"
43+
]
44+
}
45+
],
46+
"metadata": {
47+
"language_info": {
48+
"name": "python"
49+
},
50+
"orig_nbformat": 4
51+
},
52+
"nbformat": 4,
53+
"nbformat_minor": 2
54+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from pyspark.sql.types import *\n",
10+
"from pyspark.sql.functions import *\n",
11+
"\n",
12+
"salesSchema = StructType(\n",
13+
" [\n",
14+
" StructField(\"SalesOrderID\", IntegerType()),\n",
15+
" StructField(\"SalesOrderDetailID\",IntegerType()),\n",
16+
" StructField(\"CarrierTrackingNumber\",StringType()),\n",
17+
" StructField(\"OrderQty\",IntegerType()),\n",
18+
" StructField(\"ProductID\",IntegerType()),\n",
19+
" StructField(\"SpecialOfferID\",IntegerType()),\n",
20+
" StructField(\"UnitPrice\",FloatType()),\n",
21+
" StructField(\"UnitPriceDiscount\",FloatType()),\n",
22+
" StructField(\"LineTotal\",FloatType()),\n",
23+
" StructField(\"rowguid\",StringType()),\n",
24+
" StructField(\"ModifiedDate\",TimestampType())\n",
25+
" ]\n",
26+
")\n",
27+
"\n",
28+
"df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\n",
29+
"format='csv',\n",
30+
"header=True,\n",
31+
"schema=salesSchema\n",
32+
")\n",
33+
"display(df)"
34+
]
35+
}
36+
],
37+
"metadata": {
38+
"language_info": {
39+
"name": "python"
40+
},
41+
"orig_nbformat": 4
42+
},
43+
"nbformat": 4,
44+
"nbformat_minor": 2
45+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from pyspark.sql.types import *\n",
10+
"from pyspark.sql.functions import *\n",
11+
"\n",
12+
"salesSchema = StructType(\n",
13+
" [\n",
14+
" StructField(\"SalesOrderID\", IntegerType()),\n",
15+
" StructField(\"SalesOrderDetailID\",IntegerType()),\n",
16+
" StructField(\"CarrierTrackingNumber\",StringType()),\n",
17+
" StructField(\"OrderQty\",IntegerType()),\n",
18+
" StructField(\"ProductID\",IntegerType()),\n",
19+
" StructField(\"SpecialOfferID\",IntegerType()),\n",
20+
" StructField(\"UnitPrice\",FloatType()),\n",
21+
" StructField(\"UnitPriceDiscount\",FloatType()),\n",
22+
" StructField(\"LineTotal\",FloatType()),\n",
23+
" StructField(\"rowguid\",StringType()),\n",
24+
" StructField(\"ModifiedDate\",TimestampType())\n",
25+
" ]\n",
26+
")\n",
27+
"\n",
28+
"df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\n",
29+
"format='csv',\n",
30+
"header=True,\n",
31+
"schema=salesSchema\n",
32+
")\n",
33+
"display(df)"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"df.createOrReplaceTempView(\"vw_sales\")"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"sales_df = spark.sql(\"SELECT * FROM vw_sales\")\n",
52+
"display(sales_df)"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 1,
58+
"metadata": {},
59+
"outputs": [
60+
{
61+
"name": "stderr",
62+
"output_type": "stream",
63+
"text": [
64+
"UsageError: Cell magic `%%sql` not found.\n"
65+
]
66+
}
67+
],
68+
"source": [
69+
"%%sql\n",
70+
"\n",
71+
"SELECT *\n",
72+
"FROM vw_sales"
73+
]
74+
}
75+
],
76+
"metadata": {
77+
"kernelspec": {
78+
"display_name": "Python 3.9.7 64-bit",
79+
"language": "python",
80+
"name": "python3"
81+
},
82+
"language_info": {
83+
"codemirror_mode": {
84+
"name": "ipython",
85+
"version": 3
86+
},
87+
"file_extension": ".py",
88+
"mimetype": "text/x-python",
89+
"name": "python",
90+
"nbconvert_exporter": "python",
91+
"pygments_lexer": "ipython3",
92+
"version": "3.9.7"
93+
},
94+
"orig_nbformat": 4,
95+
"vscode": {
96+
"interpreter": {
97+
"hash": "0866b0178eadfcb04b475af301c22703d9d67512be1b7043443a666c4451acde"
98+
}
99+
}
100+
},
101+
"nbformat": 4,
102+
"nbformat_minor": 2
103+
}

0 commit comments

Comments
 (0)