
Commit 02d8f8d

Merge branch 'master' of https://github.com/DataKnox/CodeSamples into master

2 parents 53414dd + faed94f · 2 files changed · 195 additions & 0 deletions

notebook/basic_query.json

Lines changed: 90 additions & 0 deletions
{
    "name": "basic_query",
    "properties": {
        "nbformat": 4,
        "nbformat_minor": 2,
        "bigDataPool": {
            "referenceName": "sparksynapse",
            "type": "BigDataPoolReference"
        },
        "sessionProperties": {
            "driverMemory": "28g",
            "driverCores": 4,
            "executorMemory": "28g",
            "executorCores": 4,
            "numExecutors": 2,
            "runAsWorkspaceSystemIdentity": true,
            "conf": {
                "spark.dynamicAllocation.enabled": "false",
                "spark.dynamicAllocation.minExecutors": "2",
                "spark.dynamicAllocation.maxExecutors": "2",
                "spark.autotune.trackingId": "127693b3-7914-4dd0-88d9-a1b87978a498"
            }
        },
        "metadata": {
            "saveOutput": true,
            "enableDebugMode": false,
            "kernelspec": {
                "name": "synapse_pyspark",
                "display_name": "Synapse PySpark"
            },
            "language_info": {
                "name": "python"
            },
            "a365ComputeOptions": {
                "id": "/subscriptions/444d5dc8-f2d8-4aa0-8b7c-20457469c20c/resourceGroups/knox_analytics/providers/Microsoft.Synapse/workspaces/knoxsynapse/bigDataPools/sparksynapse",
                "name": "sparksynapse",
                "type": "Spark",
                "endpoint": "https://knoxsynapse.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sparksynapse",
                "auth": {
                    "type": "AAD",
                    "authResource": "https://dev.azuresynapse.net"
                },
                "sparkVersion": "3.2",
                "nodeCount": 3,
                "cores": 4,
                "memory": 28,
                "automaticScaleJobs": false
            },
            "sessionKeepAliveTimeout": 30
        },
        "cells": [
            {
                "cell_type": "code",
                "metadata": {
                    "microsoft": {
                        "language": "python"
                    },
                    "collapsed": false
                },
                "source": [
                    "%%pyspark\r\n",
                    "df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\r\n",
                    "format='csv',\r\n",
                    "header=True\r\n",
                    ")\r\n",
                    "display(df.limit(10))"
                ],
                "execution_count": 9
            },
            {
                "cell_type": "code",
                "metadata": {
                    "jupyter": {
                        "source_hidden": false,
                        "outputs_hidden": false
                    },
                    "nteract": {
                        "transient": {
                            "deleting": false
                        }
                    }
                },
                "source": [
                    ""
                ],
                "execution_count": null
            }
        ]
    }
}
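For readability, here is the first cell's "source" array unescaped into plain PySpark. The %%pyspark magic, the abfss path, and the reader options are exactly as stored in the notebook above; only the indentation and comments are added here:

    %%pyspark
    # Load the CSV from the ADLS Gen2 container; header=True treats the first
    # row as column names. No schema is supplied, so every column is read as a
    # string by default.
    df = spark.read.load(
        'abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',
        format='csv',
        header=True
    )
    # Render the first 10 rows in the Synapse notebook results grid.
    display(df.limit(10))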

notebook/define_schema.json

Lines changed: 105 additions & 0 deletions
{
    "name": "define_schema",
    "properties": {
        "nbformat": 4,
        "nbformat_minor": 2,
        "bigDataPool": {
            "referenceName": "sparksynapse",
            "type": "BigDataPoolReference"
        },
        "sessionProperties": {
            "driverMemory": "28g",
            "driverCores": 4,
            "executorMemory": "28g",
            "executorCores": 4,
            "numExecutors": 2,
            "conf": {
                "spark.dynamicAllocation.enabled": "false",
                "spark.dynamicAllocation.minExecutors": "2",
                "spark.dynamicAllocation.maxExecutors": "2",
                "spark.autotune.trackingId": "d16d7171-9ff4-4a75-8ed7-895dae631513"
            }
        },
        "metadata": {
            "saveOutput": true,
            "enableDebugMode": false,
            "kernelspec": {
                "name": "synapse_pyspark",
                "display_name": "Synapse PySpark"
            },
            "language_info": {
                "name": "python"
            },
            "a365ComputeOptions": {
                "id": "/subscriptions/444d5dc8-f2d8-4aa0-8b7c-20457469c20c/resourceGroups/knox_analytics/providers/Microsoft.Synapse/workspaces/knoxsynapse/bigDataPools/sparksynapse",
                "name": "sparksynapse",
                "type": "Spark",
                "endpoint": "https://knoxsynapse.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sparksynapse",
                "auth": {
                    "type": "AAD",
                    "authResource": "https://dev.azuresynapse.net"
                },
                "sparkVersion": "3.2",
                "nodeCount": 3,
                "cores": 4,
                "memory": 28,
                "automaticScaleJobs": false
            },
            "sessionKeepAliveTimeout": 30
        },
        "cells": [
            {
                "cell_type": "code",
                "metadata": {
                    "collapsed": false
                },
                "source": [
                    "from pyspark.sql.types import *\r\n",
                    "from pyspark.sql.functions import *\r\n",
                    "\r\n",
                    "salesSchema = StructType(\r\n",
                    "    [\r\n",
                    "        StructField(\"SalesOrderID\", IntegerType()),\r\n",
                    "        StructField(\"SalesOrderDetailID\",IntegerType()),\r\n",
                    "        StructField(\"CarrierTrackingNumber\",StringType()),\r\n",
                    "        StructField(\"OrderQty\",IntegerType()),\r\n",
                    "        StructField(\"ProductID\",IntegerType()),\r\n",
                    "        StructField(\"SpecialOfferID\",IntegerType()),\r\n",
                    "        StructField(\"UnitPrice\",FloatType()),\r\n",
                    "        StructField(\"UnitPriceDiscount\",FloatType()),\r\n",
                    "        StructField(\"LineTotal\",FloatType()),\r\n",
                    "        StructField(\"rowguid\",StringType()),\r\n",
                    "        StructField(\"ModifiedDate\",TimestampType())\r\n",
                    "    ]\r\n",
                    ")\r\n",
                    "\r\n",
                    "df = spark.read.load('abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',\r\n",
                    "format='csv',\r\n",
                    "header=True,\r\n",
                    "schema=salesSchema\r\n",
                    ")\r\n",
                    "display(df)"
                ],
                "execution_count": 11
            },
            {
                "cell_type": "code",
                "metadata": {
                    "jupyter": {
                        "source_hidden": false,
                        "outputs_hidden": false
                    },
                    "nteract": {
                        "transient": {
                            "deleting": false
                        }
                    }
                },
                "source": [
                    ""
                ],
                "execution_count": null
            }
        ]
    }
}
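Unescaped, the schema cell reads as the PySpark below (the same code as the "source" array above; only the comments are added here). Declaring a StructType up front gives typed columns directly, rather than leaving everything as strings or paying for an extra pass over the file with schema inference:

    from pyspark.sql.types import *
    from pyspark.sql.functions import *

    # Explicit schema: column names and types are declared instead of inferred,
    # so the IDs load as integers, the prices as floats, and ModifiedDate as a
    # timestamp.
    salesSchema = StructType(
        [
            StructField("SalesOrderID", IntegerType()),
            StructField("SalesOrderDetailID", IntegerType()),
            StructField("CarrierTrackingNumber", StringType()),
            StructField("OrderQty", IntegerType()),
            StructField("ProductID", IntegerType()),
            StructField("SpecialOfferID", IntegerType()),
            StructField("UnitPrice", FloatType()),
            StructField("UnitPriceDiscount", FloatType()),
            StructField("LineTotal", FloatType()),
            StructField("rowguid", StringType()),
            StructField("ModifiedDate", TimestampType())
        ]
    )

    # Same source file as basic_query, now read against the declared schema.
    df = spark.read.load(
        'abfss://etlload@knoxlakegen2.dfs.core.windows.net/sales/SalesOrderDetail.csv',
        format='csv',
        header=True,
        schema=salesSchema
    )
    display(df)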
