Skip to content

Commit 8d8bd5a

Browse files
committed
another supporting blog
1 parent 73bd24b commit 8d8bd5a

1 file changed

Lines changed: 125 additions & 0 deletions

File tree

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "4bacd1ad-6658-4e69-b596-d9db5a0a2201",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`\n"
14+
]
15+
}
16+
],
17+
"source": [
18+
"import pandas as pd\n",
19+
"import polars as pl\n",
20+
"import buckaroo\n",
21+
"JULY_FILE = \"~/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv\"\n"
22+
]
23+
},
24+
{
25+
"cell_type": "markdown",
26+
"id": "2c21c2b4-8d86-4d2e-a7df-5022a0ed296c",
27+
"metadata": {},
28+
"source": [
29+
"# Let's investigate this file\n",
30+
"We are going to use some Unix command-line utilities. These are generally very fast and memory-efficient."
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 4,
36+
"id": "8eabb58f-ce10-4ac3-b5eb-99e1a2ada843",
37+
"metadata": {},
38+
"outputs": [
39+
{
40+
"name": "stdout",
41+
"output_type": "stream",
42+
"text": [
43+
" 10G\t/Users/paddy/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv\n"
44+
]
45+
}
46+
],
47+
"source": [
48+
"!du -h /Users/paddy/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 5,
54+
"id": "1338b817-097d-4155-a131-cf5b011a8ccc",
55+
"metadata": {},
56+
"outputs": [
57+
{
58+
"name": "stdout",
59+
"output_type": "stream",
60+
"text": [
61+
"cat > /dev/null 0.04s user 1.67s system 43% cpu 3.995 total\n"
62+
]
63+
}
64+
],
65+
"source": [
66+
"!time cat /Users/paddy/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv > /dev/null"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": 3,
72+
"id": "15037796-b479-493d-b9ee-00ddbe69189b",
73+
"metadata": {},
74+
"outputs": [
75+
{
76+
"name": "stdout",
77+
"output_type": "stream",
78+
"text": [
79+
" 9026997 /Users/paddy/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv\n",
80+
"wc -l ~/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.cs 4.12s user 1.70s system 87% cpu 6.675 total\n"
81+
]
82+
}
83+
],
84+
"source": [
85+
"!time wc -l ~/NPPES_Data_Dissemination_July_2025/npidata_pfile_20050523-20250713.csv"
86+
]
87+
},
88+
{
89+
"cell_type": "code",
90+
"execution_count": null,
91+
"id": "74450ee4-c29a-4230-8ed3-e2e3fa987485",
92+
"metadata": {},
93+
"outputs": [],
94+
"source": []
95+
}
96+
],
97+
"metadata": {
98+
"kernelspec": {
99+
"display_name": "Python 3 (ipykernel)",
100+
"language": "python",
101+
"name": "python3"
102+
},
103+
"language_info": {
104+
"codemirror_mode": {
105+
"name": "ipython",
106+
"version": 3
107+
},
108+
"file_extension": ".py",
109+
"mimetype": "text/x-python",
110+
"name": "python",
111+
"nbconvert_exporter": "python",
112+
"pygments_lexer": "ipython3",
113+
"version": "3.12.8"
114+
},
115+
"widgets": {
116+
"application/vnd.jupyter.widget-state+json": {
117+
"state": {},
118+
"version_major": 2,
119+
"version_minor": 0
120+
}
121+
}
122+
},
123+
"nbformat": 4,
124+
"nbformat_minor": 5
125+
}

0 commit comments

Comments
 (0)