NatLabRockies · daniel-thom · Jun 14, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -11,52 +11,46 @@ env:
   DEFAULT_OS: ubuntu-latest
 
 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          python-version: "3.12"
+          enable-cache: false
+      - name: Install Python project
+        run: uv sync --extra dev --frozen
+      - name: Run Ruff lint
+        run: uv run ruff check .
+      - name: Check Ruff formatting
+        run: uv run ruff format --check .
+      - name: Run ty
+        run: uv run ty check
+
   pytest:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: ["3.12", "3.13"]
         os: [ubuntu-latest]
-
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install ".[dev]"
-    - name: Run pytest with coverage
-      run: |
-        pytest -v -m "not integration" --cov --cov-report=xml
-    - name: codecov
-      uses: codecov/codecov-action@v4.2.0
-      if: ${{ matrix.os == env.DEFAULT_OS && matrix.python-version == env.DEFAULT_PYTHON  }}
-      with:
-        token: ${{ secrets.CODECOV_TOKEN }}
-        name: sparkctl-tests
-        fail_ci_if_error: false
-        verbose: true
-  mypy:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: 3.12
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install ".[dev]"
-        mypy
-  ruff:
-    runs-on: ubuntu-latest
-    name: "ruff"
     steps:
       - uses: actions/checkout@v4
-      - uses: chartboost/ruff-action@v1
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          python-version: ${{ matrix.python-version }}
+          enable-cache: false
+      - name: Install Python project
+        run: uv sync --extra dev --frozen
+      - name: Run pytest with coverage
+        run: uv run pytest -v -m "not integration" --cov --cov-report=xml
+      - name: codecov
+        uses: codecov/codecov-action@v4.2.0
+        if: ${{ matrix.os == env.DEFAULT_OS && matrix.python-version == env.DEFAULT_PYTHON }}
         with:
-          src: "./src"
+          token: ${{ secrets.CODECOV_TOKEN }}
+          name: sparkctl-tests
+          fail_ci_if_error: false
+          verbose: true
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
@@ -9,19 +9,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: select python version
-        uses: actions/setup-python@v5
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
         with:
           python-version: "3.12"
-      - name: install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install ".[dev]"
+          enable-cache: false
+      - name: Install Python project
+        run: uv sync --extra dev --frozen
       - name: build documentation
         run: |
           cd docs
-          make clean
-          make html
+          uv run make clean
+          uv run make html
       - name: deploy
         uses: peaceiris/actions-gh-pages@v3.6.1
         with:

diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml
@@ -11,16 +11,12 @@ jobs:
         id-token: write
       steps:    
       - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
         with:
           python-version: "3.12"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install build
-      - name: Build and publish
-        run: |
-          python -m build
+          enable-cache: false
+      - name: Build
+        run: uv build
       - name: Publish package distributions to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
@@ -133,7 +133,7 @@ dmypy.json
 .vscode
 
 tests/data/apache-hive-4.0.1-bin*
-tests/data/spark-4.1.2-bin-hadoop3*
+tests/data/spark-*-bin-hadoop3*
 tests/data/postgresql*
 tests/data/jdk-21.0.7.jdk*
 conf

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,15 +1,16 @@
 repos:
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  # Ruff version.
-  rev: v0.2.1
-  hooks:
-    # Run the linter.
-    - id: ruff
-      args: [ --fix ]
-    # Run the formatter.
-    - id: ruff-format
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.13.0
-  hooks:
-  - id: mypy
-    language: system
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.15.8
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
+
+  - repo: local
+    hooks:
+      - id: ty
+        name: ty (type check)
+        entry: uv run ty check
+        language: system
+        types: [python]
+        pass_filenames: false
diff --git a/README.md b/README.md
@@ -49,24 +49,40 @@ support.
 Contributions are welcome.
 
 ## Development
-Install the package with its development dependencies:
+This project uses [uv](https://docs.astral.sh/uv/) for environment management. Install the
+package with its development dependencies:
 ```console
-$ pip install -e ".[dev]"
+$ uv sync --extra dev
+```
+
+Lint, format, and type-check the code with [ruff](https://docs.astral.sh/ruff/) and
+[ty](https://github.com/astral-sh/ty):
+```console
+$ uv run ruff check .
+$ uv run ruff format --check .
+$ uv run ty check
+```
+
+These checks also run as Git hooks via [prek](https://github.com/j178/prek). Install the hooks
+once and then run them on demand:
+```console
+$ uv run prek install
+$ uv run prek run --all-files
 ```
 
 Run the unit tests. These are fast, require no special resources, and are what CI runs:
 ```console
-$ pytest -m "not integration"
+$ uv run pytest -m "not integration"
 ```
 
 The integration tests download a real Spark and Java distribution into `tests/data/` and start a
 real single-node Spark cluster, so they are slower and require network access and sufficient
 memory. They are excluded from CI; run them locally with:
 ```console
-$ pytest -m integration
+$ uv run pytest -m integration
 ```
 
-Run the complete suite (unit and integration tests) with `pytest`.
+Run the complete suite (unit and integration tests) with `uv run pytest`.
 
 ## License
 sparkctl is released under a BSD 3-Clause [license](https://github.com/NatLabRockies/sparkctl/blob/main/LICENSE).

diff --git a/docs/explanation/index.md b/docs/explanation/index.md
@@ -94,13 +94,19 @@ This TOML file stores environment-specific settings that rarely change:
 
 ```toml
 [binaries]
-spark_home = "/path/to/spark"
-java_home = "/path/to/java"
+spark_path = "/path/to/spark"
+java_path = "/path/to/java"
 ```
 
 These settings tell sparkctl where to find Spark and Java. You can also set global settings that
 apply every time you run `sparkctl configure`. For example, if you always want to use Spark Connect,
-you can set `spark_connect_server = true` and avoid having to set it each time you configure.
+you can set `start_connect_server = true` in a `[runtime]` section and avoid having to set it each
+time you configure:
+
+```toml
+[runtime]
+start_connect_server = true
+```
 
 ### Runtime Configuration (`./conf/`)
 

diff --git a/docs/faq.md b/docs/faq.md
@@ -70,24 +70,26 @@ export the relevant variables yourself, for example through `spark-env.sh` in th
 
 If you are running pyspark/spark-submit after installing via `pip install sparkctl[pyspark]`,
 your version of pyspark must match the cluster version exactly. Client version 4.1.3 is
-incompatible with cluster version 4.1.2.
+incompatible with cluster version 4.1.1.
 
 ### Why can't my workers connect to the master?
 
 Common causes:
 
 1. **High-bandwidth nodes**: Some NLR Kestrel compute nodes have two network cards, which Spark
-   cannot deal with. Set `--constaint lbw` when allocating nodes.
+   cannot deal with. Set `--constraint lbw` when allocating nodes.
 
 Check the Spark master logs in `./spark_scratch/logs/` for connection errors.
 
 ### How do I connect to the Spark Web UI?
 
-The Spark master runs a web UI on port 4040 (driver) or 8080 (master). Since HPC compute nodes
-aren't directly accessible, use SSH tunneling:
+Spark runs a web UI on port 8080 (master) and port 4040 (driver/application). Since HPC compute
+nodes aren't directly accessible, use SSH tunneling. Set `COMPUTE_NODE` to the name of your
+compute node (it is listed in `./conf/workers`):
 
 ```console
-$ ssh -L 8080:$(hostname):8080 user@hpc-login-node
+$ export COMPUTE_NODE=<your-compute-node-name>
+$ ssh -L 8080:$COMPUTE_NODE:8080 -L 4040:$COMPUTE_NODE:4040 user@hpc-login-node
 ```
 
 Then open `http://localhost:8080` or `http://localhost:4040` in your browser.
@@ -104,13 +106,13 @@ Common causes:
 3. **Too few partitions**: Increase `spark.sql.shuffle.partitions`.
 4. **Too many partitions**: Decrease partitions if you have many small tasks.
 5. **Slow storage**: Ensure shuffle storage uses fast local SSDs, not shared filesystem.
+6. **Non-ideal partitioning**: If you partition by column in the same query as your main work,
+   especially after significantly increasing the shuffle partitions, persist your main work
+   first. Then repartition in a second task.
 7. **Query too complex**: If you are trying to run a very complex query where subtasks have
    different data sizes and partitioning needs, consider breaking the query into smaller parts with
    different settings. Persist intermediate results to the filesystem so that you can checkpoint and
    make incremental progress.
-6. **Non-ideal partitioning**: If you are trying to partition-by-column in the same query as your
-   main work, especially where you significantly increased the shuffle partitions, persist your
-   main work first. Then repartition in a second task.
 
 See the {ref}`how-tos-debugging` for performance troubleshooting.
 

diff --git a/docs/how_tos/applications/hive_metastore.md b/docs/how_tos/applications/hive_metastore.md
@@ -26,6 +26,6 @@ start the server. Apptainer will cache the container image and you can reuse the
 across Slurm allocations.
 
 **Note**: The metadata about your tables will be stored in Derby or Postgres. Your tables will
-be stored on the filesystem (Parquet files by default) in a directory called `spark_warehouse`,
+be stored on the filesystem (Parquet files by default) in a directory called `spark-warehouse`,
 which gets created in the directory passed to `--metastore-dir` (current directory by default).
 Postgres data, if enabled, will be in the same directory (`pg_data`).
diff --git a/docs/how_tos/applications/index.md b/docs/how_tos/applications/index.md
@@ -7,4 +7,5 @@
 
     hive_metastore
     tableau
+    jupyter
 ```
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,4 +7,5 @@ @@
         hive_metastore
         tableau
+        jupyter
     ```