|
| 1 | +^{:kindly/hide-code true |
| 2 | + :clay {:title "Exploring Heart Rate Variability" |
| 3 | + :external-requirements ["WESAD dataset at /workspace/datasets/WESAD/"] |
| 4 | + :quarto {:author :ludgersolbach |
| 5 | + :draft true |
| 6 | + :type :post |
| 7 | + :date "2025-10-17" |
| 8 | + :tags [:data-analysis :noj]}}} |
| 9 | +(ns data-analysis.heart-rate-variability.exploring-heart-rate-variability |
| 10 | + (:require [tablecloth.api :as tc] |
| 11 | + [scicloj.tableplot.v1.plotly :as plotly] |
| 12 | + [tech.v3.datatype :as dtype] |
| 13 | + [tech.v3.datatype.functional :as dfn] |
| 14 | + [tech.v3.tensor :as tensor] |
| 15 | + [tech.v3.datatype.datetime :as dt-datetime] |
| 16 | + [tablecloth.api :as tc] |
| 17 | + [tablecloth.column.api :as tcc] |
| 18 | + [fastmath.stats :as stats] |
| 19 | + [clojure.math :as math] |
| 20 | + [fastmath.interpolation :as interp] |
| 21 | + [libpython-clj2.require :refer [require-python]] |
| 22 | + [libpython-clj2.python :refer [py. py.. py.-] :as py] |
| 23 | + [libpython-clj2.python.np-array] |
| 24 | + [tech.v3.parallel.for :as pfor] |
| 25 | + [scicloj.kindly.v4.kind :as kind] |
| 26 | + [scicloj.tableplot.v1.plotly :as plotly] |
| 27 | + [java-time.api :as jt])) |
| 28 | + |
| 29 | + |
| 30 | +;; # Exploring HRV |
| 31 | + |
| 32 | +(ns data-analysis.heart-rate-variability.exploring-heart-rate-variability |
| 33 | + (:require [tech.v3.datatype :as dtype] |
| 34 | + [tech.v3.datatype.functional :as dfn] |
| 35 | + [tech.v3.tensor :as tensor] |
| 36 | + [tech.v3.datatype.datetime :as dt-datetime] |
| 37 | + [tablecloth.api :as tc] |
| 38 | + [tablecloth.column.api :as tcc] |
| 39 | + [fastmath.stats :as stats] |
| 40 | + [clojure.math :as math] |
| 41 | + [fastmath.interpolation :as interp] |
| 42 | + [libpython-clj2.require :refer [require-python]] |
| 43 | + [libpython-clj2.python :refer [py. py.. py.-] :as py] |
| 44 | + [libpython-clj2.python.np-array] |
| 45 | + [tech.v3.parallel.for :as pfor] |
| 46 | + [scicloj.kindly.v4.kind :as kind] |
| 47 | + [scicloj.tableplot.v1.plotly :as plotly] |
| 48 | + [java-time.api :as jt]) |
| 49 | + (:import [com.github.psambit9791.jdsp.signal CrossCorrelation] |
| 50 | + [com.github.psambit9791.jdsp.signal.peaks FindPeak] |
| 51 | + [com.github.psambit9791.jdsp.filter Butterworth] |
| 52 | + [com.github.psambit9791.jdsp.transform DiscreteFourier])) |
| 53 | + |
| 54 | + |
| 55 | +;; ## My pulse-to-pulse intervals |
| 56 | + |
| 57 | + |
| 58 | +;; (extracted from PPG data) |
| 59 | + |
| 60 | + |
| 61 | +(def my-ppi |
| 62 | + (-> (tc/dataset "src/data_analysis/heart_rate_variability/ppi-series.csv" |
| 63 | + {:key-fn keyword}) |
| 64 | + (tc/map-columns :t |
| 65 | + :t |
| 66 | + (fn [t] |
| 67 | + (-> t |
| 68 | + jt/to-millis-from-epoch |
| 69 | + (/ 1000.0)))))) |
| 70 | + |
| 71 | +(delay |
| 72 | + (-> my-ppi |
| 73 | + (plotly/base {:=x :t |
| 74 | + :=height 300 :=width 700}) |
| 75 | + (plotly/layer-bar {:=y :ppi}))) |
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | +(def compute-spectrograms |
| 80 | + (fn [ppi-ds {:keys [sampling-rate |
| 81 | + window-size-in-sec ]}] |
| 82 | + (let [spline (interp/interpolation |
| 83 | + :cubic |
| 84 | + (:t ppi-ds) |
| 85 | + (:ppi ppi-ds)) |
| 86 | + resampled-ppi (-> {:t (tcc/* (range 50 |
| 87 | + (* (int (tcc/reduce-max (:t ppi-ds))) |
| 88 | + sampling-rate)) |
| 89 | + (/ 1.0 sampling-rate))} |
| 90 | + tc/dataset |
| 91 | + (tc/map-columns :ppi :t spline)) |
| 92 | + bw (com.github.psambit9791.jdsp.filter.Butterworth. |
| 93 | + sampling-rate) |
| 94 | + n (tc/row-count resampled-ppi) |
| 95 | + window-size (* window-size-in-sec sampling-rate) |
| 96 | + hop-size 8 |
| 97 | + n-windows (int (/ (- n window-size) |
| 98 | + hop-size)) |
| 99 | + spectrograms (->> (range n-windows) |
| 100 | + (pfor/pmap (fn [w] |
| 101 | + (let [start-idx (* w hop-size) |
| 102 | + window (-> resampled-ppi |
| 103 | + :ppi |
| 104 | + (dtype/sub-buffer start-idx window-size)) |
| 105 | + window-standardized (stats/standardize window) |
| 106 | + window-filtered (.bandPassFilter bw |
| 107 | + (double-array window-standardized) |
| 108 | + 4 |
| 109 | + 0 |
| 110 | + 0.4) |
| 111 | + fft (DiscreteFourier. (double-array window-filtered)) |
| 112 | + _ (.transform fft) |
| 113 | + whole-magnitude (.getMagnitude fft true)] |
| 114 | + {:t (* w hop-size (/ 1.0 sampling-rate)) |
| 115 | + :whole-magnitude whole-magnitude |
| 116 | + :magnitude (take 40 whole-magnitude)}))) |
| 117 | + vec)] |
| 118 | + {:sampling-rate sampling-rate |
| 119 | + :resampled-ppi resampled-ppi |
| 120 | + :spectrograms spectrograms}))) |
| 121 | + |
| 122 | + |
| 123 | +(comment |
| 124 | + (compute-spectrograms my-ppi |
| 125 | + {:sampling-rate 10 |
| 126 | + :window-size-in-sec 60})) |
| 127 | + |
| 128 | + |
| 129 | + |
| 130 | +;; [An Overview of Heart Rate Variability Metrics and Norms](https://pmc.ncbi.nlm.nih.gov/articles/PMC5624990/) |
| 131 | +;; by Fred Shaffer, & J P Ginsberg. |
| 132 | +;; doi: [10.3389/fpubh.2017.00258](https://www.frontiersin.org/journals/public-health/articles/10.3389/fpubh.2017.00258/full) |
| 133 | + |
| 134 | +(defn LF-to-HF [freqs spectrogram] |
| 135 | + (let [ds (tc/dataset {:f freqs |
| 136 | + :s (:magnitude spectrogram)} |
| 137 | + tc/dataset)] |
| 138 | + (/ (-> ds |
| 139 | + (tc/select-rows #(<= 0.04 (% :f) 0.15)) |
| 140 | + :s |
| 141 | + tcc/sum) |
| 142 | + (-> ds |
| 143 | + (tc/select-rows #(<= 0.15 (% :f) 0.4)) |
| 144 | + :s |
| 145 | + tcc/sum)))) |
| 146 | + |
| 147 | + |
| 148 | + |
| 149 | +(defn plot-with-power-spectrum [{:keys [sampling-rate |
| 150 | + resampled-ppi |
| 151 | + spectrograms]}] |
| 152 | + (when spectrograms |
| 153 | + (kind/fragment |
| 154 | + (let [n (-> spectrograms first :magnitude count) |
| 155 | + Nyquist-freq (/ sampling-rate 2.0) |
| 156 | + freq-resolution (/ Nyquist-freq n) |
| 157 | + times (map (comp str :t) spectrograms) |
| 158 | + freqs (tcc/* (range n) |
| 159 | + freq-resolution)] |
| 160 | + [(-> resampled-ppi |
| 161 | + (plotly/base {:=height 300 :=width 700}) |
| 162 | + (plotly/layer-bar (merge {:=x :t |
| 163 | + :=y :ppi} |
| 164 | + (when (:label resampled-ppi) |
| 165 | + {:=color :label |
| 166 | + :=color-type :nominal})))) |
| 167 | + (kind/plotly |
| 168 | + {:data [{:x times |
| 169 | + :y freqs |
| 170 | + :z (-> (mapv :magnitude spectrograms) |
| 171 | + (tensor/transpose [1 0])) |
| 172 | + :type :heatmap |
| 173 | + :showscale false}] |
| 174 | + :layout {:height 300 |
| 175 | + :width 700 |
| 176 | + :margin {:t 25} |
| 177 | + :xaxis {:title {:text "t"}} |
| 178 | + :yaxis {:title {:text "freq"}}}}) |
| 179 | + (-> {:freq freqs |
| 180 | + :mean-power (-> spectrograms |
| 181 | + (->> (map :magnitude)) |
| 182 | + tensor/->tensor |
| 183 | + (tensor/reduce-axis dfn/mean 0))} |
| 184 | + tc/dataset |
| 185 | + (plotly/base {:=height 300 :=width 700}) |
| 186 | + (plotly/layer-bar {:=x :freq |
| 187 | + :=y :mean-power})) |
| 188 | + (-> {:t times |
| 189 | + :LF-to-HF (->> spectrograms |
| 190 | + (map (partial LF-to-HF freqs)))} |
| 191 | + tc/dataset |
| 192 | + (plotly/base {:=height 300 :=width 700}) |
| 193 | + (plotly/layer-line {:=x :t |
| 194 | + :=y :LF-to-HF}) |
| 195 | + plotly/plot |
| 196 | + (assoc-in [:layout :yaxis :range] [0 4]) |
| 197 | + (assoc-in [:layout :yaxis :title] {:text "LF/HF"}))])))) |
| 198 | + |
| 199 | + |
| 200 | +(delay |
| 201 | + (-> my-ppi |
| 202 | + (compute-spectrograms {:sampling-rate 10 |
| 203 | + :window-size-in-sec 60}) |
| 204 | + plot-with-power-spectrum)) |
| 205 | + |
| 206 | + |
| 207 | +;; ## Analysing ECG data |
| 208 | + |
| 209 | +;; ### The [WESAD](https://dl.acm.org/doi/10.1145/3242969.3242985) dataset |
| 210 | + |
| 211 | +(def WESAD-sampling-rate 700) |
| 212 | + |
| 213 | +(require-python '[pickle :as pkl] |
| 214 | + '[pandas :as pd] |
| 215 | + '[builtins]) |
| 216 | + |
| 217 | +(defn load-pickle [filename] |
| 218 | + "Load object from pickle file" |
| 219 | + (py/with [f (builtins/open filename "rb")] |
| 220 | + (pkl/load f :encoding "latin"))) |
| 221 | + |
| 222 | +(def labelled-data |
| 223 | + (memoize |
| 224 | + (fn [subject] |
| 225 | + (load-pickle (format "/workspace/datasets/WESAD/WESAD/S%d/S%d.pkl" |
| 226 | + subject subject))))) |
| 227 | + |
| 228 | +(def labelled-dataset |
| 229 | + (memoize |
| 230 | + (fn [subject] |
| 231 | + (let [ld (labelled-data subject)] |
| 232 | + (tc/dataset {:t (tcc/* (range) |
| 233 | + (/ 1.0 WESAD-sampling-rate)) |
| 234 | + :ECG (-> ld |
| 235 | + (get-in [:signal :chest :ECG]) |
| 236 | + (py. flatten)) |
| 237 | + :label (-> ld |
| 238 | + (get :label))}))))) |
| 239 | + |
| 240 | +(delay |
| 241 | + (labelled-dataset 5)) |
| 242 | + |
| 243 | +;; ### Finding peaks |
| 244 | + |
| 245 | +;; [Pan-Tompkins Algorithm](https://en.wikipedia.org/wiki/Pan%E2%80%93Tompkins_algorithm) |
| 246 | + |
| 247 | +;; [Unupervised ECG QRS Detection](https://hooman650.github.io/ECG-QRS.html) by Hooman SedghamizHooman Sedghamiz |
| 248 | + |
| 249 | +;; scipy: `peaks = signal.find_peaks(signal, height=mean, distance=200)` |
| 250 | +;; JDSP equivalent: |
| 251 | +(defn find-peaks [signal {:keys [distance]}] |
| 252 | + (let [signal-array (double-array signal) |
| 253 | + fp (FindPeak. signal-array) |
| 254 | + peak-obj (.detectPeaks fp) |
| 255 | + signal-mean (dfn/mean signal-array) |
| 256 | + ;; Filter by (lower=mean, upper=nil for no upper limit) |
| 257 | + height-filtered (.filterByHeight peak-obj signal-mean nil) |
| 258 | + ;; Then filter by distance |
| 259 | + final-peaks (.filterByPeakDistance peak-obj height-filtered distance)] |
| 260 | + final-peaks)) ; Returns int[] of peak row-numbers |
| 261 | + |
| 262 | + |
| 263 | +(delay |
| 264 | + (let [bw (com.github.psambit9791.jdsp.filter.Butterworth. |
| 265 | + WESAD-sampling-rate) |
| 266 | + raw (labelled-dataset 5) |
| 267 | + pipeline (-> raw |
| 268 | + (tc/head 50000) |
| 269 | + (tc/add-column :filtered |
| 270 | + #(.bandPassFilter bw |
| 271 | + (double-array (:ECG %)) |
| 272 | + 4 |
| 273 | + 5 15)) |
| 274 | + (tc/add-column :sqdiff |
| 275 | + #(tcc/sq |
| 276 | + (tcc/- |
| 277 | + (:filtered %) |
| 278 | + (tcc/shift (:filtered %) 1)))) |
| 279 | + (tc/add-column :peak #(let [peaks (set |
| 280 | + (find-peaks (:sqdiff %) |
| 281 | + {:distance 200}))] |
| 282 | + (->> (tc/row-count %) |
| 283 | + range |
| 284 | + (map (fn [i] |
| 285 | + (if (peaks i) |
| 286 | + 1 |
| 287 | + nil)))))))] |
| 288 | + (-> pipeline |
| 289 | + (tc/head 5000) |
| 290 | + (plotly/base {:=x :t}) |
| 291 | + (plotly/layer-line {:=y :ECG |
| 292 | + :=name "raw ECG"}) |
| 293 | + (plotly/layer-line {:=y :filtered |
| 294 | + :=name "filtered"}) |
| 295 | + (plotly/layer-line {:=y :sqdiff |
| 296 | + :=name "squared difference"}) |
| 297 | + (plotly/layer-point {:=y :peak |
| 298 | + :=name "peak"})))) |
| 299 | + |
| 300 | + |
| 301 | +(defn extract-ppi |
| 302 | + "Extract peak-to-peak intervals from ECG signal. |
| 303 | + Returns dataset with columns: :t (time in seconds), :ppi (interval in seconds)" |
| 304 | + [{:keys [subject-id row-interval]}] |
| 305 | + (let [bw (Butterworth. WESAD-sampling-rate) |
| 306 | + raw (labelled-dataset subject-id) |
| 307 | + pipeline (-> raw |
| 308 | + (cond-> row-interval |
| 309 | + (tc/select-rows (apply range row-interval))) |
| 310 | + ;; Bandpass filter 5-15 Hz for QRS detection |
| 311 | + (tc/add-column :filtered |
| 312 | + #(.bandPassFilter bw |
| 313 | + (double-array (:ECG %)) |
| 314 | + 4 5 15)) |
| 315 | + ;; Differentiate and square |
| 316 | + (tc/add-column :sqdiff |
| 317 | + #(tcc/- (:filtered %) |
| 318 | + (tcc/shift (:filtered %) 1)))) |
| 319 | + ;; Find peaks with distance constraint (200 samples = ~0.29s) |
| 320 | + peak-indices (find-peaks (:sqdiff pipeline) |
| 321 | + {:distance 200}) |
| 322 | + peak-times (tcc/* peak-indices (/ 1.0 WESAD-sampling-rate))] |
| 323 | + (-> {:t peak-times} |
| 324 | + tc/dataset |
| 325 | + ;; Calculate peak-to-peak intervals |
| 326 | + (tc/add-column :ppi #(tcc/- (:t %) |
| 327 | + (tcc/shift (:t %) 1))) |
| 328 | + (tc/drop-rows [0])))) |
| 329 | + |
| 330 | + |
| 331 | +;; ### Plotting the PPI |
| 332 | + |
| 333 | +(delay |
| 334 | + (-> (extract-ppi {:subject-id 5 |
| 335 | + :row-interval [0 200000]}) |
| 336 | + (plotly/layer-bar {:=x :t |
| 337 | + :=y :ppi}))) |
| 338 | + |
| 339 | +(delay |
| 340 | + (-> (extract-ppi {:subject-id 5}) |
| 341 | + (plotly/layer-bar {:=x :t |
| 342 | + :=y :ppi}))) |
| 343 | + |
| 344 | +;; ## Spectrograms again |
| 345 | + |
| 346 | +(def WESAD-spectrograms |
| 347 | + (memoize |
| 348 | + (fn [{:keys [ppi-params spectrogram-params]}] |
| 349 | + (-> ppi-params |
| 350 | + extract-ppi |
| 351 | + (compute-spectrograms spectrogram-params))))) |
| 352 | + |
| 353 | + |
| 354 | +(delay |
| 355 | + (-> {:ppi-params {:subject-id 5 |
| 356 | + :row-interval [0 1000000]} |
| 357 | + :spectrogram-params {:sampling-rate 10 |
| 358 | + :window-size-in-sec 120}} |
| 359 | + WESAD-spectrograms |
| 360 | + plot-with-power-spectrum)) |
| 361 | + |
| 362 | + |
0 commit comments