Skip to content

Commit 84e8cff

Browse files
committed
merge cleanup
2 parents 2a5b589 + 5a4fcd3 commit 84e8cff

9 files changed

Lines changed: 130 additions & 77 deletions

File tree

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ The data to be analyzed should be stored in two pandas Series of the same size,
2828
rta.fit(rt, accuracy)
2929
```
3030

31-
The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.meanrt_` and `rta.meanacc_`.
31+
The resulting estimates are printed to the screen (unless the `verbose` flag is set to `False`) and are also stored in the instance attributes `rta.mean_rt_` and `rta.mean_accuracy_`.
3232

3333
## Test 1: A simple smoke test
3434

@@ -95,8 +95,8 @@ def test_rtanalysis_fit():
9595
rta = RTAnalysis()
9696
rta.fit(test_df.rt, test_df.accuracy)
9797

98-
assert np.allclose(meanRT, rta.meanrt_)
99-
assert np.allclose(meanAcc, rta.meanacc_)
98+
assert np.allclose(meanRT, rta.mean_rt_)
99+
assert np.allclose(meanAcc, rta.mean_accuracy_)
100100
```
101101

102102
We generate the data with known mean RT and mean accuracy values, fit the model using our function, and then confirm that our estimates are approximately equal to the actual values. We use `np.allclose()` rather than a test for exact equality because the computed values can differ from the expected ones by a very small amount due to the limited precision of floating-point arithmetic; an equality test would treat those as different, but `np.allclose` allows a small tolerance in its comparison.
@@ -174,8 +174,8 @@ def simulated_data(params):
174174
def test_rtanalysis_fit(simulated_data, params):
175175
rta = RTAnalysis()
176176
rta.fit(simulated_data.rt, simulated_data.accuracy)
177-
assert np.allclose(params['meanRT'], rta.meanrt_)
178-
assert np.allclose(params['meanAcc'], rta.meanacc_)
177+
assert np.allclose(params['meanRT'], rta.mean_rt_)
178+
assert np.allclose(params['meanAcc'], rta.mean_accuracy_)
179179

180180

181181
def test_rtanalysis_checkfail(simulated_data, params):
@@ -199,8 +199,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc):
199199
rta = RTAnalysis()
200200
if meanAcc > 0:
201201
rta.fit(test_df.rt, test_df.accuracy)
202-
assert np.allclose(meanRT, rta.meanrt_)
203-
assert np.allclose(meanAcc, rta.meanacc_)
202+
assert np.allclose(meanRT, rta.mean_rt_)
203+
assert np.allclose(meanAcc, rta.mean_accuracy_)
204204
else:
205205
with pytest.raises(ValueError):
206206
rta.fit(test_df.rt, test_df.accuracy)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
black
12
coverage
23
flake8
34
numpy

rtanalysis/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Analysis of response data to estimate accuracy from response time (RT)."""

rtanalysis/generate_testdata.py

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,40 @@
1-
import pandas as pd
1+
"""Utility module for handling the generation of test data."""
22
import numpy as np
3+
import pandas as pd
34
import scipy.stats
45

56

6-
def generate_test_df(meanRT, sdRT, meanAcc, n=100):
7-
"""
8-
generate simulated RT data for testing
7+
def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100):
8+
"""Generate simulated RT data for testing.
99
10-
Args:
11-
meanRT (float): mean RT (for correct trials)
12-
sdRT (float): std deviation of RT (for correct trials)
13-
meanAcc (float): mean accuracy (proportion, 0 <= meanAcc <= 1)
14-
sdcutoff ([type]): outlier cutoff (default None for no cutoff)
15-
"""
10+
Parameters
11+
----------
12+
mean_rt : float
13+
Mean response time for correct trials
14+
sd_rt : float
15+
Standard deviation of the response time in correct trials
16+
mean_accuracy : float
17+
Mean accuracy across trials (between 0 and 1)
18+
n : int, optional
19+
Number of observations to generate, by default 100
1620
21+
Returns
22+
-------
23+
pd.DataFrame
24+
Generated mock data
25+
"""
1726
rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n))
1827

1928
# get random accuracy values and threshold for intended proportion
2029
accuracy_continuous = np.random.rand(n)
2130
accuracy = pd.Series(
2231
accuracy_continuous
23-
< scipy.stats.scoreatpercentile(accuracy_continuous, 100 * meanAcc)
32+
< scipy.stats.scoreatpercentile(accuracy_continuous, 100 * mean_accuracy)
2433
)
2534

2635
# scale the correct RTs only
2736
rt_correct = rt.mask(~accuracy)
28-
rt_scaled = scale_values(rt_correct, meanRT, sdRT)
37+
rt_scaled = scale_values(rt_correct, mean_rt, sd_rt)
2938

3039
# NB: .where() replaces values where the condition is False
3140
rt_scaled_with_inaccurate_rts = rt_scaled.where(accuracy, rt)
@@ -34,14 +43,22 @@ def generate_test_df(meanRT, sdRT, meanAcc, n=100):
3443

3544

3645
def scale_values(values, mean, sd):
37-
"""scale values by given mean/sd
46+
"""Scale values by given mean/SD.
47+
48+
Parameters
49+
----------
50+
values : array-like
51+
Values to be scaled
52+
mean : float
53+
Target mean
54+
sd : float
55+
Target standard deviation
3856
39-
Args:
40-
values (array-like): values to be scaled
41-
mean (float): intended mean
42-
sd (float): intended standard deviation
57+
Returns
58+
-------
59+
array-like
60+
Scaled values
4361
"""
4462
values = values * (sd / np.std(values))
4563
values = (values - np.mean(values)) + mean
46-
4764
return values

rtanalysis/rtanalysis.py

Lines changed: 80 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,118 @@
1-
"""example function to analyze reaction times
2-
- given a data frame with RT and accuracy,
3-
compute mean RT for correct trials and mean accuracy
1+
"""Example class to analyze reaction times.
2+
3+
Given a data frame with RT and accuracy, compute mean RT for correct trials and
4+
mean accuracy.
45
"""
5-
# %%
66
import pandas as pd
77

88

9-
# %%
109
class RTAnalysis:
11-
"""[summary]"""
10+
"""Response time (RT) analysis."""
1211

1312
def __init__(self, outlier_cutoff_sd=None):
14-
"""
15-
RT analysis
13+
"""Initialize a new RTAnalysis instance.
1614
17-
Parameters:
18-
-----------
19-
outlier_cutoff_sd: standard deviation cutoff for long RT outliers (default: no cutoff)
15+
Parameters
16+
----------
17+
outlier_cutoff_sd : float, optional
18+
Standard deviation cutoff for long RT outliers, by default None
2019
"""
2120
self.outlier_cutoff_sd = outlier_cutoff_sd
22-
self.meanrt_ = None
23-
self.meanacc_ = None
21+
self.mean_rt_ = None
22+
self.mean_accuracy_ = None
2423

2524
def fit(self, rt, accuracy, verbose=True):
26-
"""[summary]
27-
28-
Args:
29-
rt (Series of floats): response times for each trial
30-
accuracy (Series of booleans): accuracy for each trial
25+
"""Fit response time to accuracy.
26+
27+
Parameters
28+
----------
29+
rt : pd.Series
30+
Response time per trial
31+
accuracy : pd.Series
32+
Accuracy per trial
33+
verbose : bool, optional
34+
Whether to print verbose output or not, by default True
35+
36+
Raises
37+
------
38+
ValueError
39+
RT/accuracy length mismatch
40+
ValueError
41+
Accuracy is 0
3142
"""
32-
3343
rt = self._ensure_series_type(rt)
3444
accuracy = self._ensure_series_type(accuracy)
3545

36-
try:
37-
assert rt.shape[0] == accuracy.shape[0]
38-
except AssertionError as e:
39-
raise ValueError("rt and accuracy must be the same length!") from e
46+
self._validate_length(rt, accuracy)
4047

41-
# ensure that accuracy values are boolean
42-
assert not set(accuracy.unique()).difference([True, False])
48+
# Ensure that accuracy values are boolean.
49+
assert accuracy.dtype == bool
4350

44-
if self.outlier_cutoff_sd is not None:
45-
cutoff = rt.std() * self.outlier_cutoff_sd
46-
if verbose:
47-
print(f"outlier rejection excluded {(rt > cutoff).sum()} trials")
48-
rt = rt.mask(rt > cutoff)
51+
rt = self.reject_outlier_rt(rt, verbose=verbose)
4952

50-
self.meanacc_ = accuracy.mean()
53+
self.mean_accuracy_ = accuracy.mean()
5154
try:
52-
assert self.meanacc_ > 0
55+
assert self.mean_accuracy_ > 0
5356
except AssertionError as e:
54-
raise ValueError("accuracy is zero") from e
57+
raise ValueError("Accuracy is zero!") from e
5558

5659
rt = rt.mask(~accuracy)
57-
self.meanrt_ = rt.mean()
60+
self.mean_rt_ = rt.mean()
5861

5962
try:
6063
assert rt.min() > 0
6164
except:
6265
raise ValueError( "negative response times found")
6366
if verbose:
64-
print(f"mean RT: {self.meanrt_}")
65-
print(f"mean accuracy: {self.meanacc_}")
67+
print(f"mean RT: {self.mean_rt_}")
68+
print(f"mean accuracy: {self.mean_accuracy_}")
69+
70+
@staticmethod
71+
def _validate_length(rt, accuracy):
72+
"""Validate response time and accuracy series lengths.
73+
74+
Parameters
75+
----------
76+
rt : pd.Series
77+
Response time values
78+
accuracy : pd.Series
79+
Accuracy values
80+
81+
Raises
82+
------
83+
ValueError
84+
Length mismatch
85+
"""
86+
same_length = rt.shape[0] == accuracy.shape[0]
87+
try:
88+
assert same_length
89+
except AssertionError as e:
90+
raise ValueError("RT and accuracy must be the same length!") from e
91+
6692

6793
@staticmethod
6894
def _ensure_series_type(var):
69-
"""return variable as a pandas Series or raise exception if
70-
not possible
95+
"""Return variable as a pandas Series.
7196
72-
Args:
73-
var (array-like): variable to convert
97+
Parameters
98+
----------
99+
var : Iterable
100+
Variable to be converted
74101
75-
Returns:
76-
series (pandas Series): converted variable
102+
Returns
103+
-------
104+
pd.Series
105+
Variable values as a pandas Series
77106
"""
78-
79-
if type(var) is not pd.core.series.Series:
107+
if not isinstance(var, pd.Series):
80108
var = pd.Series(var)
81109
return var
82110

83-
# %%
111+
def reject_outlier_rt(self, rt, verbose=True):
112+
if self.outlier_cutoff_sd is None:
113+
return rt
114+
cutoff = rt.std() * self.outlier_cutoff_sd
115+
if verbose:
116+
n_excluded = (rt > cutoff).sum()
117+
print(f"Outlier rejection excluded {n_excluded} trials.")
118+
return rt.mask(rt > cutoff)

tests/test_1_smoketest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""test suite for rtanalysis
22
"""
3-
import pytest
43
from rtanalysis.rtanalysis import RTAnalysis
54

65

tests/test_2_fit.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ def test_rtanalysis_fit():
1515
meanAcc = 0.8
1616
test_df = generate_test_df(meanRT, sdRT, meanAcc)
1717
rta.fit(test_df.rt, test_df.accuracy)
18-
assert np.allclose(meanRT, rta.meanrt_)
19-
assert np.allclose(meanAcc, rta.meanacc_)
18+
assert np.allclose(meanRT, rta.mean_rt_)
19+
assert np.allclose(meanAcc, rta.mean_accuracy_)

tests/test_4_fixture.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def simulated_data(params):
2323
def test_rtanalysis_fit(simulated_data, params):
2424
rta = RTAnalysis()
2525
rta.fit(simulated_data.rt, simulated_data.accuracy)
26-
assert np.allclose(params["meanRT"], rta.meanrt_)
27-
assert np.allclose(params["meanAcc"], rta.meanacc_)
26+
assert np.allclose(params["meanRT"], rta.mean_rt_)
27+
assert np.allclose(params["meanAcc"], rta.mean_accuracy_)
2828

2929

3030
def test_rtanalysis_checkfail(simulated_data, params):

tests/test_5_parametric.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc):
1818
rta = RTAnalysis()
1919
if meanAcc > 0:
2020
rta.fit(test_df.rt, test_df.accuracy)
21-
assert np.allclose(meanRT, rta.meanrt_)
22-
assert np.allclose(meanAcc, rta.meanacc_)
21+
assert np.allclose(meanRT, rta.mean_rt_)
22+
assert np.allclose(meanAcc, rta.mean_accuracy_)
2323
else:
2424
with pytest.raises(ValueError):
2525
rta.fit(test_df.rt, test_df.accuracy)

0 commit comments

Comments
 (0)