# meanVar.py
import glob
import h5py
import numpy as np
from tqdm import tqdm
# Aggregate log-probability statistics over a set of per-run HDF5 files.
#
# Every input file is expected to expose a `regimes` group of 2-D datasets and
# a `currentOccupancyTime` attribute. For each regime this script writes:
#   * MeanVar.h5  : element-wise mean, second and third raw moments, variance
#                   and skewness of log(probs), averaged over the runs used;
#   * FinalProbs.h5: one row per run holding log(probs[-1, :]), i.e. the last
#                   row of each run (presumably the final time step -- confirm).

baseDir = "/projects/jamming/shared/MillionQuad2DProbs/1,1,1,1"
meanVarPath = f"{baseDir}/MeanVar.h5"
finalProbsPath = f"{baseDir}/FinalProbs.h5"

# The two output files live in the same directory as the inputs and match the
# same `*.h5` pattern, so they must be filtered out of the input list.
outputPaths = {meanVarPath, finalProbsPath}
files = sorted(p for p in glob.glob(f"{baseDir}/*.h5") if p not in outputPaths)
# NOTE(review): only the first five inputs are used -- this looks like a
# debugging limit left in place; confirm before a production run.
files = files[:5]
totalFiles = len(files)
if totalFiles == 0:
    raise SystemExit(f"No input .h5 files found in {baseDir}")

# Runs whose `currentOccupancyTime` attribute is below this value are skipped.
maxTime = 999
moments = ['mean', 'secondMoment', 'thirdMoment', 'var', 'skew']
accumulated = ('mean', 'secondMoment', 'thirdMoment')

# The first input file defines the regime names and the per-regime array shape.
with h5py.File(files[0], 'r') as firstRun:
    regimeShapes = {name: dset.shape for name, dset in firstRun['regimes'].items()}

# Running sums are kept in memory and written once at the end, instead of a
# full HDF5 read-modify-write for every input file.
sums = {
    regime: {moment: np.zeros(shape, dtype=float) for moment in accumulated}
    for regime, shape in regimeShapes.items()
}
# One row per accepted run; only the first `num_files` rows end up filled.
finalLogProbs = {
    regime: np.empty((totalFiles, shape[1]), dtype=float)
    for regime, shape in regimeShapes.items()
}

num_files = 0
for path in tqdm(files):
    with h5py.File(path, 'r') as run:
        if run.attrs['currentOccupancyTime'] < maxTime:
            continue
        for regime in run['regimes'].keys():
            probs = run['regimes'][regime][:]
            # log(0) = -inf is expected here; silence the divide warning.
            with np.errstate(divide='ignore'):
                logProbs = np.log(probs)
                finalLogProbs[regime][num_files, :] = logProbs[-1, :].astype(float)
                acc = sums[regime]
                acc['mean'] += logProbs.astype(float)
                acc['secondMoment'] += (logProbs ** 2).astype(float)
                acc['thirdMoment'] += (logProbs ** 3).astype(float)
    # Reached only for accepted runs: `continue` above skips this line.
    num_files += 1

if num_files == 0:
    raise SystemExit(
        f"None of the {totalFiles} input files has currentOccupancyTime >= {maxTime}"
    )

with h5py.File(meanVarPath, 'a') as meanVarFile, \
        h5py.File(finalProbsPath, 'a') as finalProbs:
    for regime, shape in regimeShapes.items():
        mean = sums[regime]['mean'] / num_files
        secondMoment = sums[regime]['secondMoment'] / num_files
        thirdMoment = sums[regime]['thirdMoment'] / num_files
        # Entries that saw log(0) hold +/-inf, so inf - inf and 0/0 can occur.
        with np.errstate(invalid='ignore', divide='ignore'):
            var = secondMoment - mean ** 2
            sigma = np.sqrt(var)
            skew = (thirdMoment - 3 * mean * sigma ** 2 - mean ** 3) / sigma ** 3
        results = {
            'mean': mean,
            'secondMoment': secondMoment,
            'thirdMoment': thirdMoment,
            'var': var,
            'skew': skew,
        }

        group = meanVarFile.require_group(regime)
        for moment in moments:
            dset = group.require_dataset(moment, shape=shape, dtype=float)
            dset[...] = results[moment]

        # Drop any dataset left by an earlier run (including the `temp<regime>`
        # scratch datasets older versions of this script created) so that
        # create_dataset does not fail on an existing name.
        for stale in (regime, f'temp{regime}'):
            if stale in finalProbs:
                del finalProbs[stale]
        finalProbs.create_dataset(regime, data=finalLogProbs[regime][:num_files, :])