-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsample_training_set.py
More file actions
54 lines (42 loc) · 2.38 KB
/
sample_training_set.py
File metadata and controls
54 lines (42 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
This Python script collects and processes image sequences
from a nested directory structure containing various "experiments" and "species."
It randomly selects a specified number of TIFF image sequences from each category, combines them,
and saves the combined sequences in a new "training" folder within each experiment's directory.
The aim is to generate training datasets for each species in each experiment.
"""
import os
import random
import numpy as np
from skimage import io
# Maximum number of sequences drawn for training from each base-name folder
N_TRAINING_SAMPLES = 3

# Root folder holding one sub-folder per experiment (relative to the current
# working directory)
SOURCE_ROOT = './experiments'

# Every sub-directory of the source root is treated as an experiment
experiments = [
    folder for folder in os.listdir(SOURCE_ROOT)
    if os.path.isdir(os.path.join(SOURCE_ROOT, folder))
]

for experiment in experiments:
    species_path = os.path.join(SOURCE_ROOT, experiment, 'focus')

    # Experiments without a 'focus' directory have nothing to sample from.
    # isdir (rather than exists) also rejects a regular file named 'focus',
    # which would otherwise make os.listdir raise.
    if not os.path.isdir(species_path):
        continue

    species_list = [
        folder for folder in os.listdir(species_path)
        if os.path.isdir(os.path.join(species_path, folder))
    ]

    for species in species_list:
        base_name_path = os.path.join(species_path, species)
        base_names = [
            folder for folder in os.listdir(base_name_path)
            if os.path.isdir(os.path.join(base_name_path, folder))
        ]

        # Image data accumulated across all base-name folders of this species
        combined_seqs = []

        for base_name in base_names:
            sequence_path = os.path.join(base_name_path, base_name)
            sequences = [
                seq for seq in os.listdir(sequence_path)
                if seq.endswith('.tif')
            ]

            # Randomly select sequences for training; the min() keeps
            # random.sample from raising when a folder holds fewer files
            # than N_TRAINING_SAMPLES.
            training_samples = random.sample(
                sequences, min(N_TRAINING_SAMPLES, len(sequences))
            )

            # Read and accumulate the selected sequences
            combined_seqs.extend(
                io.imread(os.path.join(sequence_path, sample))
                for sample in training_samples
            )

        # np.concatenate raises ValueError on an empty list, so a species
        # without any .tif file is skipped instead of aborting the whole run.
        if not combined_seqs:
            print(
                f"No .tif sequences found for species '{species}' in "
                f"experiment '{experiment}'; skipping."
            )
            continue

        # Combine sequences along the first axis and save.
        # NOTE(review): this assumes every file loads with matching trailing
        # dimensions (e.g. frames x height x width) -- confirm for the data.
        training_stack = np.concatenate(combined_seqs, axis=0)

        dest_folder = os.path.join(SOURCE_ROOT, experiment, 'training', species)
        os.makedirs(dest_folder, exist_ok=True)
        dest_path = os.path.join(dest_folder, f"{experiment}_training_data.tif")
        io.imsave(dest_path, training_stack)

print("Training samples created in each experiment's 'training' directory.")