Skip to content
This repository was archived by the owner on Nov 23, 2023. It is now read-only.

Commit 404f1cf

Browse files
new dataloading, move to 32 anon mel/drum, validate with fixed length, fixes
1 parent 865950b commit 404f1cf

3 files changed

Lines changed: 66 additions & 45 deletions

File tree

notochord/notochord/data.py

Lines changed: 56 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(self, data_dir, batch_len, transpose=5, speed=0.1, glob='**/*.pkl',
1717
self.transpose = transpose
1818
self.speed = speed
1919
self.start_token = 128
20-
self.n_anon = 8
20+
self.n_anon = 32
2121
self.prog_start_token = 0
2222
# self.clamp_time = clamp_time
2323
self.testing = False
@@ -26,47 +26,64 @@ def __init__(self, data_dir, batch_len, transpose=5, speed=0.1, glob='**/*.pkl',
2626
def __len__(self):
2727
return len(self.files)
2828

29-
def _random_map_anonymous_instruments(self, program: torch.Tensor) -> torch.Tensor:
29+
def _remap_anonymous_instruments(self, program: torch.Tensor) -> torch.Tensor:
3030
"""
31-
Randomly map instruments to eight additional ‘anonymous’ melodic and drum identities
32-
with a probability of 10% per instrument, without replacement.
33-
34-
The input program should contain melodic instruments from MIDI note numbers 0-127 and
35-
drum instruments from 128-255. Anonymous instruments are mapped to subsequent note numbers.
31+
Randomly map instruments to additional ‘anonymous’ melodic and drum identities
32+
with a probability of 10% per instrument, without replacement.
33+
Also map any parts > 256 to appropriate anonymous ids.
3634
"""
37-
unique_melodic = program.masked_select(program<128).unique()
38-
unique_drum = program.masked_select(program>=128).unique()
39-
40-
anon_melodic_start = 256
35+
orig_program = program%1000
36+
is_melodic = (orig_program<=128) | (orig_program>256)
37+
is_anon = (program > 256)
38+
named_melodic = list(program.masked_select(is_melodic & ~is_anon).unique())
39+
anon_melodic = list(program.masked_select(is_melodic & is_anon).unique())
40+
named_drum = list(program.masked_select(~is_melodic & ~is_anon).unique())
41+
anon_drum = list(program.masked_select(~is_melodic & is_anon).unique())
42+
43+
anon_melodic_start = 257
4144
anon_drum_start = anon_melodic_start + self.n_anon
42-
anon_melodic = torch.randperm(self.n_anon) + anon_melodic_start # array of anon melodic programs
43-
anon_drum = torch.randperm(self.n_anon) + anon_drum_start # array of anon drum programs
45+
perm_anon_melodic = torch.randperm(self.n_anon) + anon_melodic_start
46+
perm_anon_drum = torch.randperm(self.n_anon) + anon_drum_start
4447

45-
i = 0
46-
for pr in unique_melodic:
48+
for pr in named_melodic:
4749
if torch.rand((1,)) < 0.1:
48-
program[program==pr] = anon_melodic[i]
49-
i += 1
50-
if i >= len(anon_melodic): # no more anon instruments to write to
51-
break
52-
i = 0
53-
for pr in unique_drum:
50+
anon_melodic.append(pr)
51+
for pr in named_drum:
5452
if torch.rand((1,)) < 0.1:
55-
program[program==pr] = anon_drum[i]
56-
i += 1
57-
if i >= len(anon_drum): # no more anon instruments to write to
58-
break
53+
anon_drum.append(pr)
54+
55+
new_program = program.clone()
56+
57+
if len(anon_melodic)>self.n_anon:
58+
print(f'warning: {anon_melodic} > {self.n_anon} anon melodic instruments')
59+
if len(anon_drum)>self.n_anon:
60+
print(f'warning: {anon_drum} > {self.n_anon} anon drum instruments')
5961

60-
return program
62+
i = 0
63+
for pr in anon_melodic:
64+
new_program[program==pr] = perm_anon_melodic[i%self.n_anon]
65+
i += 1
66+
i = 0
67+
for pr in anon_drum:
68+
new_program[program==pr] = perm_anon_drum[i%self.n_anon]
69+
i += 1
70+
71+
# print(new_program.unique())
72+
73+
return new_program
6174

6275
def __getitem__(self, idx):
6376
f = self.files[idx]
6477
item = torch.load(f)
65-
program = item['program'] # 1-d LongTensor of MIDI programs 0-255
66-
# (128-255 are drums)
78+
program = item['program'] # 1-d LongTensor of MIDI programs
79+
# 0 is unused
80+
# (128-256 are drums)
81+
# 257+ are 'true anonymous' (no program change on track)
82+
# (drums with no PC are just mapped to 129)
83+
# N + 1000*K is the Kth additional part for instrument N
6784
pitch = item['pitch'] # 1-d LongTensor of MIDI pitches 0-127
68-
time = item['time']
69-
velocity = item['velocity']
85+
time = item['time'] # 1-d DoubleTensor of absolute times in seconds
86+
velocity = item['velocity'] # 1-d LongTensor of MIDI velocities 0-127
7087

7188
assert len(pitch) == len(time)
7289

@@ -79,24 +96,24 @@ def __getitem__(self, idx):
7996
)
8097
pitch = pitch + transpose
8198

82-
# randomly map instruments to 'anonymous melodic' and 'anonymous drum'
83-
program = self._random_map_anonymous_instruments(program)
84-
85-
# shift from 0-index to general MIDI 1-index; reserve 0 for start token
86-
program += 1
99+
# scramble anonymous and extra parts to 'anonymous melodic' and 'anonymous drum' parts
100+
program = self._remap_anonymous_instruments(program)
87101

88-
time_margin = 1e-3 # hardcoded since it should match prep script
102+
time_margin = 1e-3
89103

90104
# dequantize: add noise up to +/- margin
91-
time = time + (torch.rand_like(time)*2-1)*time_margin
105+
# move note-ons later, note-offs earlier
106+
time = (time +
107+
torch.rand_like(time) * ((velocity==0).double()*2-1) * time_margin
108+
)
92109
# random augment tempo
93110
time = time * (1 + random.random()*self.speed*2 - self.speed)
94111

95112
# dequantize velocity
96113
velocity = velocity.float()
97114
velocity = (
98115
velocity +
99-
(torch.rand_like(time)-0.5) * ((velocity>0) & (velocity<127)).float()
116+
(torch.rand_like(time, dtype=torch.float)-0.5) * ((velocity>0) & (velocity<127)).float()
100117
).clamp(0., 127.)
101118
# random velocity curve
102119
# take care not to map any positive values closer to 0 than 1
@@ -110,7 +127,7 @@ def __getitem__(self, idx):
110127
# sort (using argsort on time and indexing the rest)
111128
# compute delta time
112129
time, idx = time.sort()
113-
time = torch.cat((time.new_zeros((1,)), time)).diff(1)
130+
time = torch.cat((time.new_zeros((1,)), time)).diff(1).float()
114131
program = program[idx]
115132
pitch = pitch[idx]
116133
velocity = velocity[idx]

notochord/notochord/model.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def __init__(self,
159159
mlp_layers=0,
160160
dropout=0.1, norm=None,
161161
num_pitches=128,
162-
num_instruments=272,
162+
num_instruments=320,
163163
time_sines=128, vel_sines=128,
164164
time_bounds=(0,10), time_components=32, time_res=1e-2,
165165
vel_components=16
@@ -188,10 +188,12 @@ def __init__(self,
188188
# embeddings for inputs
189189
self.instrument_emb = nn.Embedding(self.instrument_domain, emb_size)
190190
self.pitch_emb = nn.Embedding(self.pitch_domain, emb_size)
191-
self.time_emb = torch.jit.script(SineEmbedding(
191+
self.time_emb = torch.jit.script(
192+
SineEmbedding(
192193
time_sines, emb_size, 1e-3, 30, scale='log'))
193194
# self.vel_emb = MixEmbedding(emb_size, (0, 127))
194-
self.vel_emb = torch.jit.script(SineEmbedding(
195+
self.vel_emb = torch.jit.script(
196+
SineEmbedding(
195197
vel_sines, emb_size, 2, 512, scale='lin'))
196198

197199
# RNN backbone
@@ -436,7 +438,7 @@ def forward(self, instruments, pitches, times, velocities, ends,
436438
def is_drum(self, inst):
437439
# TODO: add a constructor argument to specify which are drums
438440
# hardcoded for now
439-
return inst > 128 and inst < 257 or inst > 264
441+
return inst > 128 and inst < 257 or inst > 288
440442

441443

442444
def feed(self, inst, pitch, time, vel):

notochord/notochord/train.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def __init__(self,
5454
"""
5555
kw['model'] = model = get_class_defaults(model_cls) | model
5656
model['num_pitches'] = 128
57-
model['num_instruments'] = 272
57+
model['num_instruments'] = 320
5858
# model['time_bounds'] = clamp_time
5959

6060
# assign all arguments to self by default
@@ -237,7 +237,8 @@ def train(self):
237237

238238
##### validation loop
239239
def run_validation():
240-
logs = self._validate(valid_loader)['logs']
240+
self.dataset.batch_len = self.dataset.max_test_len
241+
logs = self._validate(valid_loader, testing=False)['logs']
241242
self.log('valid', logs)
242243

243244
epoch_size = self.epoch_size or len(train_loader)
@@ -251,6 +252,7 @@ def run_validation():
251252
##### training loop
252253
self.model.train()
253254
self.dataset.testing = False
255+
self.dataset.batch_len = self.batch_len
254256
for batch in tqdm(it.islice(train_loader, epoch_size),
255257
desc=f'training epoch {self.epoch}', total=epoch_size):
256258
mask = batch['mask'].to(self.device, non_blocking=True)

0 commit comments

Comments (0)