fix transposition for anonymous instruments; remove commented-out ModalityTransformer code

victor-shepardson · victor-shepardson · commit 51c3c3ae9f3e · 2022-08-16T15:22:00.000Z
diff --git a/notochord/notochord/data.py b/notochord/notochord/data.py
@@ -25,16 +25,22 @@ def __init__(self, data_dir, batch_len, transpose=5, speed=0.1, glob='**/*.pkl',
         
     def __len__(self):
         return len(self.files)
+
+    def is_melodic(self, program):  # mask of ids treated as melodic (not drums)
+        orig_program = program%1000  # reduce the id modulo 1000 before the range checks
+        return (orig_program<=128) | (orig_program>256)  # False only for reduced ids in 129..256
+
+    def is_anon(self, program):  # mask of ids treated as anonymous instruments
+        return program > 256  # uses the raw id (no modulo), unlike is_melodic
     
     def _remap_anonymous_instruments(self, program: torch.Tensor) -> torch.Tensor:
         """
         Randomly map instruments to additional ‘anonymous’ melodic and drum identities
         with a probability of 10% per instrument, without replacement. 
         Also map any parts > 256 to appropriate anonymous ids.
         """
-        orig_program = program%1000
-        is_melodic = (orig_program<=128) | (orig_program>256)
-        is_anon = (program > 256)
+        is_melodic = self.is_melodic(program)
+        is_anon = self.is_anon(program)
         named_melodic = list(program.masked_select(is_melodic & ~is_anon).unique())
         anon_melodic = list(program.masked_select(is_melodic & is_anon).unique())
         named_drum = list(program.masked_select(~is_melodic & ~is_anon).unique())
@@ -92,7 +98,7 @@ def __getitem__(self, idx):
         transpose_up = min(self.transpose, 127-pitch.max())
         transpose = (
             random.randint(-transpose_down, transpose_up)
-            * (program<128) # don't transpose drums
+            * self.is_melodic(program).long() # don't transpose drums
         )
         pitch = pitch + transpose
 
diff --git a/notochord/notochord/model.py b/notochord/notochord/model.py
@@ -91,65 +91,6 @@ def get_norm():
     def forward(self, x):
         return self.net(x)
 
-# class ModalityTransformer(nn.Module):
-#     """
-#     Model joint distribution of note modalities (e.g. pitch, time, velocity).
-
-#     This is an autoregressive Transformer model for the *internal* structure of notes.
-#     It is *not* autoregressive in time, but in modality.
-#     At training time, it executes in parallel over all timesteps and modalities, with
-#     time dependencies provided via the RNN backbone.
-
-#     At sampling time it is called serially, one modality at a time, 
-#     repeatedly at each time step.
-
-#     Inspired by XLNet: http://arxiv.org/abs/1906.08237
-#     """
-#     def __init__(self, input_size, hidden_size, heads=4, layers=1):
-#         super().__init__()
-#         self.net = nn.TransformerDecoder(
-#             nn.TransformerDecoderLayer(
-#                 input_size, heads, hidden_size, norm_first=False
-#                 ), layers)
-
-#     def forward(self, ctx, h_ctx, h_tgt):
-#         """
-#         Args:
-#             ctx: list of Tensor[batch x time x input_size], length note_dim-1
-#                 these are the embedded ground truth values
-#             h_ctx: Tensor[batch x time x input_size]
-#                 projection of RNN state (need something to attend to when ctx is empty)
-#             h_tgt: list of Tensor[batch x time x input_size], length note_dim
-#                 these are projections of the RNN state for each target,
-#                 which the Transformer will map to distribution parameters.
-#         """
-#         # explicitly broadcast
-#         h_ctx, *ctx = torch.broadcast_tensors(h_ctx, *ctx)
-#         h_ctx, *h_tgt = torch.broadcast_tensors(h_ctx, *h_tgt)
-
-#         # h_tgt is 'target' w.r.t TransformerDecoder
-#         # h_ctx and context are 'memory'
-#         batch_size = h_ctx.shape[0]*h_ctx.shape[1]
-#         # fold time into batch, stack modes
-#         tgt = torch.stack([
-#             item.reshape(batch_size,-1)
-#             for item in h_tgt
-#         ],0)
-#         mem = torch.stack([
-#             item.reshape(batch_size,-1)
-#             for item in [h_ctx, *ctx]
-#         ],0)
-#         # now "time"(mode) x "batch"(+time) x channel
-
-#         # generate a mask
-#         # this is both the target and memory mask
-#         # masking is such that each target can only depend on "previous" context
-#         n = len(h_tgt)
-#         mask = ~tgt.new_ones((n,n), dtype=bool).tril()
-
-#         x = self.net(tgt, mem, mask, mask)
-#         return list(x.reshape(n, *h_ctx.shape).unbind(0))
-
 
 class Notochord(nn.Module):
     # note: use named arguments only for benefit of training script