 from torch import nn
 import torch.nn.functional as F

-# import geotorch
-
-# shim torch RNN, GRU classes to have same API as LSTM
-def rnn_shim(cls):
-    """LSTM API for GRU and RNN.
-
-    hidden state is first element of state tuple"""
-    class shim(cls):
-        def forward(self, input, states):
-            assert len(states) == 1
-            out, h = super().forward(input, *states)
-            return out, (h,)
-    return shim
-
-GRU = rnn_shim(nn.GRU)
-RNN = rnn_shim(nn.RNN)
-LSTM = nn.LSTM
-
-class ExpRNN(nn.Module):
-    pass
-
-# class ExpRNNCell(nn.Module):
-#     pass
-
-class GenericRNN(nn.Module):
-    kind_cls = {
-        'gru': GRU,
-        'lstm': LSTM,
-        'elman': RNN,
-        'exprnn': ExpRNN
-    }
-    # desiderata:
-    # support geotorch constraints
-    # clean API for multiple layers, multiple cell states (e.g. LSTM)
-    def __init__(self, kind, *a, **kw):
-        super().__init__()
-        if kw.get('bidirectional'): raise ValueError("""
-            bidirectional GenericRNN not supported.
-            """)
-        cls = GenericRNN.kind_cls[kind]
-        self.rnn = cls(*a, **kw)
-
-    def __getattr__(self, a):
-        try:
-            return super().__getattr__(a)
-        except AttributeError:
-            return getattr(self.rnn, a)
-
-    def forward(self, x, initial_state):
-        """
-        Args:
-            x: Tensor[batch x time x channel] if batch_first else [time x batch x channel]
-            initial_state: List[Tensor[layers x batch x hidden]], list of components
-                with 0 being hidden state (e.g. 1 is cell state for LSTM).
-        Returns:
-            hidden: hidden states of the top layer, Tensor[batch x time x hidden]
-                or [time x batch x hidden]
-            new_states: List[Tensor[layers x batch x hidden]]
-        """
-        hidden, final_state = self.rnn.forward(x, initial_state) # forward or __call__?
-        return hidden, final_state
-
-    ## NOTE: individual time-step API might be useful, not actually needed yet though
-    # def step(self, x, state):
-    #     """
-    #     Args:
-    #         x: Tensor[batch x channel]
-    #         state: List[Tensor[layers x batch x hidden]], list of components
-    #             with 0 being hidden state (e.g. 1 is cell state for LSTM).
-    #     Returns:
-    #         hidden: hidden state of top layer [batch x hidden]
-    #         new_states: List[Tensor[layers x batch x hidden]]
-    #     """
-    #     time_idx = 1 if self.rnn.batch_first else 0
-    #     x = x.unsqueeze(time_idx)
-    #     hidden, state = self.forward(x, state)
-    #     return hidden.squeeze(time_idx), state
-
+from .rnn import GenericRNN

 class PitchPredictor(nn.Module):
     # note: use named arguments only for benefit of training script
-    def __init__(self, emb_size=128, hidden_size=512, domain_size=128, num_layers=1):
+    def __init__(self, emb_size=128, hidden_size=512, domain_size=128,
+            num_layers=1, kind='gru', dropout=0):
         """
         """
         super().__init__()
@@ -93,14 +17,18 @@ def __init__(self, emb_size=128, hidden_size=512, domain_size=128, num_layers=1)

         self.emb = nn.Embedding(domain_size, emb_size)
         self.proj = nn.Linear(hidden_size, domain_size)
+        #### DEBUG
+        with torch.no_grad():
+            self.proj.weight.mul_(1e-2)

-        self.rnn = GenericRNN('gru', emb_size, hidden_size,
-            num_layers=num_layers, batch_first=True)
+        self.rnn = GenericRNN(kind, emb_size, hidden_size,
+            num_layers=num_layers, batch_first=True, dropout=dropout)

         # learnable initial state
         self.initial_state = nn.ParameterList([
             # layer x batch x hidden
-            nn.Parameter(torch.randn(num_layers, 1, hidden_size) * hidden_size**-0.5),
+            nn.Parameter(torch.randn(num_layers, 1, hidden_size) * hidden_size**-0.5)
+            for _ in range(2 if kind == 'lstm' else 1)
         ])

         # persistent state for inference
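
A note on the refactor above: `GenericRNN` (moved into the new `rnn.py` module) gives every cell kind the LSTM-style tuple-of-state-components API, so callers like `PitchPredictor` never special-case the cell type. A minimal usage sketch, assuming the moved module keeps the API of the removed code shown above (the relative import only works from inside the package):

```python
import torch
from .rnn import GenericRNN  # module introduced by this commit

layers, batch, time, feat, hidden = 2, 4, 16, 32, 64
x = torch.randn(batch, time, feat)

# 'lstm' carries two state components (hidden, cell);
# 'gru'/'elman' carry one, wrapped in a 1-tuple by rnn_shim.
rnn = GenericRNN('lstm', feat, hidden, num_layers=layers, batch_first=True)
state = tuple(torch.zeros(layers, batch, hidden) for _ in range(2))

out, new_state = rnn(x, state)   # out: [batch, time, hidden]
assert out.shape == (batch, time, hidden)
assert len(new_state) == 2
```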
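The `kind`-dependent comprehension at the end determines how many learnable initial-state components the model owns: two for LSTM (hidden and cell), one otherwise. Each is `[num_layers, 1, hidden]` and presumably gets expanded along the batch dimension at forward time (that code is outside this hunk). The `#### DEBUG` block shrinks the output projection at init, which keeps the initial logits near zero and the predicted distribution near uniform. A hypothetical check of the state-count behavior:

```python
model = PitchPredictor(kind='lstm')
assert len(model.initial_state) == 2   # hidden + cell state
assert model.initial_state[0].shape == (model.rnn.num_layers, 1, model.rnn.hidden_size)

model = PitchPredictor(kind='gru')
assert len(model.initial_state) == 1   # hidden state only
```

(Torch itself warns when `dropout > 0` is passed with `num_layers=1`, so the new `dropout=0` default is the safe one for the single-layer case.)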