Skip to content

Commit db16c19

Browse files
committed
Minor Changes - Sample compose for python
1 parent 7baec71 commit db16c19

8 files changed

Lines changed: 90 additions & 17 deletions

Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ ENV test_file $data_dir/test_minimal.jsonl
4040
ENV epochs 10
4141
ENV pretrained_model microsoft/codebert-base
4242

43+
ENV seed 2022
44+
4345
ENV load_existing_model false
4446
ENV load_model_path /models/pytorch_model.bin
4547

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ On our servers, one epoch on the java data takes ~30h.
7070
The container starts ~20 threads for training, so your server should have >20 cores.
7171

7272
In comparison, training on an RTX 1070 took 7h per epoch.
73-
However, mounting GPUs into a container is hard, so its not yet implemented.
7473

7574
## Known issues
7675

docker-compose-pretrained-minimal.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,8 @@ services:
2121
pretrained_model: microsoft/codebert-base
2222
load_existing_model: "true"
2323
load_model_path: /models/best_pytorch_model.bin
24+
deploy:
25+
resources:
26+
reservations:
27+
devices:
28+
- capabilities: [gpu]

docker-compose-python.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
version: '3.3'
2+
3+
services:
4+
python-codebert-training:
5+
image: ciselab/codebert-code2text:1.3
6+
volumes:
7+
- ./dataset/python/:/dataset:ro
8+
- ./compose_output:/experiment/output
9+
environment:
10+
epochs: 5
11+
lang: python
12+
train_file: /dataset/train.jsonl
13+
valid_file: /dataset/valid.jsonl
14+
test_file: /dataset/test.jsonl
15+
batch_size: 16
16+
oom_kill_disable: true
17+
deploy:
18+
resources:
19+
reservations:
20+
devices:
21+
- capabilities: [gpu]

docker-compose.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,9 @@ services:
1515
train_file: /dataset/train.jsonl
1616
valid_file: /dataset/valid.jsonl
1717
test_file: /dataset/test.jsonl
18-
batch_size: 8
18+
batch_size: 32
19+
deploy:
20+
resources:
21+
reservations:
22+
devices:
23+
- capabilities: [gpu]

entrypoint.sh

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ "$load_existing_model" = true ]; then
3030
--train_batch_size $batch_size --eval_batch_size $batch_size \
3131
--learning_rate $lr \
3232
--num_train_epochs $epochs \
33-
--load_model_path $load_model_path
33+
--load_model_path $load_model_path \
34+
--seed $seed
3435
exit
3536
fi
3637
if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
@@ -46,7 +47,8 @@ if [ "$load_existing_model" = true ]; then
4647
--train_batch_size $batch_size --eval_batch_size $batch_size \
4748
--learning_rate $lr \
4849
--load_model_path $load_model_path \
49-
--num_train_epochs $epochs
50+
--num_train_epochs $epochs \
51+
--seed $seed
5052
exit
5153
fi
5254
if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
@@ -62,7 +64,8 @@ if [ "$load_existing_model" = true ]; then
6264
--train_batch_size $batch_size --eval_batch_size $batch_size \
6365
--learning_rate $lr \
6466
--num_train_epochs $epochs \
65-
--load_model_path $load_model_path
67+
--load_model_path $load_model_path \
68+
--seed $seed
6669
exit
6770
fi
6871
if [ "$DO_TRAIN" = true ]; then
@@ -79,7 +82,8 @@ if [ "$load_existing_model" = true ]; then
7982
--eval_batch_size $batch_size \
8083
--learning_rate $lr \
8184
--num_train_epochs $epochs \
82-
--load_model_path $load_model_path
85+
--load_model_path $load_model_path \
86+
--seed $seed
8387
exit 0
8488
fi
8589
if [ "$DO_TEST" = true ]; then
@@ -93,7 +97,8 @@ if [ "$load_existing_model" = true ]; then
9397
--max_target_length $target_length \
9498
--train_batch_size $batch_size \
9599
--eval_batch_size $batch_size \
96-
--load_model_path $load_model_path
100+
--load_model_path $load_model_path \
101+
--seed $seed
97102
exit
98103
fi
99104
fi
@@ -114,7 +119,8 @@ if [ "$DO_TRAIN" = true -a "$DO_TEST" = true -a "$DO_VALID" = true ]; then
114119
--beam_size $beam_size \
115120
--train_batch_size $batch_size --eval_batch_size $batch_size \
116121
--learning_rate $lr \
117-
--num_train_epochs $epochs
122+
--num_train_epochs $epochs \
123+
--seed $seed
118124
exit
119125
fi
120126
if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
@@ -129,7 +135,8 @@ if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
129135
--beam_size $beam_size \
130136
--train_batch_size $batch_size --eval_batch_size $batch_size \
131137
--learning_rate $lr \
132-
--num_train_epochs $epochs
138+
--num_train_epochs $epochs \
139+
--seed $seed
133140
exit
134141
fi
135142

@@ -145,7 +152,8 @@ if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
145152
--beam_size $beam_size \
146153
--train_batch_size $batch_size --eval_batch_size $batch_size \
147154
--learning_rate $lr \
148-
--num_train_epochs $epochs
155+
--num_train_epochs $epochs \
156+
--seed $seed
149157
exit
150158
fi
151159
if [ "$DO_TRAIN" = true ]; then
@@ -161,7 +169,8 @@ if [ "$DO_TRAIN" = true ]; then
161169
--train_batch_size $batch_size \
162170
--eval_batch_size $batch_size \
163171
--learning_rate $lr \
164-
--num_train_epochs $epochs
172+
--num_train_epochs $epochs \
173+
--seed $seed
165174
exit 0
166175
fi
167176
if [ "$DO_TEST" = true ]; then
@@ -174,7 +183,8 @@ if [ "$DO_TEST" = true ]; then
174183
--max_source_length $source_length \
175184
--max_target_length $target_length \
176185
--train_batch_size $batch_size \
177-
--eval_batch_size $batch_size
186+
--eval_batch_size $batch_size \
187+
--seed $seed
178188
exit
179189
fi
180190

environment.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
name: codebert-code-to-text
2+
channels:
3+
- conda-forge
4+
- defaults
5+
dependencies:
6+
- _r-xgboost-mutex=2.0=cpu_0
7+
- idna=2.10
8+
- pip=20.3
9+
- pycparser=2.20
10+
- pyopenssl=20.0.0
11+
- python_abi=3.7
12+
- requests=2.25.0
13+
- six=1.15.0
14+
- tqdm=4.51.0
15+
- wheel=0.35.1
16+
- pytorch=1.4.0
17+
- pip:
18+
- click==7.1.2
19+
- filelock==3.0.12
20+
- joblib==0.17.0
21+
- numpy==1.19.3
22+
- packaging==20.4
23+
- protobuf==3.14.0
24+
- pyparsing==2.4.7
25+
- regex==2020.11.13
26+
- sacremoses==0.0.43
27+
- sentencepiece==0.1.91
28+
- tokenizers==0.9.3
29+
- transformers==3.5.0
30+
- urllib3==1.26.2

initial_readme.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,17 @@ To fine-tune encoder-decoder on the dataset
103103

104104
```shell
105105
cd code
106-
lang=java #programming language
106+
lang=python #programming language
107107
lr=5e-5
108108
batch_size=32
109109
beam_size=10
110110
source_length=256
111111
target_length=128
112112
data_dir=../dataset
113113
output_dir=model/$lang
114-
train_file=$data_dir/$lang/train_minimal.jsonl
115-
dev_file=$data_dir/$lang/valid_minimal.jsonl
116-
test_file=$data_dir/$lang/test_minimal.jsonl
114+
train_file=$data_dir/$lang/train.jsonl
115+
dev_file=$data_dir/$lang/valid.jsonl
116+
test_file=$data_dir/$lang/test.jsonl
117117
epochs=10
118118
pretrained_model=microsoft/codebert-base #Roberta: roberta-base
119119

@@ -125,7 +125,8 @@ python run.py --do_train --do_test --do_eval --model_type roberta --model_name_o
125125

126126
```shell
127127
batch_size=64
128-
dev_file=$data_dir/$lang/valid.jsonl
128+
dev_file=$data_dir/$lang/valid.jsonl
129130
test_file=$data_dir/$lang/test.jsonl
130131
test_model=$output_dir/checkpoint-best-bleu/pytorch_model.bin #checkpoint for test
131132

0 commit comments

Comments
 (0)