# Source: microsoft/PyMarlin, examples/cnndailymail_text_summarization/config-ortds.yaml
# (revision 196a57dbae22d399aabe9088c105e1849201d934)
---
# Top-level settings for the CNN/DailyMail text-summarization example.
# NOTE(review): data_path is a machine-specific drive-letter path; adjust it
# for the environment the job runs in.
data_path: 'D:/data/cnn_cln'
# Feature switches. Judging by the names they select distributed training,
# ONNX Runtime, and ORT combined with DeepSpeed — TODO confirm against the
# script that consumes this file.
dist: true
ort: true
ortds: true

trainer:
  # Upper bound on training steps within one epoch.
  max_train_steps_per_epoch: null
  # Upper bound on validation steps within one epoch.
  max_val_steps_per_epoch: null
  # Global batch size used for training.
  train_batch_size: 32
  # Per-GPU batch size used for validation.
  val_batch_size: 32
  # Number of epochs to run in total.
  epochs: 3
  ort: true
  # Ceiling on the GPU batch size while training.
  gpu_batch_size_limit: 4
  disable_tqdm: true
  # Enabled writers.
  writers:
    - 'stdout'
    - 'aml'
    - 'tensorboard'

module:
  # Maximum encoder / decoder lengths.
  max_length_encoder: 1024
  max_length_decoder: 128
  deepspeed_config: 'deepspeed_methods/deepspeedConfig.json'
  deepspeed_transformer_kernel: true
  # Optional: lets DeepSpeed load a specific checkpoint. Unnecessary when
  # save_latest is true (the default) while checkpointing with DeepSpeed.
  deepspeed_ckpt_tag: 'deepspeed_ckpt'

wrt:
  # Log directory; presumably for the TensorBoard writer, going by the key
  # name — TODO confirm.
  tb_log_dir: 'outputs/tb_logs'


stat:
  # Presumably the logging interval, in steps — TODO confirm.
  log_steps: 20

chkp:
  checkpoint: true
  delete_existing_checkpoints: false
  # AML output path; mounting is not required.
  save_dir: 'outputs/chkpt'
  model_state_save_dir: 'outputs/model'
  load_dir: null
  load_filename: null

# TODO: expose more of the arguments accepted by
# BartForConditionalGeneration.generate?
generate:
  max_length: 128
  do_sample: false
  num_beams: 5
# TODO: support every option through YAML; ignore (and print a warning for)
# anything that is not present.
# Do not require anything to be defined in the parser beyond what the YAML
# files themselves declare.