adna-workflow/o2.conf at master · DReichLab/adna-workflow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
##################################
# Cromwell Reference Config File #
##################################

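# This file follows the layout of the Cromwell reference config file, which contains all the default settings.
# NOTE(review): several values below (e.g. the SLURM default backend and the MySQL database) look like
# site-specific overrides rather than defaults. Make generic edits/overrides in your application.conf.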

# Settings for the Cromwell web service: listening port, bind address, bind timeout and instance name.
# NOTE(review): `interface = 0.0.0.0` presumably binds on every network interface -- confirm that is
# intended when running on a shared host.
webservice {
  port = 8000
  interface = 0.0.0.0
  binding-timeout = 5s
  instance.name = "reference"
}

# Akka settings: tuning knobs for the default dispatcher plus the named dispatchers declared below.
# Every named dispatcher uses the same type and executor; none overrides the fork-join defaults.
akka {
  actor.default-dispatcher.fork-join-executor {
    # Number of threads = min(parallelism-factor * cpus, parallelism-max)
    # Below are the default values set by Akka; uncomment to tune them

    #parallelism-factor = 3.0
    #parallelism-max = 64
  }

  dispatchers {
    # A dispatcher for actors performing blocking I/O operations.
    # Prevents the whole system from being slowed down while waiting, for instance, for responses from external resources.
    io-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
      # Uses the fork-join defaults; this can be tuned if we wish
    }

    # A dispatcher for actors handling API operations.
    # Keeps the API responsive regardless of the load of workflows being run.
    api-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    # A dispatcher for engine actors.
    # Because backend behaviour is unpredictable (potentially blocking, slow), the engine runs
    # on its own dispatcher to prevent backends from affecting its performance.
    engine-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    # A dispatcher used by supported backend actors
    backend-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    # A dispatcher used for the service registry
    service-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    # Note that without further configuration, all other actors run on the default dispatcher
  }
}

# Engine-wide limits: workflow concurrency and launch pacing, worker counts, I/O throttling,
# input read limits and job rate control.
system {
  # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting
  #abort-jobs-on-terminate = false

  # If 'true', Cromwell tries to restart incomplete workflows when it starts up
  workflow-restart = true

  # Cromwell will cap the number of running workflows at N
  max-concurrent-workflows = 5000

  # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist
  max-workflow-launch-count = 50

  # Number of seconds between workflow launches
  new-workflow-poll-rate = 20

  # Since the WorkflowLogCopyRouter is initialized in code, this is the number of workers
  number-of-workflow-log-copy-workers = 10

  # Default number of cache read workers
  number-of-cache-read-workers = 25

  io {
    # Global throttling: at most `number-of-requests` I/O requests per `per` interval.
    # This is mostly useful for GCS and can be adjusted to match the quota available on the GCS API.
    number-of-requests = 100000
    per = 100 seconds

    # Number of times an I/O operation should be attempted before giving up and failing it.
    number-of-attempts = 5
  }

  # NOTE(review): presumably the maximum size Cromwell will read for each kind of input value
  # (units not stated here, likely bytes) -- confirm against the Cromwell documentation.
  input-read-limits {
    lines = 1280000
    bool = 7
    int = 19
    float = 50
    string = 1280000
    json = 1280000
    tsv = 1280000
    map = 1280000
    object = 1280000
  }

  # NOTE(review): presumably caps job starts at `jobs` per `per` interval (here 2 per second) -- confirm.
  job-rate-control {
    jobs = 2
    per = 1 second
  }
}

# Handling of per-workflow options: which fields are encrypted at rest, and where per-workflow logs go.
workflow-options {
  # These workflow options will be encrypted when stored in the database.
  # The list is empty here, so no option is currently encrypted.
  encrypted-fields: []

  # AES-256 key to use to encrypt the values in `encrypted-fields`.
  # NOTE(review): every character of this key is 'A', so it looks like a placeholder; replace it with a
  # real secret before adding anything to `encrypted-fields`.
  base64-encryption-key: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="

  # Directory in which to write per-workflow logs
  workflow-log-dir: "cromwell-workflow-logs"

  # When true, per-workflow logs will be deleted after copying
  workflow-log-temporary: true

  # workflow-failure-mode determines what happens to other calls when a call fails. Can be either
  # ContinueWhilePossible or NoNewCalls.
  # Can also be overridden in workflow options. Defaults to NoNewCalls. Uncomment to change:
  #workflow-failure-mode: "ContinueWhilePossible"
}

# Optional call-caching configuration. Both settings below are set explicitly in this file.
call-caching {
  # Allows re-use of existing results for jobs you've already run
  # (default: false; enabled here)
  enabled = true

  # Whether to invalidate a cache result forever if it cannot be reused. Disable this if you expect some
  # cache copies to fail for external reasons that should not invalidate the cache (e.g. auth differences
  # between users):
  # (default: true)
  invalidate-bad-cache-results = true
}

# Google authentication settings. Only the "application-default" auth is active; the other two
# entries in `auths` are commented-out examples.
google {

  application-name = "cromwell"

  # Each entry has a `name` and a `scheme`; other stanzas refer to an entry by its `name`
  # (see the commented-out `auth = "application-default"` examples elsewhere in this file).
  auths = [
    {
      name = "application-default"
      scheme = "application_default"
    },
    # Example: refresh-token based auth (disabled)
    #{
    #  name = "user-via-refresh"
    #  scheme = "refresh_token"
    #  client-id = "secret_id"
    #  client-secret = "secret_secret"
    #},
    # Example: service-account based auth using a PEM file (disabled)
    #{
    #  name = "service-account"
    #  scheme = "service_account"
    #  service-account-id = "my-service-account"
    #  pem-file = "/path/to/file.pem"
    #}
  ]
}

// Settings for the docker hash lookups: API quota and the size/lifetime of the docker hashes cache.
docker {
  // Set this to match your available quota against the Google Container Engine API
  gcr-api-queries-per-100-seconds = 1000
  // Time before an entry expires from the docker hashes cache and needs to be fetched again.
  // The value is a duration string that carries its own unit (minutes here).
  cache-entry-ttl = "20 minutes"
  // Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
  cache-size = 200
}

# Engine-level filesystem configuration. Nothing is active in this stanza: the only content is a
# commented-out example, so just the default (local) filesystem applies.
engine {
  # This tells the engine which filesystems are at its disposal to perform any I/O operation that it might need.
  # For instance, WDL variables declared at the workflow level will be evaluated using the filesystems declared here.
  # If you intend to run workflows with this kind of declaration:
  # workflow {
  #    String str = read_string("gs://bucket/my-file.txt")
  # }
  # you will need to provide the engine with a gcs filesystem.
  # Note that the default filesystem (local) is always available.
  #filesystems {
  #  gcs {
  #    auth = "application-default"
  #  }
  #}
}

backend {
  default = "SLURM"
  providers {
    # Backend that runs each job script directly with /bin/bash, or through `docker run` when the
    # `submit-docker` template applies. It is defined here but is not the `default` backend.
    Local {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {

        # Limits the number of concurrent jobs
        #concurrent-job-limit = 5

        run-in-background = true
        # `script-epilogue` configures a shell command to run after the execution of every command block.
        #
        # If this value is not set explicitly, the default value is `sync`, equivalent to:
        # script-epilogue = "sync"
        #
        # To turn off the default `sync` behavior set this value to an empty string (as done here):
        script-epilogue = ""

        # Optional runtime attributes; both are referenced by the `submit-docker` template below.
        runtime-attributes = """
        String? docker
        String? docker_user
        """
        # Command template for running a job script without docker.
        submit = "/bin/bash ${script}"
        # Command template for running a job script inside a container.
        # NOTE(review): presumably used only when the task sets the `docker` attribute -- confirm.
        submit-docker = """
        docker run \
          --rm -i \
          ${"--user " + docker_user} \
          -v ${cwd}:${docker_cwd} \
          ${docker} \
          /bin/bash ${script}
        """

        # Root directory where Cromwell writes job results. This directory must be
        # visible and writeable by the Cromwell process as well as the jobs that Cromwell
        # launches.
        root = "cromwell-executions"

        filesystems {
          local {
            # Strategies for making input files available to a job, listed in this order.
            localization: [
              "hard-link", "soft-link", "copy"
            ]

            caching {
              # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below:
              duplication-strategy: [
                "hard-link", "soft-link", "copy"
              ]

              # Possible values: file, path
              # "file" will compute an md5 hash of the file content.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
              # in order to allow for the original file path to be hashed.
              hashing-strategy: "file"

              # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
              # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
              check-sibling-md5: false
            }
          }
        }
      }
    }

    #TES {
    #  actor-factory = "cromwell.backend.impl.tes.TesBackendLifecycleActorFactory"
    #  config {
    #    root = "cromwell-executions"
    #    dockerRoot = "/cromwell-executions"
    #    endpoint = "http://127.0.0.1:9000/v1/jobs"
    #  }
    #}

    #SGE {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #
    #    # Limits the number of concurrent jobs
    #    concurrent-job-limit = 5
    #
    #    runtime-attributes = """
    #    Int cpu = 1
    #    Float? memory_gb
    #    String? sge_queue
    #    String? sge_project
    #    """
    #
    #    submit = """
    #    qsub \
    #    -terse \
    #    -V \
    #    -b n \
    #    -N ${job_name} \
    #    -wd ${cwd} \
    #    -o ${out} \
    #    -e ${err} \
    #    -pe smp ${cpu} \
    #    ${"-l mem_free=" + memory_gb + "g"} \
    #    ${"-q " + sge_queue} \
    #    ${"-P " + sge_project} \
    #    ${script}
    #    """
    #
    #    kill = "qdel ${job_id}"
    #    check-alive = "qstat -j ${job_id}"
    #    job-id-regex = "(\\d+)"
    #  }
    #}

    #LSF {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    submit = "bsub -J ${job_name} -cwd ${cwd} -o ${out} -e ${err} /bin/bash ${script}"
    #    kill = "bkill ${job_id}"
    #    check-alive = "bjobs ${job_id}"
    #    job-id-regex = "Job <(\\d+)>.*"
    #  }
    #}

    # Backend that submits each job script with `sbatch`. This is the backend selected by
    # `default = "SLURM"` at the top of the `backend` stanza.
    SLURM {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        # Limits the number of concurrent jobs
        concurrent-job-limit = 500
        # An empty string turns off the default `sync` epilogue that would otherwise run after every command block
        script-epilogue = ""
        # Runtime attributes with their default values; each one is interpolated into the `submit`
        # command below.
        runtime-attributes = """
        Int runtime_minutes = 720
        Int cpus = 1
        Int requested_memory_mb_per_core = 8000
        String queue = "short"
        """

        # Command template used to submit a job.
        # NOTE(review): `--constraint="groups"` and `--qos=ded_reich` are hard-coded and look specific to
        # one cluster/allocation -- confirm before reusing this file elsewhere.
        # NOTE(review): `-n` requests `cpus` tasks; if jobs are single multi-threaded processes,
        # `-c`/`--cpus-per-task` may be what is intended -- TODO confirm against the sbatch documentation.
        submit = """
            sbatch -J ${job_name} -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} -p ${queue} \
            ${"-n " + cpus} \
            --mem-per-cpu=${requested_memory_mb_per_core} \
            --constraint="groups" \
            --qos=ded_reich \
            --wrap "/bin/bash ${script}"
        """
        # Commands for cancelling a job and for checking whether it is still alive.
        kill = "scancel ${job_id}"
        check-alive = "squeue -j ${job_id}"
        # Captures the numeric job id; presumably matched against the output of the `submit` command.
        job-id-regex = "Submitted batch job (\\d+).*"

        filesystems {
          local {
            # Strategies for making input files available to a job, listed in this order.
            localization: [
              "hard-link", "soft-link", "copy"
            ]

            caching {
              # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below:
              duplication-strategy: [
                "soft-link"
              ]

              # Possible values: file, path
              # "file" will compute an md5 hash of the file content.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
              # in order to allow for the original file path to be hashed.
              # Here "path" is paired with a soft-link-only duplication strategy, as that requirement describes.
              hashing-strategy: "path"

              # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
              # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
              check-sibling-md5: false
            }
          }
        }
      }
    }

    # Example backend that _only_ runs workflows that specify docker for every command.
    #Docker {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    run-in-background = true
    #    runtime-attributes = "String docker"
    #    submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}"
    #  }
    #}

    #HtCondor {
    #  actor-factory = "cromwell.backend.impl.htcondor.HtCondorBackendFactory"
    #  config {
    #    # Root directory where Cromwell writes job results.  This directory must be
    #    # visible and writeable by the Cromwell process as well as the jobs that Cromwell
    #    # launches.
    #    root: "cromwell-executions"
    #
    #    #Placeholders:
    #    #1. Working directory.
    #    #2. Working directory volume.
    #    #3. Inputs volumes.
    #    #4. Output volume.
    #    #5. Docker image.
    #    #6. Job command.
    #    docker {
    #      #Allow soft links in dockerized jobs
    #      cmd = "docker run -w %s %s %s %s --rm %s /bin/bash -c \"%s\""
    #      defaultWorkingDir = "/workingDir/"
    #      defaultOutputDir = "/output/"
    #    }
    #
    #    cache {
    #      provider = "cromwell.backend.impl.htcondor.caching.provider.mongodb.MongoCacheActorFactory"
    #      enabled = false
    #      forceRewrite = false
    #      db {
    #        host = "127.0.0.1"
    #        port = 27017
    #        name = "htcondor"
    #        collection = "cache"
    #      }
    #    }
    #
    #    filesystems {
    #      local {
    #        localization: [
    #          "hard-link", "soft-link", "copy"
    #        ]
    #      }
    #    }
    #    # Time (in seconds) to wait before re-checking the status of the job again
    #    poll-interval = 3
    #  }
    #}

    #Spark {
    # actor-factory = "cromwell.backend.impl.spark.SparkBackendFactory"
    # config {
    #   # Root directory where Cromwell writes job results.  This directory must be
    #    # visible and writeable by the Cromwell process as well as the jobs that Cromwell
    #   # launches.
    #   root: "cromwell-executions"
    #
    #   filesystems {
    #     local {
    #       localization: [
    #         "hard-link", "soft-link", "copy"
    #       ]
    #     }
    #    }
    #      # change (master, deployMode) to (yarn, client), (yarn, cluster)
    #      #  or (spark://hostname:port, cluster) for spark standalone cluster mode
    #     master: "local"
    #     deployMode: "client"
    #  }
    # }

    #JES {
    #  actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory"
    #  config {
    #    # Google project
    #    project = "my-cromwell-workflows"
    #
    #    # Base bucket for workflow executions
    #    root = "gs://my-cromwell-workflows-bucket"
    #
    #    # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for
    #    # your project.
    #    #
    #    # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will
    #    # cause a drop in performance. Setting this value too high will cause QPS based locks from Google.
    #    # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds"
    #    # See https://cloud.google.com/genomics/quotas for more information
    #    genomics-api-queries-per-100-seconds = 1000
    #
    #    # Polling for completion backs-off gradually for slower-running jobs.
    #    # This is the maximum polling interval (in seconds):
    #    maximum-polling-interval = 600
    #
    #    # Optional Dockerhub Credentials. Can be used to access private docker images.
    #    dockerhub {
    #      # account = ""
    #      # token = ""
    #    }
    #
    #    genomics {
    #      # A reference to an auth defined in the `google` stanza at the top.  This auth is used to create
    #      # Pipelines and manipulate auth JSONs.
    #      auth = "application-default"
    #
    #      # Specifies the zone(s) to use for JES jobs unless overridden by a task's runtime attributes
    #      default-zones = ["us-central1-b"]
    #
    #      // alternative service account to use on the launched compute instance
    #      // NOTE: If combined with service account authorization, both that service account and this service account
    #      // must be able to read and write to the 'root' GCS path
    #      compute-service-account = "default"
    #
    #      # Endpoint for APIs, no reason to change this unless directed by Google.
    #      endpoint-url = "https://genomics.googleapis.com/"
    #    }
    #
    #    filesystems {
    #      gcs {
    #        # A reference to a potentially different auth for manipulating files via engine functions.
    #        auth = "application-default"
    #      }
    #    }
    #  }
    #}

    #AWS {
    #  actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory"
    #  config {
    #    ## These two settings are required to authenticate with the ECS service:
    #    accessKeyId = "..."
    #    secretKey = "..."
    #  }
    #}

  }
}

# Implementation classes for Cromwell's services. The MetadataService `config` block contains only
# commented-out options, so its defaults apply.
services {
  KeyValue {
    class = "cromwell.services.keyvalue.impl.SqlKeyValueServiceActor"
  }
  MetadataService {
    class = "cromwell.services.metadata.impl.MetadataServiceActor"
    config {
      # Set this value to "Inf" to turn off metadata summary refresh.  The default value is currently "2 seconds".
      # metadata-summary-refresh-interval = "Inf"
      # For higher scale environments, e.g. many workflows and/or jobs, DB write performance for metadata events
      # can be improved by writing to the database in batches. Increasing this value can dramatically improve overall
      # performance, but it will both lead to higher memory usage and increase the risk that metadata events
      # might not have been persisted in the event of a Cromwell crash.
      #
      # For normal usage the default value of 1 (effectively no batching) should be fine, but for larger/production
      # environments we recommend a value of at least 500. There is no one-size-fits-all number here, so we recommend
      # benchmarking performance and tuning the value to match your environment.
      # db-batch-size = 1
      #
      # Periodically the stored metadata events will be forcibly written to the DB regardless of whether the batch
      # size has been reached. This prevents events from sitting unwritten in an incomplete batch when no new
      # events are being generated. The default value is currently 5 seconds.
      # db-flush-rate = 5 seconds
    }
  }
}

# Database connection used by Cromwell. The HSQLDB settings are commented out; the MySQL settings are active.
database {
  # hsql default
  #driver = "slick.driver.HsqldbDriver$"
  #db {
  #  driver = "org.hsqldb.jdbcDriver"
  #  url = "jdbc:hsqldb:mem:${uniqueSchema};shutdown=false;hsqldb.tx=mvcc"
  #  connectionTimeout = 3000
  #}

  # MySQL (active configuration)
  profile = "slick.jdbc.MySQLProfile$"
  db {
    driver = "com.mysql.jdbc.Driver"
    url = "jdbc:mysql://dev.mysql.orchestra/devcromwellcc?rewriteBatchedStatements=true"
    # `${USER}` and `${MYSQL_DB_PW}` are HOCON substitutions. No keys with these names are defined in
    # this file, so they are presumably resolved from environment variables; being required (not `${?...}`)
    # substitutions, loading the config presumably fails if either is unset -- confirm against the HOCON spec.
    user = ${USER}
    password = ${MYSQL_DB_PW}
    connectionTimeout = 5000
  }

  # For batch inserts the number of inserts to send to the DB at a time
  # insert-batch-size = 2000

  migration {
    # For databases with a very large number of symbols, selecting all the rows at once can generate a variety of
    # problems. In order to avoid any issue, the selection is paginated. This value sets how many rows should be
    # retrieved and processed at a time, before asking for the next chunk.
    read-batch-size = 100000

    # Because a symbol row can contain any arbitrary WDL value, the number of metadata rows to insert from a single
    # symbol row can vary from 1 to several thousand (or more). To keep the size of the insert batch from growing out
    # of control we monitor its size and execute/commit when it reaches or exceeds `write-batch-size`.
    write-batch-size = 100000
  }
}