@@ -384,17 +384,17 @@ InfllmV2AttentionVarlen::InfllmV2AttentionVarlen(Tensor out,
384384}
385385
386386void InfllmV2AttentionVarlen::execute (Tensor out,
387- const Tensor &q,
388- const Tensor &k,
389- const Tensor &v,
390- const Tensor &cu_seqlens_q,
391- const Tensor &cu_seqlens_k,
392- int max_seqlen_q,
393- int max_seqlen_k,
394- float scale,
395- bool causal,
396- int window_size_left,
397- int window_size_right) {
387+ const Tensor &q,
388+ const Tensor &k,
389+ const Tensor &v,
390+ const Tensor &cu_seqlens_q,
391+ const Tensor &cu_seqlens_k,
392+ int max_seqlen_q,
393+ int max_seqlen_k,
394+ float scale,
395+ bool causal,
396+ int window_size_left,
397+ int window_size_right) {
398398 INFINICORE_GRAPH_OP_RECORD_OR_RUN (
399399 InfllmV2AttentionVarlen,
400400 out, q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, scale, causal, window_size_left, window_size_right);
@@ -491,14 +491,14 @@ InfllmV2AttentionKVCache::InfllmV2AttentionKVCache(Tensor out,
491491}
492492
493493void InfllmV2AttentionKVCache::execute (Tensor out,
494- const Tensor &q,
495- const Tensor &k_cache,
496- const Tensor &v_cache,
497- const Tensor &cache_lens,
498- float scale,
499- bool causal,
500- int window_size_left,
501- int window_size_right) {
494+ const Tensor &q,
495+ const Tensor &k_cache,
496+ const Tensor &v_cache,
497+ const Tensor &cache_lens,
498+ float scale,
499+ bool causal,
500+ int window_size_left,
501+ int window_size_right) {
502502 INFINICORE_GRAPH_OP_RECORD_OR_RUN (
503503 InfllmV2AttentionKVCache,
504504 out, q, k_cache, v_cache, cache_lens, scale, causal, window_size_left, window_size_right);
@@ -585,16 +585,16 @@ InfllmV2AttentionKVCacheUpdate::InfllmV2AttentionKVCacheUpdate(Tensor out,
585585}
586586
587587void InfllmV2AttentionKVCacheUpdate::execute (Tensor out,
588- const Tensor &q,
589- const Tensor &k_cache,
590- const Tensor &v_cache,
591- const Tensor &k_new,
592- const Tensor &v_new,
593- const Tensor &cache_lens,
594- float scale,
595- bool causal,
596- int window_size_left,
597- int window_size_right) {
588+ const Tensor &q,
589+ const Tensor &k_cache,
590+ const Tensor &v_cache,
591+ const Tensor &k_new,
592+ const Tensor &v_new,
593+ const Tensor &cache_lens,
594+ float scale,
595+ bool causal,
596+ int window_size_left,
597+ int window_size_right) {
598598 INFINICORE_GRAPH_OP_RECORD_OR_RUN (
599599 InfllmV2AttentionKVCacheUpdate,
600600 out, q, k_cache, v_cache, k_new, v_new, cache_lens, scale, causal, window_size_left, window_size_right);
0 commit comments