From 152593c5d440073907e4c3c6430f3c6ef2491990 Mon Sep 17 00:00:00 2001
From: "huangjintao.hjt" <huangjintao.hjt@alibaba-inc.com>
Date: Sat, 9 May 2026 14:48:43 +0800
Subject: [PATCH 1/7] support gemma4 megatron

---
 .../Instruction/Supported-models-and-datasets.md | 16 ++++++++--------
 .../Instruction/Supported-models-and-datasets.md | 16 ++++++++--------
 swift/megatron/utils/convert_utils.py            |  6 +++---
 swift/model/models/gemma.py                      |  1 +
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/docs/source/Instruction/Supported-models-and-datasets.md b/docs/source/Instruction/Supported-models-and-datasets.md
index 7115be751a..1b7369f945 100644
--- a/docs/source/Instruction/Supported-models-and-datasets.md
+++ b/docs/source/Instruction/Supported-models-and-datasets.md
@@ -1119,14 +1119,14 @@
 |[google/gemma-3n-E4B](https://modelscope.cn/models/google/gemma-3n-E4B)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E4B](https://huggingface.co/google/gemma-3n-E4B)|
 |[google/gemma-3n-E2B-it](https://modelscope.cn/models/google/gemma-3n-E2B-it)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E2B-it](https://huggingface.co/google/gemma-3n-E2B-it)|
 |[google/gemma-3n-E4B-it](https://modelscope.cn/models/google/gemma-3n-E4B-it)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E4B-it](https://huggingface.co/google/gemma-3n-E4B-it)|
-|[google/gemma-4-E2B](https://modelscope.cn/models/google/gemma-4-E2B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E2B](https://huggingface.co/google/gemma-4-E2B)|
-|[google/gemma-4-E2B-it](https://modelscope.cn/models/google/gemma-4-E2B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it)|
-|[google/gemma-4-E4B](https://modelscope.cn/models/google/gemma-4-E4B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E4B](https://huggingface.co/google/gemma-4-E4B)|
-|[google/gemma-4-E4B-it](https://modelscope.cn/models/google/gemma-4-E4B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it)|
-|[google/gemma-4-31B](https://modelscope.cn/models/google/gemma-4-31B)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B)|
-|[google/gemma-4-31B-it](https://modelscope.cn/models/google/gemma-4-31B-it)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it)|
-|[google/gemma-4-26B-A4B](https://modelscope.cn/models/google/gemma-4-26B-A4B)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-26B-A4B](https://huggingface.co/google/gemma-4-26B-A4B)|
-|[google/gemma-4-26B-A4B-it](https://modelscope.cn/models/google/gemma-4-26B-A4B-it)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it)|
+|[google/gemma-4-E2B](https://modelscope.cn/models/google/gemma-4-E2B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E2B](https://huggingface.co/google/gemma-4-E2B)|
+|[google/gemma-4-E2B-it](https://modelscope.cn/models/google/gemma-4-E2B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it)|
+|[google/gemma-4-E4B](https://modelscope.cn/models/google/gemma-4-E4B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E4B](https://huggingface.co/google/gemma-4-E4B)|
+|[google/gemma-4-E4B-it](https://modelscope.cn/models/google/gemma-4-E4B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it)|
+|[google/gemma-4-31B](https://modelscope.cn/models/google/gemma-4-31B)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B)|
+|[google/gemma-4-31B-it](https://modelscope.cn/models/google/gemma-4-31B-it)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it)|
+|[google/gemma-4-26B-A4B](https://modelscope.cn/models/google/gemma-4-26B-A4B)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-26B-A4B](https://huggingface.co/google/gemma-4-26B-A4B)|
+|[google/gemma-4-26B-A4B-it](https://modelscope.cn/models/google/gemma-4-26B-A4B-it)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it)|
 |[mistralai/Mistral-Small-3.1-24B-Base-2503](https://modelscope.cn/models/mistralai/Mistral-Small-3.1-24B-Base-2503)|mistral3|mistral_2503|transformers>=4.49|&#x2718;|vision|[mistralai/Mistral-Small-3.1-24B-Base-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503)|
 |[mistralai/Mistral-Small-3.1-24B-Instruct-2503](https://modelscope.cn/models/mistralai/Mistral-Small-3.1-24B-Instruct-2503)|mistral3|mistral_2503|transformers>=4.49|&#x2718;|vision|[mistralai/Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503)|
 |[mistralai/Ministral-3-3B-Base-2512](https://modelscope.cn/models/mistralai/Ministral-3-3B-Base-2512)|mistral3|mistral_2512|transformers>=5.0.0.dev0, mistral-common>=1.8.6|&#x2718;|vision|[mistralai/Ministral-3-3B-Base-2512](https://huggingface.co/mistralai/Ministral-3-3B-Base-2512)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index c70e7b7aaa..4431217b5e 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -1120,14 +1120,14 @@ The table below introduces the models integrated with ms-swift:
 |[google/gemma-3n-E4B](https://modelscope.cn/models/google/gemma-3n-E4B)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E4B](https://huggingface.co/google/gemma-3n-E4B)|
 |[google/gemma-3n-E2B-it](https://modelscope.cn/models/google/gemma-3n-E2B-it)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E2B-it](https://huggingface.co/google/gemma-3n-E2B-it)|
 |[google/gemma-3n-E4B-it](https://modelscope.cn/models/google/gemma-3n-E4B-it)|gemma3n|gemma3n|transformers>=4.53.1|&#x2718;|-|[google/gemma-3n-E4B-it](https://huggingface.co/google/gemma-3n-E4B-it)|
-|[google/gemma-4-E2B](https://modelscope.cn/models/google/gemma-4-E2B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E2B](https://huggingface.co/google/gemma-4-E2B)|
-|[google/gemma-4-E2B-it](https://modelscope.cn/models/google/gemma-4-E2B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it)|
-|[google/gemma-4-E4B](https://modelscope.cn/models/google/gemma-4-E4B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E4B](https://huggingface.co/google/gemma-4-E4B)|
-|[google/gemma-4-E4B-it](https://modelscope.cn/models/google/gemma-4-E4B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2718;|-|[google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it)|
-|[google/gemma-4-31B](https://modelscope.cn/models/google/gemma-4-31B)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B)|
-|[google/gemma-4-31B-it](https://modelscope.cn/models/google/gemma-4-31B-it)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it)|
-|[google/gemma-4-26B-A4B](https://modelscope.cn/models/google/gemma-4-26B-A4B)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-26B-A4B](https://huggingface.co/google/gemma-4-26B-A4B)|
-|[google/gemma-4-26B-A4B-it](https://modelscope.cn/models/google/gemma-4-26B-A4B-it)|gemma4|gemma4|transformers>=4.53|&#x2718;|-|[google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it)|
+|[google/gemma-4-E2B](https://modelscope.cn/models/google/gemma-4-E2B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E2B](https://huggingface.co/google/gemma-4-E2B)|
+|[google/gemma-4-E2B-it](https://modelscope.cn/models/google/gemma-4-E2B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it)|
+|[google/gemma-4-E4B](https://modelscope.cn/models/google/gemma-4-E4B)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E4B](https://huggingface.co/google/gemma-4-E4B)|
+|[google/gemma-4-E4B-it](https://modelscope.cn/models/google/gemma-4-E4B-it)|gemma4|gemma4_nothinking|transformers>=4.53|&#x2714;|-|[google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it)|
+|[google/gemma-4-31B](https://modelscope.cn/models/google/gemma-4-31B)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B)|
+|[google/gemma-4-31B-it](https://modelscope.cn/models/google/gemma-4-31B-it)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it)|
+|[google/gemma-4-26B-A4B](https://modelscope.cn/models/google/gemma-4-26B-A4B)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-26B-A4B](https://huggingface.co/google/gemma-4-26B-A4B)|
+|[google/gemma-4-26B-A4B-it](https://modelscope.cn/models/google/gemma-4-26B-A4B-it)|gemma4|gemma4|transformers>=4.53|&#x2714;|-|[google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it)|
 |[mistralai/Mistral-Small-3.1-24B-Base-2503](https://modelscope.cn/models/mistralai/Mistral-Small-3.1-24B-Base-2503)|mistral3|mistral_2503|transformers>=4.49|&#x2718;|vision|[mistralai/Mistral-Small-3.1-24B-Base-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503)|
 |[mistralai/Mistral-Small-3.1-24B-Instruct-2503](https://modelscope.cn/models/mistralai/Mistral-Small-3.1-24B-Instruct-2503)|mistral3|mistral_2503|transformers>=4.49|&#x2718;|vision|[mistralai/Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503)|
 |[mistralai/Ministral-3-3B-Base-2512](https://modelscope.cn/models/mistralai/Ministral-3-3B-Base-2512)|mistral3|mistral_2512|transformers>=5.0.0.dev0, mistral-common>=1.8.6|&#x2718;|vision|[mistralai/Ministral-3-3B-Base-2512](https://huggingface.co/mistralai/Ministral-3-3B-Base-2512)|
diff --git a/swift/megatron/utils/convert_utils.py b/swift/megatron/utils/convert_utils.py
index e10c5cf3ce..f95c260f59 100644
--- a/swift/megatron/utils/convert_utils.py
+++ b/swift/megatron/utils/convert_utils.py
@@ -66,9 +66,9 @@ def _model_cpu_forward_context(modules,
         origin_torch_dtype = next(modules[0].parameters()).dtype
     except StopIteration:
         origin_torch_dtype = next(modules[-1].parameters()).dtype
-    embedding = None
+    embeddings = None
     if share_embedding:
-        embedding = [module for module in modules if isinstance(module, (nn.Embedding, VocabParallelEmbedding))][-1]
+        embeddings = [module for module in modules if isinstance(module, (nn.Embedding, VocabParallelEmbedding))]
 
     def _to_cuda_hook(module, args):
         if compute_device is not None or torch_dtype is not None:
@@ -77,7 +77,7 @@ def _to_cuda_hook(module, args):
         return args
 
     def _to_cpu_hook(module, args, output):
-        if share_embedding and module is embedding:
+        if share_embedding and module in embeddings:
             return
         module.to(device=target_device, dtype=origin_torch_dtype)
 
diff --git a/swift/model/models/gemma.py b/swift/model/models/gemma.py
index a1b6e3a9dc..4d1eedf357 100644
--- a/swift/model/models/gemma.py
+++ b/swift/model/models/gemma.py
@@ -263,6 +263,7 @@ def forward(
 
         if self.config.get_text_config().hidden_size_per_layer_input:
             pad_embedding = self.language_model.embed_tokens.weight[self.config.text_config.pad_token_id, :]
+            pad_embedding = pad_embedding.to(multimodal_mask.device)
             llm_inputs_embeds = torch.where(multimodal_mask[..., None], pad_embedding.view(1, 1, -1), inputs_embeds)
             per_layer_inputs = self.language_model.get_per_layer_inputs(llm_input_ids, llm_inputs_embeds)
         else:

From d7c2f289d728f4387b5712fe507e8ad0b2e834e6 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 02:00:26 +0800
Subject: [PATCH 2/7] convert_utils: robust dtype lookup, skip rotary emb in to-cpu hook

---
 swift/megatron/utils/convert_utils.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/swift/megatron/utils/convert_utils.py b/swift/megatron/utils/convert_utils.py
index 4b487737a6..7b589b84ed 100644
--- a/swift/megatron/utils/convert_utils.py
+++ b/swift/megatron/utils/convert_utils.py
@@ -62,10 +62,13 @@ def _model_cpu_forward_context(modules,
                                compute_device=None,
                                share_embedding: bool = False,
                                target_device='cpu'):
-    try:
-        origin_torch_dtype = next(modules[0].parameters()).dtype
-    except StopIteration:
-        origin_torch_dtype = next(modules[-1].parameters()).dtype
+    for module in modules:
+        try:
+            origin_torch_dtype = next(module.parameters()).dtype
+        except StopIteration:
+            pass
+        else:
+            break
     embeddings = None
     if share_embedding:
         embeddings = [module for module in modules if isinstance(module, (nn.Embedding, VocabParallelEmbedding))]
@@ -77,7 +80,7 @@ def _to_cuda_hook(module, args):
         return args
 
     def _to_cpu_hook(module, args, output):
-        if share_embedding and module in embeddings:
+        if share_embedding and module in embeddings or 'rotaryemb' in module.__class__.__name__.lower():
             return
         module.to(device=target_device, dtype=origin_torch_dtype)
 

From 96ff1662beef0a4345270fa633cdac76a7e5c46b Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 15:57:08 +0800
Subject: [PATCH 3/7] add gemma4 megatron sft example script

---
 examples/models/gemma4/megatron.sh | 50 ++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 examples/models/gemma4/megatron.sh

diff --git a/examples/models/gemma4/megatron.sh b/examples/models/gemma4/megatron.sh
new file mode 100644
index 0000000000..df9f4c6f30
--- /dev/null
+++ b/examples/models/gemma4/megatron.sh
@@ -0,0 +1,50 @@
+# 8 * 80GiB
+PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True' \
+NPROC_PER_NODE=8 \
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+megatron sft \
+    --model google/gemma-4-26B-A4B-it \
+    --save_safetensors true \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+              'AI-ModelScope/LaTeX_OCR:human_handwrite#2000' \
+    --load_from_cache_file true \
+    --add_non_thinking_prefix true \
+    --split_dataset_ratio 0.01 \
+    --tuner_type full \
+    --tensor_model_parallel_size 2 \
+    --expert_model_parallel_size 4 \
+    --pipeline_model_parallel_size 2 \
+    --moe_permute_fusion true \
+    --moe_grouped_gemm true \
+    --moe_shared_expert_overlap true \
+    --moe_aux_loss_coeff 1e-6 \
+    --micro_batch_size 4 \
+    --global_batch_size 16 \
+    --recompute_granularity full \
+    --recompute_method uniform \
+    --recompute_num_layers 1 \
+    --num_train_epochs 1 \
+    --finetune true \
+    --freeze_llm false \
+    --freeze_vit true \
+    --freeze_aligner true \
+    --cross_entropy_loss_fusion true \
+    --lr 1e-5 \
+    --lr_warmup_fraction 0.05 \
+    --min_lr 1e-6 \
+    --output_dir megatron_output/gemma-4-26B-A4B-it \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --max_length 4096 \
+    --dataloader_num_workers 8 \
+    --dataset_num_proc 8 \
+    --no_save_optim true \
+    --no_save_rng true \
+    --sequence_parallel true \
+    --attention_backend unfused \
+    --group_by_length true \
+    --padding_free false \
+    --model_author swift \
+    --model_name swift-robot

From fd783fc610d78238a0914d3e72cc1860c10c6d5a Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 16:00:45 +0800
Subject: [PATCH 4/7] gemma4 example: raise micro_batch_size from 4 to 8

---
 examples/models/gemma4/megatron.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/models/gemma4/megatron.sh b/examples/models/gemma4/megatron.sh
index df9f4c6f30..4458b3b762 100644
--- a/examples/models/gemma4/megatron.sh
+++ b/examples/models/gemma4/megatron.sh
@@ -20,7 +20,7 @@ megatron sft \
     --moe_grouped_gemm true \
     --moe_shared_expert_overlap true \
     --moe_aux_loss_coeff 1e-6 \
-    --micro_batch_size 4 \
+    --micro_batch_size 8 \
     --global_batch_size 16 \
     --recompute_granularity full \
     --recompute_method uniform \

From c8aa1c140f198114d0c38b337114ba9522fb10ef Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 16:02:08 +0800
Subject: [PATCH 5/7] gemma4 example: note loss fluctuation caused by group_by_length

---
 examples/models/gemma4/megatron.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/models/gemma4/megatron.sh b/examples/models/gemma4/megatron.sh
index 4458b3b762..b301e59e78 100644
--- a/examples/models/gemma4/megatron.sh
+++ b/examples/models/gemma4/megatron.sh
@@ -1,4 +1,6 @@
 # 8 * 80GiB
+# Note: because group_by_length is enabled, the data is not fully shuffled, which may cause
+# fluctuations in the loss curve. Adjust the parameters (e.g. `--group_by_length`) if needed.
 PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True' \
 NPROC_PER_NODE=8 \
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \

From 845ad34b5cfa1bbb0cab152845b79bbbdccc2d3d Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 16:09:23 +0800
Subject: [PATCH 6/7] gemma4 example: eval/save every 500 steps, add infer command

---
 examples/models/gemma4/megatron.sh | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/examples/models/gemma4/megatron.sh b/examples/models/gemma4/megatron.sh
index b301e59e78..97294e1ea8 100644
--- a/examples/models/gemma4/megatron.sh
+++ b/examples/models/gemma4/megatron.sh
@@ -37,8 +37,8 @@ megatron sft \
     --lr_warmup_fraction 0.05 \
     --min_lr 1e-6 \
     --output_dir megatron_output/gemma-4-26B-A4B-it \
-    --eval_steps 200 \
-    --save_steps 200 \
+    --eval_steps 500 \
+    --save_steps 500 \
     --max_length 4096 \
     --dataloader_num_workers 8 \
     --dataset_num_proc 8 \
@@ -50,3 +50,10 @@ megatron sft \
     --padding_free false \
     --model_author swift \
     --model_name swift-robot
+
+# CUDA_VISIBLE_DEVICES=0 swift infer \
+#     --model megatron_output/gemma-4-26B-A4B-it/vx-xxx/checkpoint-xxx \
+#     --stream true \
+#     --enable_thinking false \
+#     --load_data_args true \
+#     --max_new_tokens 2048

From 72a258dc998fbe811d74a6d0f3da99ec41b3bb51 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Tue, 19 May 2026 16:48:09 +0800
Subject: [PATCH 7/7] rename gemma4 example megatron.sh to mcore.sh

---
 examples/models/gemma4/{megatron.sh => mcore.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename examples/models/gemma4/{megatron.sh => mcore.sh} (100%)

diff --git a/examples/models/gemma4/megatron.sh b/examples/models/gemma4/mcore.sh
similarity index 100%
rename from examples/models/gemma4/megatron.sh
rename to examples/models/gemma4/mcore.sh