From a7cc976d3b1901aa08834aee831db67fcbf1f22b Mon Sep 17 00:00:00 2001
From: James Nguyen <jamesamn@google.com>
Date: Thu, 21 May 2026 14:52:33 -0700
Subject: [PATCH 1/5] test: add gemini cli skill content preservation test

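Add tests for skill setup in GeminiCliGenerator: a mocked unit test that
checks a string skill entry is copied with shutil.copytree, and an
integration test that verifies skill contents are preserved when copied
to the fake home sandbox.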

TAG=agy
CONV=a151643e-4527-4bbe-9908-a1f4f33be7e3
---
 evalbench/test/gemini_cli_test.py | 146 ++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 evalbench/test/gemini_cli_test.py

diff --git a/evalbench/test/gemini_cli_test.py b/evalbench/test/gemini_cli_test.py
new file mode 100644
index 00000000..defda624
--- /dev/null
+++ b/evalbench/test/gemini_cli_test.py
@@ -0,0 +1,146 @@
+import os
+import pytest
+from unittest.mock import patch, MagicMock
+from generators.models.gemini_cli import GeminiCliGenerator
+
+
+@patch('generators.models.gemini_cli.logging')
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+@patch('generators.models.gemini_cli.shutil.copytree')
+@patch('generators.models.gemini_cli.shutil.rmtree')
+def test_setup_single_skill_string(
+    mock_rmtree, 
+    mock_copytree, 
+    mock_exists, 
+    mock_run,
+    mock_logging
+):
+    """Test that a single skill defined as a string is copied to fake home."""
+    print("\n--- STARTING TEST: test_setup_single_skill_string ---")
+    
+    real_home = "/home/jamesamn"
+    real_skill_path = os.path.join(real_home, ".gemini", "skills", "my-single-skill")
+    print(f"[Setup] Real home directory set to: {real_home}")
+    print(f"[Setup] Expected path of the skill in real home: {real_skill_path}")
+    
+    # We mock 'os.path.exists' because we don't want the test to actually look 
+    # at your real disk. We want to simulate different scenarios.
+    def exists_side_effect(path):
+        print(f"[Mock os.path.exists] Checking if path exists: {path}")
+        if path == real_skill_path:
+            print(f"[Mock os.path.exists] Path MATCHES real skill path! Returning True (Simulating it exists).")
+            return True
+        print(f"[Mock os.path.exists] Path does NOT match. Returning False.")
+        return False
+    mock_exists.side_effect = exists_side_effect
+
+    # We mock 'subprocess.run' because during setup, the generator tries to
+    # run 'gcloud auth print-access-token' to authenticate NPM.
+    # We simulate a successful run returning a fake token.
+    mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
+    print("[Mock subprocess.run] Configured to simulate successful 'gcloud auth' (returns 'fake-token')")
+
+    # This is the configuration we are passing to the generator.
+    # We are telling it to set up a skill named "my-single-skill".
+    config = {
+        "setup": {
+            "skills": ["my-single-skill"]
+        }
+    }
+    print(f"[Config] Generator configuration: {config}")
+
+    print("[Execution] Initializing GeminiCliGenerator (this will trigger _setup and _setup_skills)...")
+    # We patch 'os.makedirs' and 'open' to prevent the generator from actually 
+    # creating folders and files on your disk during this test.
+    with patch('generators.models.gemini_cli.os.makedirs'), \
+         patch('generators.models.gemini_cli.open', create=True), \
+         patch.dict(os.environ, {"HOME": real_home}):
+        
+        GeminiCliGenerator(config)
+    print("[Execution] GeminiCliGenerator initialization complete.")
+
+    # Now we verify if the generator did what we expected.
+    # It should have copied the skill from the 'real home' to the 'fake home'.
+    expected_fake_home = os.path.abspath(os.path.join(".venv", "fake_home"))
+    expected_fake_skill_path = os.path.join(expected_fake_home, ".gemini", "skills", "my-single-skill")
+    
+    print(f"[Assertion] Checking if shutil.copytree was called to copy the skill...")
+    print(f"[Assertion] Expected Source (Real): {real_skill_path}")
+    print(f"[Assertion] Expected Destination (Fake): {expected_fake_skill_path}")
+
+    try:
+        mock_copytree.assert_called_once_with(real_skill_path, expected_fake_skill_path)
+        print("[Assertion] SUCCESS: shutil.copytree was called with the exact expected paths!")
+    except AssertionError as e:
+        print("[Assertion] FAILED: shutil.copytree was NOT called as expected!")
+        raise e
+
+    print("[Assertion] Checking if correct log message was logged...")
+    try:
+        mock_logging.info.assert_any_call("Syncing skill: my-single-skill")
+        print("[Assertion] SUCCESS: Log message 'Syncing skill: my-single-skill' was found!")
+    except AssertionError as e:
+        print("[Assertion] FAILED: Expected log message was NOT found!")
+        raise e
+        
+    print("--- END OF TEST: test_setup_single_skill_string ---")
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+def test_skill_content_preserved(mock_run, tmp_path, monkeypatch):
+    """Test that the content of the skill is preserved when copied to fake home."""
+    print("\n--- STARTING TEST: test_skill_content_preserved ---")
+
+    # 1. Setup paths in temp directory
+    real_home = tmp_path / "real_home"
+    real_home.mkdir()
+
+    skill_name = "my-content-skill"
+    real_skill_dir = real_home / ".gemini" / "skills" / skill_name
+    real_skill_dir.mkdir(parents=True)
+
+    # Create a file inside the skill with some content
+    skill_file = real_skill_dir / "secret.txt"
+    expected_content = "password is xyz"
+    skill_file.write_text(expected_content)
+
+    print(f"[Setup] Created real home at: {real_home}")
+    print(f"[Setup] Created skill at: {real_skill_dir}")
+    print(f"[Setup] Created skill file with content: {expected_content}")
+
+    # 2. Mock subprocess.run for gcloud auth
+    mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
+    print("[Mock subprocess.run] Configured to simulate successful 'gcloud auth'")
+
+    # 3. Configure CWD and HOME env var to use temp directory
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("HOME", str(real_home))
+    print(f"[Setup] Changed CWD to: {tmp_path}")
+    print(f"[Setup] Set HOME to: {real_home}")
+
+    config = {
+        "setup": {
+            "skills": [skill_name]
+        }
+    }
+
+    print("[Execution] Initializing GeminiCliGenerator (real file operations)...")
+    # We do NOT patch os.makedirs, open, shutil, or os.path.exists here.
+    # They will run for real inside the tmp_path.
+    GeminiCliGenerator(config)
+    print("[Execution] GeminiCliGenerator initialization complete.")
+
+    # 4. Verify if the file was copied and content preserved
+    expected_fake_skill_file = tmp_path / ".venv" / "fake_home" / ".gemini" / "skills" / skill_name / "secret.txt"
+
+    print(f"[Assertion] Checking if copied file exists at: {expected_fake_skill_file}")
+    assert expected_fake_skill_file.exists(), f"Copied skill file not found at {expected_fake_skill_file}"
+    print("[Assertion] SUCCESS: Copied skill file exists!")
+
+    print("[Assertion] Checking if content is preserved...")
+    actual_content = expected_fake_skill_file.read_text()
+    assert actual_content == expected_content, f"Content mismatch. Expected: '{expected_content}', Got: '{actual_content}'"
+    print(f"[Assertion] SUCCESS: Content matches! Got: '{actual_content}'")
+
+    print("--- END OF TEST: test_skill_content_preserved ---")

From 35d1d730abc87c32bda63a72091b2238f3f67396 Mon Sep 17 00:00:00 2001
From: James Nguyen <jamesamn@google.com>
Date: Tue, 26 May 2026 02:29:05 +0000
Subject: [PATCH 2/5] test: add more integration tests for gemini cli skills

TAG=agy
CONV=aa927cc7-418a-41e3-b658-9b82915e18eb
---
 evalbench/test/gemini_cli_test.py | 408 ++++++++++++++++++++++--------
 1 file changed, 309 insertions(+), 99 deletions(-)

diff --git a/evalbench/test/gemini_cli_test.py b/evalbench/test/gemini_cli_test.py
index defda624..8164cd1b 100644
--- a/evalbench/test/gemini_cli_test.py
+++ b/evalbench/test/gemini_cli_test.py
@@ -1,98 +1,57 @@
 import os
+import sys
+from unittest.mock import MagicMock, patch
+
 import pytest
-from unittest.mock import patch, MagicMock
+
+                                         
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 from generators.models.gemini_cli import GeminiCliGenerator
 
 
-@patch('generators.models.gemini_cli.logging')
 @patch('generators.models.gemini_cli.subprocess.run')
 @patch('generators.models.gemini_cli.os.path.exists')
 @patch('generators.models.gemini_cli.shutil.copytree')
 @patch('generators.models.gemini_cli.shutil.rmtree')
 def test_setup_single_skill_string(
-    mock_rmtree, 
-    mock_copytree, 
-    mock_exists, 
+    mock_rmtree,
+    mock_copytree,
+    mock_exists,
     mock_run,
-    mock_logging
+    monkeypatch,
 ):
-    """Test that a single skill defined as a string is copied to fake home."""
-    print("\n--- STARTING TEST: test_setup_single_skill_string ---")
-    
-    real_home = "/home/jamesamn"
+    """A string entry in setup.skills triggers a copytree from real to fake home."""
+    real_home = "/fake/real_home"
     real_skill_path = os.path.join(real_home, ".gemini", "skills", "my-single-skill")
-    print(f"[Setup] Real home directory set to: {real_home}")
-    print(f"[Setup] Expected path of the skill in real home: {real_skill_path}")
-    
-    # We mock 'os.path.exists' because we don't want the test to actually look 
-    # at your real disk. We want to simulate different scenarios.
-    def exists_side_effect(path):
-        print(f"[Mock os.path.exists] Checking if path exists: {path}")
-        if path == real_skill_path:
-            print(f"[Mock os.path.exists] Path MATCHES real skill path! Returning True (Simulating it exists).")
-            return True
-        print(f"[Mock os.path.exists] Path does NOT match. Returning False.")
-        return False
-    mock_exists.side_effect = exists_side_effect
-
-    # We mock 'subprocess.run' because during setup, the generator tries to
-    # run 'gcloud auth print-access-token' to authenticate NPM.
-    # We simulate a successful run returning a fake token.
+
+    mock_exists.side_effect = lambda path: path == real_skill_path
     mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
-    print("[Mock subprocess.run] Configured to simulate successful 'gcloud auth' (returns 'fake-token')")
 
-    # This is the configuration we are passing to the generator.
-    # We are telling it to set up a skill named "my-single-skill".
-    config = {
-        "setup": {
-            "skills": ["my-single-skill"]
-        }
-    }
-    print(f"[Config] Generator configuration: {config}")
-
-    print("[Execution] Initializing GeminiCliGenerator (this will trigger _setup and _setup_skills)...")
-    # We patch 'os.makedirs' and 'open' to prevent the generator from actually 
-    # creating folders and files on your disk during this test.
-    with patch('generators.models.gemini_cli.os.makedirs'), \
-         patch('generators.models.gemini_cli.open', create=True), \
-         patch.dict(os.environ, {"HOME": real_home}):
-        
+    config = {"setup": {"skills": ["my-single-skill"]}}
+
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
         GeminiCliGenerator(config)
-    print("[Execution] GeminiCliGenerator initialization complete.")
 
-    # Now we verify if the generator did what we expected.
-    # It should have copied the skill from the 'real home' to the 'fake home'.
     expected_fake_home = os.path.abspath(os.path.join(".venv", "fake_home"))
-    expected_fake_skill_path = os.path.join(expected_fake_home, ".gemini", "skills", "my-single-skill")
-    
-    print(f"[Assertion] Checking if shutil.copytree was called to copy the skill...")
-    print(f"[Assertion] Expected Source (Real): {real_skill_path}")
-    print(f"[Assertion] Expected Destination (Fake): {expected_fake_skill_path}")
-
-    try:
-        mock_copytree.assert_called_once_with(real_skill_path, expected_fake_skill_path)
-        print("[Assertion] SUCCESS: shutil.copytree was called with the exact expected paths!")
-    except AssertionError as e:
-        print("[Assertion] FAILED: shutil.copytree was NOT called as expected!")
-        raise e
-
-    print("[Assertion] Checking if correct log message was logged...")
-    try:
-        mock_logging.info.assert_any_call("Syncing skill: my-single-skill")
-        print("[Assertion] SUCCESS: Log message 'Syncing skill: my-single-skill' was found!")
-    except AssertionError as e:
-        print("[Assertion] FAILED: Expected log message was NOT found!")
-        raise e
-        
-    print("--- END OF TEST: test_setup_single_skill_string ---")
+    expected_fake_skill_path = os.path.join(
+        expected_fake_home, ".gemini", "skills", "my-single-skill"
+    )
+    mock_copytree.assert_called_once_with(real_skill_path, expected_fake_skill_path)
 
 
 @patch('generators.models.gemini_cli.subprocess.run')
 def test_skill_content_preserved(mock_run, tmp_path, monkeypatch):
-    """Test that the content of the skill is preserved when copied to fake home."""
-    print("\n--- STARTING TEST: test_skill_content_preserved ---")
+    """End-to-end: a skill file's contents survive copy into the fake-home sandbox.
 
-    # 1. Setup paths in temp directory
+    Exercises the full GeminiCliGenerator.__init__ -> _setup -> _setup_skills
+    path with real filesystem operations confined to tmp_path. Only subprocess
+    (gcloud auth) is mocked.
+    """
     real_home = tmp_path / "real_home"
     real_home.mkdir()
 
@@ -100,47 +59,298 @@ def test_skill_content_preserved(mock_run, tmp_path, monkeypatch):
     real_skill_dir = real_home / ".gemini" / "skills" / skill_name
     real_skill_dir.mkdir(parents=True)
 
-    # Create a file inside the skill with some content
     skill_file = real_skill_dir / "secret.txt"
-    expected_content = "password is xyz"
+    expected_content = "hello world"
     skill_file.write_text(expected_content)
 
-    print(f"[Setup] Created real home at: {real_home}")
-    print(f"[Setup] Created skill at: {real_skill_dir}")
-    print(f"[Setup] Created skill file with content: {expected_content}")
-
-    # 2. Mock subprocess.run for gcloud auth
     mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
-    print("[Mock subprocess.run] Configured to simulate successful 'gcloud auth'")
 
-    # 3. Configure CWD and HOME env var to use temp directory
     monkeypatch.chdir(tmp_path)
     monkeypatch.setenv("HOME", str(real_home))
-    print(f"[Setup] Changed CWD to: {tmp_path}")
-    print(f"[Setup] Set HOME to: {real_home}")
+
+    GeminiCliGenerator({"setup": {"skills": [skill_name]}})
+
+    expected_fake_skill_file = (
+        tmp_path / ".venv" / "fake_home" / ".gemini" / "skills" / skill_name / "secret.txt"
+    )
+    assert expected_fake_skill_file.exists(), (
+        f"Copied skill file not found at {expected_fake_skill_file}"
+    )
+    assert expected_fake_skill_file.read_text() == expected_content
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+@patch('generators.models.gemini_cli.shutil.copytree')
+@patch('generators.models.gemini_cli.shutil.rmtree')
+def test_setup_multiple_skills_string(
+    mock_rmtree,
+    mock_copytree,
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """Multiple string entries in setup.skills trigger copies for each."""
+    real_home = "/fake/real_home"
+    skill_a_path = os.path.join(real_home, ".gemini", "skills", "skill-A")
+    skill_b_path = os.path.join(real_home, ".gemini", "skills", "skill-B")
+
+                                    
+    mock_exists.side_effect = lambda path: path in (skill_a_path, skill_b_path)
+    mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
+
+    config = {"setup": {"skills": ["skill-A", "skill-B"]}}
+
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
+
+    expected_fake_home = os.path.abspath(os.path.join(".venv", "fake_home"))
+    expected_fake_a = os.path.join(expected_fake_home, ".gemini", "skills", "skill-A")
+    expected_fake_b = os.path.join(expected_fake_home, ".gemini", "skills", "skill-B")
+
+                                   
+    assert mock_copytree.call_count == 2
+    mock_copytree.assert_any_call(skill_a_path, expected_fake_a)
+    mock_copytree.assert_any_call(skill_b_path, expected_fake_b)
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+def test_setup_skill_dict_link(
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """A dict entry with action 'link' triggers npm exec gemini-cli skills link."""
+    real_home = "/fake/real_home"
+    mock_exists.return_value = False
+    mock_run.return_value = MagicMock(returncode=0, stdout="success")
+
+    config = {
+        "gemini_cli_version": "gemini-cli@0.36.0",
+        "setup": {
+            "skills": [
+                {
+                    "action": "link",
+                    "path": "/path/to/my-skill"
+                }
+            ]
+        }
+    }
+
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
+
+    assert mock_run.call_count == 3
+    calls = [call[0][0] for call in mock_run.call_args_list]
+    
+    expected_cmd = [
+        "npm",
+        "exec",
+        "--yes",
+        "gemini-cli@0.36.0",
+        "--",
+        "skills",
+        "link",
+        "/path/to/my-skill",
+        "--consent",
+    ]
+    assert expected_cmd in calls
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+def test_setup_skill_dict_install_by_path(
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """A dict entry with action 'install' and path triggers npm exec gemini-cli skills install <path>."""
+    real_home = "/fake/real_home"
+    mock_exists.return_value = False
+    mock_run.return_value = MagicMock(returncode=0, stdout="success")
+
+    config = {
+        "gemini_cli_version": "gemini-cli@0.36.0",
+        "setup": {
+            "skills": [
+                {
+                    "action": "install",
+                    "path": "/path/to/my-skill"
+                }
+            ]
+        }
+    }
+
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
+
+    assert mock_run.call_count == 3
+    calls = [call[0][0] for call in mock_run.call_args_list]
+    
+    expected_cmd = [
+        "npm",
+        "exec",
+        "--yes",
+        "gemini-cli@0.36.0",
+        "--",
+        "skills",
+        "install",
+        "/path/to/my-skill",
+        "--consent",
+    ]
+    assert expected_cmd in calls
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+def test_setup_skill_dict_install_by_name(
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """A dict entry with action 'install' and name triggers npm exec gemini-cli skills install <name>."""
+    real_home = "/fake/real_home"
+    mock_exists.return_value = False
+    mock_run.return_value = MagicMock(returncode=0, stdout="success")
+
+    config = {
+        "gemini_cli_version": "gemini-cli@0.36.0",
+        "setup": {
+            "skills": [
+                {
+                    "action": "install",
+                    "name": "my-skill-package"
+                }
+            ]
+        }
+    }
+
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
+
+    assert mock_run.call_count == 3
+    calls = [call[0][0] for call in mock_run.call_args_list]
+    
+    expected_cmd = [
+        "npm",
+        "exec",
+        "--yes",
+        "gemini-cli@0.36.0",
+        "--",
+        "skills",
+        "install",
+        "my-skill-package",
+        "--consent",
+    ]
+    assert expected_cmd in calls
+
+
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+def test_setup_skill_dict_enable(
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """A dict entry with action 'enable' triggers npm exec gemini-cli skills enable <name>."""
+    real_home = "/fake/real_home"
+    mock_exists.return_value = False
+    mock_run.return_value = MagicMock(returncode=0, stdout="success")
 
     config = {
+        "gemini_cli_version": "gemini-cli@0.36.0",
         "setup": {
-            "skills": [skill_name]
+            "skills": [
+                {
+                    "action": "enable",
+                    "name": "my-skill"
+                }
+            ]
         }
     }
 
-    print("[Execution] Initializing GeminiCliGenerator (real file operations)...")
-    # We do NOT patch os.makedirs, open, shutil, or os.path.exists here.
-    # They will run for real inside the tmp_path.
-    GeminiCliGenerator(config)
-    print("[Execution] GeminiCliGenerator initialization complete.")
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
+
+    assert mock_run.call_count == 3
+    calls = [call[0][0] for call in mock_run.call_args_list]
+    
+    expected_cmd = [
+        "npm",
+        "exec",
+        "--yes",
+        "gemini-cli@0.36.0",
+        "--",
+        "skills",
+        "enable",
+        "my-skill",
+    ]
+    assert expected_cmd in calls
+
 
-    # 4. Verify if the file was copied and content preserved
-    expected_fake_skill_file = tmp_path / ".venv" / "fake_home" / ".gemini" / "skills" / skill_name / "secret.txt"
+@patch('generators.models.gemini_cli.subprocess.run')
+@patch('generators.models.gemini_cli.os.path.exists')
+def test_setup_skill_dict_disable(
+    mock_exists,
+    mock_run,
+    monkeypatch,
+):
+    """A dict entry with action 'disable' triggers npm exec gemini-cli skills disable <name>."""
+    real_home = "/fake/real_home"
+    mock_exists.return_value = False
+    mock_run.return_value = MagicMock(returncode=0, stdout="success")
 
-    print(f"[Assertion] Checking if copied file exists at: {expected_fake_skill_file}")
-    assert expected_fake_skill_file.exists(), f"Copied skill file not found at {expected_fake_skill_file}"
-    print("[Assertion] SUCCESS: Copied skill file exists!")
+    config = {
+        "gemini_cli_version": "gemini-cli@0.36.0",
+        "setup": {
+            "skills": [
+                {
+                    "action": "disable",
+                    "name": "my-skill"
+                }
+            ]
+        }
+    }
 
-    print("[Assertion] Checking if content is preserved...")
-    actual_content = expected_fake_skill_file.read_text()
-    assert actual_content == expected_content, f"Content mismatch. Expected: '{expected_content}', Got: '{actual_content}'"
-    print(f"[Assertion] SUCCESS: Content matches! Got: '{actual_content}'")
+    monkeypatch.setenv("HOME", real_home)
+    with (
+        patch('generators.models.gemini_cli.os.makedirs'),
+        patch('generators.models.gemini_cli.open', create=True),
+    ):
+        GeminiCliGenerator(config)
 
-    print("--- END OF TEST: test_skill_content_preserved ---")
+    assert mock_run.call_count == 3
+    calls = [call[0][0] for call in mock_run.call_args_list]
+    
+    expected_cmd = [
+        "npm",
+        "exec",
+        "--yes",
+        "gemini-cli@0.36.0",
+        "--",
+        "skills",
+        "disable",
+        "my-skill",
+    ]
+    assert expected_cmd in calls

From a082b8c817444ecad85443160c4e14912fbb6a86 Mon Sep 17 00:00:00 2001
From: James Nguyen <jamesamn@google.com>
Date: Tue, 26 May 2026 17:19:27 +0000
Subject: [PATCH 3/5] refactor: simplify skill content preservation test

TAG=agy
CONV=aa927cc7-418a-41e3-b658-9b82915e18eb
---
 evalbench/test/gemini_cli_test.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/evalbench/test/gemini_cli_test.py b/evalbench/test/gemini_cli_test.py
index 8164cd1b..9d3a8897 100644
--- a/evalbench/test/gemini_cli_test.py
+++ b/evalbench/test/gemini_cli_test.py
@@ -44,14 +44,13 @@ def test_setup_single_skill_string(
     mock_copytree.assert_called_once_with(real_skill_path, expected_fake_skill_path)
 
 
-@patch('generators.models.gemini_cli.subprocess.run')
-def test_skill_content_preserved(mock_run, tmp_path, monkeypatch):
-    """End-to-end: a skill file's contents survive copy into the fake-home sandbox.
+def test_skill_content_preserved(tmp_path, monkeypatch):
+    """Unit test: a skill file's contents survive copy into the fake-home sandbox.
 
-    Exercises the full GeminiCliGenerator.__init__ -> _setup -> _setup_skills
-    path with real filesystem operations confined to tmp_path. Only subprocess
-    (gcloud auth) is mocked.
+    Directly tests the file copying step in _setup_skills without running
+    the full generator initialization or NPM/extension setup pipelines.
     """
+    # 1. Setup paths in temp directory
     real_home = tmp_path / "real_home"
     real_home.mkdir()
 
@@ -63,13 +62,19 @@ def test_skill_content_preserved(mock_run, tmp_path, monkeypatch):
     expected_content = "hello world"
     skill_file.write_text(expected_content)
 
-    mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
-
+    # 2. Configure HOME and CWD env var to use temp directory
     monkeypatch.chdir(tmp_path)
     monkeypatch.setenv("HOME", str(real_home))
 
-    GeminiCliGenerator({"setup": {"skills": [skill_name]}})
+    # 3. Initialize generator with EMPTY config (bypasses _setup)
+    # This sets up paths like self.skills_dir but does NOT run _setup_npm_auth
+    # or list extensions, meaning we don't even need to mock subprocess.run!
+    generator = GeminiCliGenerator({})
+
+    # 4. Directly call the target method under test
+    generator._setup_skills([skill_name])
 
+    # 5. Verify if the file was copied and content preserved
     expected_fake_skill_file = (
         tmp_path / ".venv" / "fake_home" / ".gemini" / "skills" / skill_name / "secret.txt"
     )

From a8ffc38aa0236a8786c372c7080b481f3cd2fd2c Mon Sep 17 00:00:00 2001
From: James Nguyen <jamesamn@google.com>
Date: Tue, 26 May 2026 17:30:35 +0000
Subject: [PATCH 4/5] style: remove docstrings and comments from tests

TAG=agy
CONV=aa927cc7-418a-41e3-b658-9b82915e18eb
---
 evalbench/test/gemini_cli_test.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/evalbench/test/gemini_cli_test.py b/evalbench/test/gemini_cli_test.py
index 9d3a8897..c84ff3d4 100644
--- a/evalbench/test/gemini_cli_test.py
+++ b/evalbench/test/gemini_cli_test.py
@@ -4,7 +4,6 @@
 
 import pytest
 
-                                         
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from generators.models.gemini_cli import GeminiCliGenerator
@@ -21,7 +20,6 @@ def test_setup_single_skill_string(
     mock_run,
     monkeypatch,
 ):
-    """A string entry in setup.skills triggers a copytree from real to fake home."""
     real_home = "/fake/real_home"
     real_skill_path = os.path.join(real_home, ".gemini", "skills", "my-single-skill")
 
@@ -45,12 +43,6 @@ def test_setup_single_skill_string(
 
 
 def test_skill_content_preserved(tmp_path, monkeypatch):
-    """Unit test: a skill file's contents survive copy into the fake-home sandbox.
-
-    Directly tests the file copying step in _setup_skills without running
-    the full generator initialization or NPM/extension setup pipelines.
-    """
-    # 1. Setup paths in temp directory
     real_home = tmp_path / "real_home"
     real_home.mkdir()
 
@@ -62,19 +54,13 @@ def test_skill_content_preserved(tmp_path, monkeypatch):
     expected_content = "hello world"
     skill_file.write_text(expected_content)
 
-    # 2. Configure HOME and CWD env var to use temp directory
     monkeypatch.chdir(tmp_path)
     monkeypatch.setenv("HOME", str(real_home))
 
-    # 3. Initialize generator with EMPTY config (bypasses _setup)
-    # This sets up paths like self.skills_dir but does NOT run _setup_npm_auth
-    # or list extensions, meaning we don't even need to mock subprocess.run!
     generator = GeminiCliGenerator({})
 
-    # 4. Directly call the target method under test
     generator._setup_skills([skill_name])
 
-    # 5. Verify if the file was copied and content preserved
     expected_fake_skill_file = (
         tmp_path / ".venv" / "fake_home" / ".gemini" / "skills" / skill_name / "secret.txt"
     )
@@ -95,12 +81,10 @@ def test_setup_multiple_skills_string(
     mock_run,
     monkeypatch,
 ):
-    """Multiple string entries in setup.skills trigger copies for each."""
     real_home = "/fake/real_home"
     skill_a_path = os.path.join(real_home, ".gemini", "skills", "skill-A")
     skill_b_path = os.path.join(real_home, ".gemini", "skills", "skill-B")
 
-                                    
     mock_exists.side_effect = lambda path: path in (skill_a_path, skill_b_path)
     mock_run.return_value = MagicMock(returncode=0, stdout="fake-token")
 
@@ -117,7 +101,6 @@ def test_setup_multiple_skills_string(
     expected_fake_a = os.path.join(expected_fake_home, ".gemini", "skills", "skill-A")
     expected_fake_b = os.path.join(expected_fake_home, ".gemini", "skills", "skill-B")
 
-                                   
     assert mock_copytree.call_count == 2
     mock_copytree.assert_any_call(skill_a_path, expected_fake_a)
     mock_copytree.assert_any_call(skill_b_path, expected_fake_b)
@@ -130,7 +113,6 @@ def test_setup_skill_dict_link(
     mock_run,
     monkeypatch,
 ):
-    """A dict entry with action 'link' triggers npm exec gemini-cli skills link."""
     real_home = "/fake/real_home"
     mock_exists.return_value = False
     mock_run.return_value = MagicMock(returncode=0, stdout="success")
@@ -178,7 +160,6 @@ def test_setup_skill_dict_install_by_path(
     mock_run,
     monkeypatch,
 ):
-    """A dict entry with action 'install' and path triggers npm exec gemini-cli skills install <path>."""
     real_home = "/fake/real_home"
     mock_exists.return_value = False
     mock_run.return_value = MagicMock(returncode=0, stdout="success")
@@ -226,7 +207,6 @@ def test_setup_skill_dict_install_by_name(
     mock_run,
     monkeypatch,
 ):
-    """A dict entry with action 'install' and name triggers npm exec gemini-cli skills install <name>."""
     real_home = "/fake/real_home"
     mock_exists.return_value = False
     mock_run.return_value = MagicMock(returncode=0, stdout="success")
@@ -274,7 +254,6 @@ def test_setup_skill_dict_enable(
     mock_run,
     monkeypatch,
 ):
-    """A dict entry with action 'enable' triggers npm exec gemini-cli skills enable <name>."""
     real_home = "/fake/real_home"
     mock_exists.return_value = False
     mock_run.return_value = MagicMock(returncode=0, stdout="success")
@@ -321,7 +300,6 @@ def test_setup_skill_dict_disable(
     mock_run,
     monkeypatch,
 ):
-    """A dict entry with action 'disable' triggers npm exec gemini-cli skills disable <name>."""
     real_home = "/fake/real_home"
     mock_exists.return_value = False
     mock_run.return_value = MagicMock(returncode=0, stdout="success")

From 81d78723c5b142ad622f1355009c884e91094efd Mon Sep 17 00:00:00 2001
From: James Nguyen <jamesamn@google.com>
Date: Tue, 26 May 2026 18:21:53 +0000
Subject: [PATCH 5/5] style: fix trailing whitespace on blank lines

TAG=agy
CONV=aa927cc7-418a-41e3-b658-9b82915e18eb
---
 evalbench/test/gemini_cli_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/evalbench/test/gemini_cli_test.py b/evalbench/test/gemini_cli_test.py
index c84ff3d4..ce2552bf 100644
--- a/evalbench/test/gemini_cli_test.py
+++ b/evalbench/test/gemini_cli_test.py
@@ -138,7 +138,7 @@ def test_setup_skill_dict_link(
 
     assert mock_run.call_count == 3
     calls = [call[0][0] for call in mock_run.call_args_list]
-    
+
     expected_cmd = [
         "npm",
         "exec",
@@ -185,7 +185,7 @@ def test_setup_skill_dict_install_by_path(
 
     assert mock_run.call_count == 3
     calls = [call[0][0] for call in mock_run.call_args_list]
-    
+
     expected_cmd = [
         "npm",
         "exec",
@@ -232,7 +232,7 @@ def test_setup_skill_dict_install_by_name(
 
     assert mock_run.call_count == 3
     calls = [call[0][0] for call in mock_run.call_args_list]
-    
+
     expected_cmd = [
         "npm",
         "exec",
@@ -279,7 +279,7 @@ def test_setup_skill_dict_enable(
 
     assert mock_run.call_count == 3
     calls = [call[0][0] for call in mock_run.call_args_list]
-    
+
     expected_cmd = [
         "npm",
         "exec",
@@ -325,7 +325,7 @@ def test_setup_skill_dict_disable(
 
     assert mock_run.call_count == 3
     calls = [call[0][0] for call in mock_run.call_args_list]
-    
+
     expected_cmd = [
         "npm",
         "exec",