
Commit 7b208bc

Mazyod and claude committed

feat: add normalized semantic tokens with canonical legend

Add a normalized semantic tokens layer that allows Monaco/editors to use a single fixed legend regardless of which backend (Pyright, Pyrefly, ty) is active.

- Add lsp_types/semantic_tokens.py with CANONICAL_LEGEND and normalization
- Add get_semantic_tokens(normalize=True) parameter to Session
- Add canonical_legend and backend_legend properties to Session
- Capture server legend during Session initialization
- Add get_semantic_tokens_legend() to LSPBackend protocol
- Export CANONICAL_LEGEND from lsp_types

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

1 parent 0158416 commit 7b208bc

11 files changed

Lines changed: 408 additions & 17 deletions

File tree

CLAUDE.md

Lines changed: 8 additions & 0 deletions
```diff
@@ -70,6 +70,14 @@ This is a minimal-dependency Python library providing typed LSP (Language Server
 - Reusable across different LSP implementations (not just Pyright)
 - Handles process lifecycle: creation, reuse, idle cleanup, and shutdown
 
+**Semantic Tokens Normalization (`lsp_types/semantic_tokens.py`)**
+- `CANONICAL_LEGEND`: Fixed canonical legend for Monaco/editor integration
+- `CANONICAL_TOKEN_TYPES`, `CANONICAL_TOKEN_MODIFIERS`: LSP standard types/modifiers plus backend-specific
+- `build_type_mapping()`, `build_modifier_mapping()`: Create index mapping tables
+- `normalize_tokens()`: Remap token indices from backend-specific to canonical legend
+- `PYREFLY_LEGEND`: Hardcoded legend for Pyrefly (doesn't advertise via LSP)
+- Used by `Session.get_semantic_tokens(normalize=True)` for backend-agnostic tokens
+
 **Backend Integrations**
 
 **Pyright Integration (`lsp_types/pyright/`)**
```

docs/SEMANTIC_TOKENS.md

Lines changed: 59 additions & 0 deletions
@@ -205,6 +205,65 @@ The token types and modifiers must be registered in the **exact same order** as

---

## Normalized Semantic Tokens API

The library provides a **normalized tokens API** that remaps token indices to a canonical legend. This allows Monaco/editors to use a single fixed legend regardless of which backend is active.

### The Problem

Each backend has a different legend ordering:

| Token | Pyright Index | Pyrefly Index | ty Index |
|-------|---------------|---------------|----------|
| `namespace` | 0 | 0 | 0 |
| `class` | 2 | 2 | 1 |
| `variable` | 6 | 8 | 5 |
| `function` | 9 | 12 | 7 |

A Monaco client configured with one legend breaks when switching backends.

### The Solution

Use the `normalize=True` parameter to get tokens with indices remapped to the canonical legend:

```python
from lsp_types import Session, CANONICAL_LEGEND
from lsp_types.pyright.backend import PyrightBackend

session = await Session.create(PyrightBackend(), initial_code="x = 1")

# Original tokens (backend-specific indices)
raw = await session.get_semantic_tokens()

# Normalized tokens (canonical indices matching CANONICAL_LEGEND)
normalized = await session.get_semantic_tokens(normalize=True)

# Monaco uses one fixed legend for all backends
monaco_legend = CANONICAL_LEGEND
```

### Available Properties

```python
session.canonical_legend  # The canonical legend (fixed, same for all backends)
session.backend_legend    # The original legend from the server/backend
```

### Canonical Legend Order

The canonical legend follows LSP standard ordering, with backend-specific tokens appended:

**Token Types (index 0-26):**
- 0-22: LSP standard types (namespace, type, class, enum, interface, struct, typeParameter, parameter, variable, property, enumMember, event, function, method, macro, keyword, modifier, comment, string, number, regexp, operator, decorator)
- 23: label (LSP standard)
- 24-26: Backend-specific (selfParameter, clsParameter, builtinConstant)

**Token Modifiers (bit 0-12):**
- 0-9: LSP standard modifiers (declaration, definition, readonly, static, deprecated, abstract, async, modification, documentation, defaultLibrary)
- 10-12: Backend-specific (builtin, classMember, parameter)

---

## Updating This Document

Run the extraction script to get the latest legends:
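Whatever legend is in effect, the raw `data` array still uses the standard LSP delta encoding (five integers per token). The decoder below is a standalone sketch of how indices and modifier bits resolve to names; the abbreviated toy legend here is illustrative and is not the library's API:

```python
# Decode an LSP semantic token stream (groups of 5 ints) against a legend.
# Abbreviated toy legend for illustration -- the real canonical legend has
# 27 token types and 13 modifier bits.
TOKEN_TYPES = {0: "namespace", 8: "variable", 12: "function"}
MODIFIERS = ["declaration", "definition", "readonly"]

def decode(data: list[int]) -> list[tuple[int, int, int, str, list[str]]]:
    tokens = []
    line = col = 0
    for i in range(0, len(data), 5):
        delta_line, delta_start, length, type_idx, mod_bits = data[i:i + 5]
        if delta_line:
            line += delta_line
            col = delta_start  # deltaStart is absolute on a new line
        else:
            col += delta_start  # relative to the previous token on the same line
        mods = [m for b, m in enumerate(MODIFIERS) if mod_bits & (1 << b)]
        tokens.append((line, col, length, TOKEN_TYPES.get(type_idx, "?"), mods))
    return tokens

# A declared variable at (0, 0) and a function reference at (0, 4)
print(decode([0, 0, 1, 8, 0b001, 0, 4, 3, 12, 0]))
# -> [(0, 0, 1, 'variable', ['declaration']), (0, 4, 3, 'function', [])]
```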

examples/extract_semantic_legends.py

Lines changed: 15 additions & 11 deletions
```diff
@@ -65,22 +65,26 @@ async def extract_legend(
 
     if semantic_provider is None:
         # Try requesting tokens anyway - some servers respond without advertising
-        await process.notify.did_open_text_document({
-            "textDocument": {
-                "uri": f"file://{base_path}/test.py",
-                "languageId": types.LanguageKind.Python,
-                "version": 1,
-                "text": "x = 1\n",
+        await process.notify.did_open_text_document(
+            {
+                "textDocument": {
+                    "uri": f"file://{base_path}/test.py",
+                    "languageId": types.LanguageKind.Python,
+                    "version": 1,
+                    "text": "x = 1\n",
+                }
             }
-        })
+        )
         tokens = await asyncio.wait_for(
-            process.send.semantic_tokens_full({
-                "textDocument": {"uri": f"file://{base_path}/test.py"}
-            }),
+            process.send.semantic_tokens_full(
+                {"textDocument": {"uri": f"file://{base_path}/test.py"}}
+            ),
             timeout=5.0,
         )
         if tokens and tokens.get("data"):
-            print(f"  {backend_name}: No legend advertised, but returns tokens (unusable without legend)")
+            print(
+                f"  {backend_name}: No legend advertised, but returns tokens (unusable without legend)"
+            )
         else:
             print(f"  {backend_name}: No semantic tokens provider")
         return None
```
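The "unusable without legend" caveat in the script is worth making concrete: the same `data` array resolves to different token types under different legends, so tokens from a server with no known legend cannot be interpreted. The two legends below are hypothetical:

```python
# One token whose typeIndex is 3; its meaning depends entirely on the legend.
data = [0, 0, 5, 3, 0]  # deltaLine, deltaStart, length, typeIndex, modifiers

legend_a = ["namespace", "type", "class", "enum"]          # hypothetical server A
legend_b = ["namespace", "class", "variable", "function"]  # hypothetical server B

print(legend_a[data[3]])  # -> enum
print(legend_b[data[3]])  # -> function
```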

lsp_types/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@
 
 from . import methods  # noqa: F401
 from .requests import *  # noqa: F401, F403
+from .semantic_tokens import CANONICAL_LEGEND  # noqa: F401
 from .session import *  # noqa: F401, F403
 from .types import *  # noqa: F401, F403
```
78

lsp_types/pyrefly/backend.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -7,6 +7,7 @@
 import lsp_types
 from lsp_types import types
 from lsp_types.process import ProcessLaunchInfo
+from lsp_types.semantic_tokens import PYREFLY_LEGEND
 from lsp_types.session import LSPBackend
 
 from .config_schema import Model as PyreflyConfig
@@ -79,3 +80,7 @@ def get_workspace_settings(
     ) -> types.DidChangeConfigurationParams:
         """Get workspace settings for didChangeConfiguration"""
         return {"settings": options}
+
+    def get_semantic_tokens_legend(self) -> types.SemanticTokensLegend | None:
+        """Pyrefly doesn't advertise legend via LSP, return hardcoded legend."""
+        return PYREFLY_LEGEND
```

lsp_types/pyright/backend.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -79,3 +79,7 @@ def get_workspace_settings(
     ) -> types.DidChangeConfigurationParams:
         """Get workspace settings for didChangeConfiguration"""
         return {"settings": options}
+
+    def get_semantic_tokens_legend(self) -> types.SemanticTokensLegend | None:
+        """Pyright advertises legend via LSP, use server-provided."""
+        return None
```

lsp_types/semantic_tokens.py

Lines changed: 185 additions & 0 deletions
@@ -0,0 +1,185 @@
```python
"""Canonical semantic token legend and normalization utilities."""

from __future__ import annotations

from . import types

# Canonical token types (LSP standard order, then backend-specific)
CANONICAL_TOKEN_TYPES: list[str] = [
    # LSP standard (SemanticTokenTypes enum order)
    "namespace",      # 0
    "type",           # 1
    "class",          # 2
    "enum",           # 3
    "interface",      # 4
    "struct",         # 5
    "typeParameter",  # 6
    "parameter",      # 7
    "variable",       # 8
    "property",       # 9
    "enumMember",     # 10
    "event",          # 11
    "function",       # 12
    "method",         # 13
    "macro",          # 14
    "keyword",        # 15
    "modifier",       # 16
    "comment",        # 17
    "string",         # 18
    "number",         # 19
    "regexp",         # 20
    "operator",       # 21
    "decorator",      # 22
    "label",          # 23 (LSP standard)
    # Backend-specific (appended)
    "selfParameter",    # 24 (pyright, ty)
    "clsParameter",     # 25 (pyright, ty)
    "builtinConstant",  # 26 (ty)
]

# Canonical token modifiers (LSP standard order, then backend-specific)
CANONICAL_TOKEN_MODIFIERS: list[str] = [
    # LSP standard (SemanticTokenModifiers enum order)
    "declaration",     # bit 0
    "definition",      # bit 1
    "readonly",        # bit 2
    "static",          # bit 3
    "deprecated",      # bit 4
    "abstract",        # bit 5
    "async",           # bit 6
    "modification",    # bit 7
    "documentation",   # bit 8
    "defaultLibrary",  # bit 9
    # Backend-specific (appended)
    "builtin",      # bit 10 (pyright)
    "classMember",  # bit 11 (pyright)
    "parameter",    # bit 12 (pyright - modifier, not to be confused with type)
]

# The canonical legend for Monaco/editor integration
CANONICAL_LEGEND: types.SemanticTokensLegend = {
    "tokenTypes": CANONICAL_TOKEN_TYPES,
    "tokenModifiers": CANONICAL_TOKEN_MODIFIERS,
}

# Build lookup tables for canonical indices
_CANONICAL_TYPE_INDEX: dict[str, int] = {
    name: idx for idx, name in enumerate(CANONICAL_TOKEN_TYPES)
}
_CANONICAL_MODIFIER_INDEX: dict[str, int] = {
    name: idx for idx, name in enumerate(CANONICAL_TOKEN_MODIFIERS)
}

# Pyrefly legend (server doesn't advertise it via LSP)
# Source: https://github.com/facebook/pyrefly/blob/main/pyrefly/lib/state/semantic_tokens.rs
PYREFLY_LEGEND: types.SemanticTokensLegend = {
    "tokenTypes": [
        "namespace",      # 0
        "type",           # 1
        "class",          # 2
        "enum",           # 3
        "interface",      # 4
        "struct",         # 5
        "typeParameter",  # 6
        "parameter",      # 7
        "variable",       # 8
        "property",       # 9
        "enumMember",     # 10
        "event",          # 11
        "function",       # 12
        "method",         # 13
        "macro",          # 14
        "keyword",        # 15
        "modifier",       # 16
        "comment",        # 17
        "string",         # 18
        "number",         # 19
        "regexp",         # 20
        "operator",       # 21
        "decorator",      # 22
    ],
    "tokenModifiers": [
        "declaration",     # bit 0
        "definition",      # bit 1
        "readonly",        # bit 2
        "static",          # bit 3
        "deprecated",      # bit 4
        "abstract",        # bit 5
        "async",           # bit 6
        "modification",    # bit 7
        "documentation",   # bit 8
        "defaultLibrary",  # bit 9
    ],
}


def build_type_mapping(backend_legend: types.SemanticTokensLegend) -> dict[int, int]:
    """Build mapping from backend token type indices to canonical indices."""
    mapping: dict[int, int] = {}
    for backend_idx, type_name in enumerate(backend_legend["tokenTypes"]):
        canonical_idx = _CANONICAL_TYPE_INDEX.get(type_name, -1)
        mapping[backend_idx] = canonical_idx
    return mapping


def build_modifier_mapping(
    backend_legend: types.SemanticTokensLegend,
) -> dict[int, int]:
    """Build mapping from backend modifier bit positions to canonical positions."""
    mapping: dict[int, int] = {}
    for backend_bit, modifier_name in enumerate(backend_legend["tokenModifiers"]):
        canonical_bit = _CANONICAL_MODIFIER_INDEX.get(modifier_name, -1)
        mapping[backend_bit] = canonical_bit
    return mapping


def normalize_tokens(
    tokens: types.SemanticTokens,
    type_map: dict[int, int],
    modifier_map: dict[int, int],
) -> types.SemanticTokens:
    """Remap token indices to use canonical legend."""
    data = tokens.get("data", [])
    if not data:
        return tokens

    # Each token is 5 integers: deltaLine, deltaStart, length, typeIndex, modifiers
    normalized_data: list[int] = []

    for i in range(0, len(data), 5):
        if i + 4 >= len(data):
            break  # Incomplete token data

        delta_line = data[i]
        delta_start = data[i + 1]
        length = data[i + 2]
        type_index = data[i + 3]
        modifier_bits = data[i + 4]

        # Remap token type index
        canonical_type = type_map.get(type_index, type_index)
        if canonical_type == -1:
            canonical_type = type_index  # Keep original if unknown

        # Remap modifier bitmask
        canonical_modifiers = 0
        for backend_bit, canonical_bit in modifier_map.items():
            if modifier_bits & (1 << backend_bit):
                if canonical_bit >= 0:
                    canonical_modifiers |= 1 << canonical_bit

        normalized_data.extend(
            [
                delta_line,
                delta_start,
                length,
                canonical_type,
                canonical_modifiers,
            ]
        )

    result: types.SemanticTokens = {"data": normalized_data}
    if "resultId" in tokens:
        result["resultId"] = tokens["resultId"]

    return result
```
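A minimal end-to-end run of the pipeline above, using toy legends rather than the real backend legends (the mapping and remapping logic is restated inline so the sketch runs standalone):

```python
# Toy canonical order and a backend whose legend is a reordered subset.
CANONICAL_TYPES = ["namespace", "class", "variable", "function"]
CANONICAL_MODIFIERS = ["declaration", "readonly", "async"]

backend_legend = {
    "tokenTypes": ["variable", "function"],      # backend indices 0, 1
    "tokenModifiers": ["async", "declaration"],  # backend bits 0, 1
}

# Same shape as build_type_mapping() / build_modifier_mapping() output
type_map = {
    i: CANONICAL_TYPES.index(n) if n in CANONICAL_TYPES else -1
    for i, n in enumerate(backend_legend["tokenTypes"])
}
modifier_map = {
    i: CANONICAL_MODIFIERS.index(n) if n in CANONICAL_MODIFIERS else -1
    for i, n in enumerate(backend_legend["tokenModifiers"])
}

# One token: typeIndex 1 ("function"), backend modifier bit 0 ("async") set
data = [0, 4, 3, 1, 0b01]

out: list[int] = []
for i in range(0, len(data), 5):
    dl, ds, ln, t, mods = data[i:i + 5]
    new_t = type_map.get(t, t)
    if new_t == -1:
        new_t = t  # keep unknown types as-is
    new_mods = 0
    for b_bit, c_bit in modifier_map.items():
        if mods & (1 << b_bit) and c_bit >= 0:
            new_mods |= 1 << c_bit
    out.extend([dl, ds, ln, new_t, new_mods])

print(out)  # -> [0, 4, 3, 3, 4]  (type 1 -> 3 "function"; "async" bit 0 -> bit 2)
```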
