Skip to content

Commit 68b4944

Browse files
authored
feat: add TextMate document partitioner (#952)
1 parent 9b45b60 commit 68b4944

30 files changed

Lines changed: 2226 additions & 85 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# TM Partitioner (IDocumentPartitioner)
2+
3+
## Overview
4+
5+
- Partitioning ID: `tm4e.partitioning`
6+
- Partition type naming: `tm4e:<root-scope>` derived from the active TextMate grammar's base root scope.
7+
- Examples: `tm4e:text.html`, `tm4e:source.js`, `tm4e:source.css`.
8+
- Base partition type:
9+
- Before a grammar is known, the synthetic base type `tm4e:base` is used.
10+
- After activation with a grammar, the base becomes `tm4e:<root-scope>` (for example, HTML base `tm4e:text.html`).
11+
- Installed automatically by `org.eclipse.tm4e.ui` for any document that resolves a TextMate grammar. It is added as a secondary partitioning and does not replace an editor's default partitioner.
12+
13+
## Partition Semantics
14+
15+
- `computePartitioning(offset, length)` returns regions that cover the entire requested range contiguously. Gaps between known embedded regions are filled with the current base type.
16+
- `getPartition(offset)` clamps the offset to the valid range `[0, docLength-1]`. Negative offsets resolve to the first region; EOF resolves to the last region.
17+
- Regions implement `ITMPartitionRegion` (extends `ITypedRegion`) and carry language metadata:
18+
- `getGrammarScope()` returns the effective grammar scope such as `source.js` or `text.html`.
19+
20+
## How to Consume from Code
21+
22+
Get partitions from the TM4E partitioner via `IDocumentExtension3`:
23+
```java
24+
var ext3 = (IDocumentExtension3) doc;
25+
var partitioner = ext3.getDocumentPartitioner(org.eclipse.tm4e.ui.text.TMPartitions.TM_PARTITIONING /* or "tm4e.partitioning" */);
26+
ITypedRegion[] regions = partitioner.computePartitioning(offset, length);
27+
```
28+
29+
### Example: Content Assist / Feature Code
30+
31+
```java
32+
import org.eclipse.tm4e.ui.text.ITMPartitionRegion;
33+
import org.eclipse.tm4e.ui.text.ITMPartitioner;
34+
import org.eclipse.tm4e.ui.text.TMPartitions;
35+
36+
// ...
37+
38+
IDocument doc = viewer.getDocument();
39+
if (doc instanceof IDocumentExtension3 ext3) {
40+
if (ext3.getDocumentPartitioner(TMPartitions.TM_PARTITIONING) instanceof ITMPartitioner tmPartitioner) {
41+
ITMPartitionRegion r = (ITMPartitionRegion) tmPartitioner.getPartition(caretOffset); // TM4E regions implement ITMPartitionRegion
42+
// Prefer matching by normalized language scope (handles embedded variants)
43+
switch(r.getGrammarScope()) {
44+
case "source.js" -> { /* offer JavaScript proposals */ }
45+
case "source.css" -> { /* offer CSS proposals */ }
46+
}
47+
}
48+
}
49+
```
50+
51+
## Matching by Scope (Recommended)
52+
53+
- Partition type strings are base-root only (for example, `tm4e:source.js`, `tm4e:text.html`). If you need to distinguish embedded variants (for example, JavaScript-in-Markdown), use `ITMPartitionRegion.getGrammarScope()`, which carries the normalized full scope (such as `source.js`).
54+
- For most feature switches, detect the language via the normalized scope name from `getGrammarScope()` as shown in the example. This is stable for families like `source.css`, `source.js`, or `text.html`.
55+
56+
## Mapping to Content Types (Alternative)
57+
58+
- If your feature keys off `IContentType`, translate the partition at the caret offset:
59+
60+
```java
61+
IContentType[] cts = org.eclipse.tm4e.ui.text.TMPartitions.getContentTypesForOffset(doc, caretOffset);
62+
```
63+
64+
## Generic Editor Contributions
65+
66+
- Register your feature with the Generic Editor as usual (e.g., content assist, hovers). Inside your implementation, use the code above to query TM4E’s partitioning and branch behavior based on the normalized scope (recommended) or the partition type.
67+
- You do not need to install a partitioner yourself; `org.eclipse.tm4e.ui` installs it automatically when a grammar exists.
68+
69+
Example plugin.xml (content assist):
70+
71+
```xml
72+
<extension point="org.eclipse.ui.genericeditor.contentAssistProcessors">
73+
<contentAssistProcessor
74+
class="com.example.MyTM4EAwareAssist"
75+
contentType="org.eclipse.core.runtime.text"
76+
targetId="org.eclipse.ui.genericeditor.GenericEditor"/>
77+
<!-- Any partitionType filter on a Generic Editor contribution refers to the editor's default partitioning;
78+
to use TM4E partitions, query tm4e.partitioning from your code. -->
79+
</extension>
80+
```
81+
82+
## Utilities
83+
84+
- Check if a document has a TM4E partitioner installed:
85+
86+
```java
87+
boolean installed = org.eclipse.tm4e.ui.text.TMPartitions.hasPartitioning(doc);
88+
```
89+
90+
- Convenience method to fetch the TM4E partition at an offset (returns `null` if none):
91+
92+
```java
93+
ITypedRegion part = org.eclipse.tm4e.ui.text.TMPartitions.getPartition(doc, caretOffset);
94+
```

org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ private synchronized <T> TokenizeLineResult<T> _tokenize(
318318
false,
319319
null,
320320
scopeList,
321-
scopeList);
321+
scopeList,
322+
null);
322323
} else {
323324
isFirstLine = false;
324325
prevState.reset();

org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,15 +190,24 @@ private void scanNext() {
190190
final StateStack beforePush = stack;
191191
// push it on the stack rule
192192
final var scopeName = rule.getName(lineText.content, captureIndices);
193-
final var nameScopesList = castNonNull(stack.contentNameScopesList).pushAttributed(scopeName, grammar);
193+
194+
// custom tm4e code - not from upstream (for TMPartitioner)
195+
// if the rule originates from a different (external) grammar, record that grammar's scope on the pushed frame; otherwise inherit the current frame's scope
196+
AttributedScopeStack baseNames = castNonNull(stack.contentNameScopesList);
197+
final var nameScopesList = baseNames.pushAttributed(scopeName, grammar);
198+
final String frameGrammarScope = (rule.grammarScope != null && !rule.grammarScope.equals(grammar.getScopeName()))
199+
? rule.grammarScope
200+
: stack.grammarScope;
201+
194202
stack = stack.push(
195203
matchedRuleId,
196204
linePos,
197205
anchorPosition,
198206
captureIndices[0].end == lineLength,
199207
null,
200208
nameScopesList,
201-
nameScopesList);
209+
nameScopesList,
210+
frameGrammarScope);
202211

203212
if (rule instanceof final BeginEndRule pushedRule) {
204213
/*if(LOGGER.isLoggable(DEBUG)) {
@@ -218,7 +227,19 @@ private void scanNext() {
218227

219228
final var contentName = pushedRule.getContentName(lineText.content, captureIndices);
220229
final var contentNameScopesList = nameScopesList.pushAttributed(contentName, grammar);
221-
stack = stack.withContentNameScopesList(contentNameScopesList);
230+
231+
// custom tm4e code - not from upstream (for TMPartitioner)
232+
// If contentName indicates an embedded root (e.g., source.* or text.*) distinct from the grammar base,
233+
// propagate that as the effective grammar scope for this frame so that non-retokenized tokens (e.g., whitespace)
234+
// still carry the embedded grammar.
235+
String derivedGrammarScope = stack.grammarScope;
236+
if (contentName != null) {
237+
final String candidate = pickRootScopeFromContentName(contentName);
238+
if (candidate != null && !candidate.equals(grammar.getScopeName())) {
239+
derivedGrammarScope = candidate;
240+
}
241+
}
242+
stack = stack.withContentNameScopesListAndGrammarScope(contentNameScopesList, derivedGrammarScope);
222243

223244
if (pushedRule.endHasBackReferences) {
224245
stack = stack.withEndRule(
@@ -307,6 +328,31 @@ private void scanNext() {
307328
}
308329
}
309330

331+
/**
332+
* Extract a root scope candidate from a contentName string, which may contain multiple scopes separated by spaces.
333+
* Returns the first token that starts with "source." or "text.", or null if none found.
334+
*/
335+
private @Nullable String pickRootScopeFromContentName(final String contentName) {
336+
// custom tm4e code - not from upstream (for TMPartitioner)
337+
final int len = contentName.length();
338+
int i = 0;
339+
while (i < len) {
340+
while (i < len && contentName.charAt(i) == ' ')
341+
i++;
342+
int j = i;
343+
while (j < len && contentName.charAt(j) != ' ')
344+
j++;
345+
if (j > i) {
346+
final String part = contentName.substring(i, j);
347+
if (part.startsWith("source.") || part.startsWith("text.")) {
348+
return part;
349+
}
350+
}
351+
i = j + 1;
352+
}
353+
return null;
354+
}
355+
310356
private @Nullable MatchResult matchRule(final Grammar grammar, final OnigString lineText, final boolean isFirstLine, final int linePos,
311357
final StateStack stack, final int anchorPosition) {
312358
final var rule = stack.getRule(grammar);
@@ -460,13 +506,22 @@ private void handleCaptures(final Grammar grammar, final OnigString lineText, fi
460506
if (retokenizeCapturedWithRuleId.notEquals(RuleId.NO_RULE)) {
461507
// the capture requires additional matching
462508
final var scopeName = captureRule.getName(lineTextContent, captureIndices);
463-
final var nameScopesList = castNonNull(stack.contentNameScopesList).pushAttributed(scopeName, grammar);
509+
510+
// custom tm4e code - not from upstream (for TMPartitioner)
511+
AttributedScopeStack baseNames = castNonNull(stack.contentNameScopesList);
512+
final var retokenizeRule = grammar.getRule(retokenizeCapturedWithRuleId);
513+
final var grammarScope = retokenizeRule.grammarScope;
514+
final var nameScopesList = baseNames.pushAttributed(scopeName, grammar);
515+
final String frameGrammarScope = (grammarScope != null && !grammarScope.equals(grammar.getScopeName()))
516+
? grammarScope
517+
: stack.grammarScope;
518+
464519
final var contentName = captureRule.getContentName(lineTextContent, captureIndices);
465520
final var contentNameScopesList = nameScopesList.pushAttributed(contentName, grammar);
466521

467522
// the capture requires additional matching
468523
final var stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.start, -1, false, null, nameScopesList,
469-
contentNameScopesList);
524+
contentNameScopesList, frameGrammarScope);
470525
final var onigSubStr = OnigString.of(lineTextContent.substring(0, captureIndex.end));
471526
tokenizeString(grammar, onigSubStr, isFirstLine && captureIndex.start == 0, captureIndex.start, stackClone, lineTokens,
472527
false, Duration.ZERO /* no time limit */);

org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokens.java

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,24 @@
3737
* "https://github.com/microsoft/vscode-textmate/blob/167bbbd509356cc4617f250c0d754aef670ab14a/src/grammar/grammar.ts#L945">
3838
* github.com/microsoft/vscode-textmate/blob/main/src/grammar/grammar.ts</a>
3939
*/
40-
final class LineTokens {
40+
public final class LineTokens {
4141

42-
private static final class Token implements IToken {
43-
private int startIndex;
44-
private final int endIndex;
45-
private final List<String> scopes;
42+
public static final class Token implements IToken {
43+
public int startIndex;
44+
public final int endIndex;
45+
public final List<String> scopes;
4646

47-
Token(final int startIndex, final int endIndex, final List<String> scopes) {
47+
/**
48+
* Optional grammar root scope for this token (e.g., "source.js" or "text.html.basic").
49+
* May be {@code null} when no grammar scope is available.
50+
*/
51+
public final @Nullable String grammarScope; // custom tm4e code - not from upstream (for TMPartitioner)
52+
53+
Token(final int startIndex, final int endIndex, final List<String> scopes, final @Nullable String grammarScope) {
4854
this.startIndex = startIndex;
4955
this.endIndex = endIndex;
5056
this.scopes = scopes;
57+
this.grammarScope = grammarScope;
5158
}
5259

5360
@Override
@@ -104,7 +111,7 @@ public String toString() {
104111

105112
private static final Logger LOGGER = System.getLogger(LineTokens.class.getName());
106113

107-
private static final Deque<IToken> EMPTY_DEQUE = new ArrayDeque<>(0);
114+
private static final Deque<Token> EMPTY_DEQUE = new ArrayDeque<>(0);
108115

109116
private final boolean _emitBinaryTokens;
110117

@@ -116,14 +123,15 @@ public String toString() {
116123
/**
117124
* used only if `emitBinaryTokens` is false.
118125
*/
119-
private final Deque<IToken> _tokens;
126+
private final Deque<Token> _tokens;
120127

121128
/**
122129
* used only if `emitBinaryTokens` is true.
123130
*/
124131
private final List<Integer> _binaryTokens;
125132

126133
private int _lastTokenEndIndex = 0;
134+
private @Nullable String _currentGrammarScope; // custom tm4e code - not from upstream (for TMPartitioner)
127135

128136
private final List<TokenTypeMatcher> _tokenTypeOverrides;
129137

@@ -148,6 +156,7 @@ public String toString() {
148156
}
149157

150158
void produce(final StateStack stack, final int endIndex) {
159+
this._currentGrammarScope = stack.grammarScope;
151160
this.produceFromScopes(stack.contentNameScopesList, endIndex);
152161
}
153162

@@ -233,24 +242,24 @@ void produceFromScopes(final @Nullable AttributedScopeStack scopesList, final in
233242
}
234243
}
235244

236-
this._tokens.add(new Token(_lastTokenEndIndex, endIndex, scopes));
245+
this._tokens.add(new Token(_lastTokenEndIndex, endIndex, scopes, _currentGrammarScope));
237246

238247
this._lastTokenEndIndex = endIndex;
239248
}
240249

241250
IToken[] getResult(final StateStack stack, final int lineLength) {
242-
if (!this._tokens.isEmpty() && this._tokens.getLast().getStartIndex() == lineLength - 1) {
251+
if (!this._tokens.isEmpty() && this._tokens.getLast().startIndex == lineLength - 1) {
243252
// pop produced token for newline
244253
this._tokens.removeLast();
245254
}
246255

247256
if (this._tokens.isEmpty()) {
248257
this._lastTokenEndIndex = -1;
249258
this.produce(stack, lineLength);
250-
this._tokens.getLast().setStartIndex(0);
259+
this._tokens.getLast().startIndex = 0;
251260
}
252261

253-
return this._tokens.toArray(IToken[]::new);
262+
return this._tokens.toArray(Token[]::new);
254263
}
255264

256265
int[] getBinaryResult(final StateStack stack, final int lineLength) {

0 commit comments

Comments
 (0)