Skip to content

Commit 4e63364

Browse files
committed
resolve merge conflicts
2 parents 98d6942 + 75fc4de commit 4e63364

9 files changed

Lines changed: 355 additions & 56 deletions

File tree

.github/workflows/sync-extension.yml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ jobs:
2525
uses: actions/checkout@v4
2626
with:
2727
token: ${{ secrets.GITHUB_TOKEN }}
28+
fetch-depth: 0 # Fetch all history for proper branching
2829

2930
- name: Set up Node.js
3031
uses: actions/setup-node@v4
@@ -62,7 +63,7 @@ jobs:
6263
mkdir -p extension-temp
6364
cd extension-temp
6465
65-
# Download individual files from release (reliable method)
66+
# Download individual files from release (reliable method - no zip)
6667
echo "📁 Downloading individual files from release..."
6768
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
6869
"https://api.github.com/repos/$REPO/releases/tags/$TAG" | \
@@ -148,6 +149,11 @@ jobs:
148149
cp extension-temp/extension-package/pkg/*.d.ts src/extension/pkg/ 2>/dev/null || echo "⚠️ Type definitions not found"
149150
fi
150151
152+
# Verify copied files
153+
echo "📋 Copied files:"
154+
ls -la src/extension/
155+
ls -la src/extension/pkg/ 2>/dev/null || echo "⚠️ pkg directory not created"
156+
151157
- name: Check for changes
152158
if: steps.release.outputs.skip != 'true'
153159
id: changes
@@ -167,7 +173,9 @@ jobs:
167173
if: steps.release.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
168174
uses: peter-evans/create-pull-request@v5
169175
with:
170-
token: ${{ secrets.GITHUB_TOKEN }}
176+
# Uses the PR_TOKEN secret (needed for repos with org restrictions); as written there is no fallback to GITHUB_TOKEN - use `secrets.PR_TOKEN || secrets.GITHUB_TOKEN` if one is wanted
177+
# To use PAT: create secret named PR_TOKEN with a Personal Access Token that has 'repo' scope
178+
token: ${{ secrets.PR_TOKEN }}
171179
commit-message: "chore: sync extension files from sentience-chrome ${{ steps.release.outputs.tag }}"
172180
title: "Sync Extension: ${{ steps.release.outputs.tag }}"
173181
body: |
@@ -177,7 +185,10 @@ jobs:
177185
- Extension manifest and scripts
178186
- WASM binary and bindings
179187
180-
**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
188+
**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](https://github.com/${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
181189
branch: sync-extension-${{ steps.release.outputs.tag }}
182190
delete-branch: true
191+
labels: |
192+
automated
193+
extension-sync
183194

README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ npm run build
7878
- `snapshot(browser, options)` - Capture page state
7979
- TypeScript types for type safety
8080

81+
### Content Reading & Screenshots
82+
- `read(browser, options)` - Read page content as text or markdown
83+
- Enhanced markdown conversion using `turndown` (better than extension's lightweight conversion)
84+
- Supports the `enhanceMarkdown` option (enabled by default) to use the improved conversion
85+
- `screenshot(browser, options)` - Capture standalone screenshot
86+
- Returns base64-encoded data URL
87+
- Supports PNG and JPEG formats with quality control
88+
8189
### Day 4: Query Engine
8290
- `query(snapshot, selector)` - Find elements matching selector
8391
- `find(snapshot, selector)` - Find single best match
@@ -128,6 +136,50 @@ See `examples/` directory:
128136
- `wait-and-click.ts` - Wait and actions
129137
- `read-markdown.ts` - Reading page content and converting to markdown
130138

139+
### Content Reading Example
140+
141+
```typescript
142+
import { SentienceBrowser, read } from './src';
143+
144+
const browser = new SentienceBrowser();
145+
await browser.start();
146+
147+
await browser.getPage().goto('https://example.com');
148+
await browser.getPage().waitForLoadState('networkidle');
149+
150+
// Read as enhanced markdown (better quality)
151+
const result = await read(browser, {
152+
format: 'markdown',
153+
enhanceMarkdown: true
154+
});
155+
console.log(result.content); // High-quality markdown
156+
157+
await browser.close();
158+
```
159+
160+
### Screenshot Example
161+
162+
```typescript
163+
import { SentienceBrowser, screenshot } from './src';
164+
import { writeFileSync } from 'fs';
165+
166+
const browser = new SentienceBrowser();
167+
await browser.start();
168+
169+
await browser.getPage().goto('https://example.com');
170+
await browser.getPage().waitForLoadState('networkidle');
171+
172+
// Capture PNG screenshot
173+
const dataUrl = await screenshot(browser, { format: 'png' });
174+
175+
// Save to file
176+
const base64Data = dataUrl.split(',')[1];
177+
const imageData = Buffer.from(base64Data, 'base64');
178+
writeFileSync('screenshot.png', imageData);
179+
180+
await browser.close();
181+
```
182+
131183
## Testing
132184

133185
```bash

package-lock.json

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@
1818
},
1919
"dependencies": {
2020
"playwright": "^1.40.0",
21+
"turndown": "^7.2.2",
2122
"zod": "^3.22.0"
2223
},
2324
"devDependencies": {
2425
"@types/jest": "^29.5.14",
2526
"@types/node": "^20.0.0",
27+
"@types/turndown": "^5.0.3",
2628
"jest": "^29.0.0",
2729
"ts-jest": "^29.0.0",
2830
"ts-node": "^10.9.0",

src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ export { Inspector, inspect } from './inspector';
1212
export { Recorder, Trace, TraceStep, record } from './recorder';
1313
export { ScriptGenerator, generate } from './generator';
1414
export { read, ReadOptions, ReadResult } from './read';
15+
export { screenshot, ScreenshotOptions } from './screenshot';
1516
export * from './types';
1617

src/read.ts

Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import TurndownService from 'turndown';
77

88
export interface ReadOptions {
99
format?: 'raw' | 'text' | 'markdown';
10+
enhanceMarkdown?: boolean;
1011
}
1112

1213
export interface ReadResult {
@@ -46,69 +47,64 @@ export async function read(
4647
): Promise<ReadResult> {
4748
const page = browser.getPage();
4849
const format = options.format || 'raw'; // Default to 'raw' for Turndown compatibility
50+
const enhanceMarkdown = options.enhanceMarkdown !== false; // Default to true
4951

50-
// For markdown format, get raw HTML first, then convert with Turndown
51-
if (format === 'markdown') {
52-
// Get raw HTML from extension
53-
const rawResult = (await page.evaluate(
52+
if (format === 'markdown' && enhanceMarkdown) {
53+
// Get raw HTML from the extension first
54+
const rawHtmlResult = (await page.evaluate(
5455
(opts) => {
5556
return (window as any).sentience.read(opts);
5657
},
5758
{ format: 'raw' }
5859
)) as ReadResult;
5960

60-
if (rawResult.status !== 'success') {
61-
return rawResult;
62-
}
63-
64-
// Convert to markdown using Turndown
65-
try {
66-
const turndownService = new TurndownService({
67-
headingStyle: 'atx', // Use # for headings
68-
bulletListMarker: '-', // Use - for lists
69-
codeBlockStyle: 'fenced', // Use ``` for code blocks
70-
});
71-
72-
// Add custom rules for better conversion
73-
turndownService.addRule('strikethrough', {
74-
filter: ['del', 's', 'strike'] as any,
75-
replacement: (content: string) => `~~${content}~~`,
76-
});
61+
if (rawHtmlResult.status === 'success') {
62+
const htmlContent = rawHtmlResult.content;
63+
try {
64+
const turndownService = new TurndownService({
65+
headingStyle: 'atx',
66+
hr: '---',
67+
bulletListMarker: '-',
68+
codeBlockStyle: 'fenced',
69+
emDelimiter: '*',
70+
});
7771

78-
// Strip unwanted tags
79-
turndownService.remove(['script', 'style', 'nav', 'footer', 'header', 'noscript']);
72+
// Add custom rules for better markdown
73+
turndownService.addRule('strikethrough', {
74+
filter: (node) => ['s', 'del', 'strike'].includes(node.nodeName.toLowerCase()),
75+
replacement: function (content) {
76+
return '~~' + content + '~~';
77+
},
78+
});
8079

81-
const htmlContent = rawResult.content;
82-
const markdownContent = turndownService.turndown(htmlContent);
80+
// Optionally strip certain tags entirely
81+
turndownService.remove(['script', 'style', 'noscript', 'iframe'] as any);
8382

84-
// Return result with markdown content
85-
return {
86-
status: 'success',
87-
url: rawResult.url,
88-
format: 'markdown',
89-
content: markdownContent,
90-
length: markdownContent.length,
91-
};
92-
} catch (e) {
93-
// If conversion fails, return error
94-
return {
95-
status: 'error',
96-
url: rawResult.url,
97-
format: 'markdown',
98-
content: '',
99-
length: 0,
100-
error: `Markdown conversion failed: ${e}`,
101-
};
83+
const markdownContent = turndownService.turndown(htmlContent);
84+
return {
85+
status: 'success',
86+
url: rawHtmlResult.url,
87+
format: 'markdown',
88+
content: markdownContent,
89+
length: markdownContent.length,
90+
};
91+
} catch (e: any) {
92+
console.warn(`Turndown conversion failed: ${e.message}, falling back to extension's markdown.`);
93+
// Fallback to extension's markdown if Turndown fails
94+
}
95+
} else {
96+
console.warn(`Failed to get raw HTML from extension: ${rawHtmlResult.error}, falling back to extension's markdown.`);
97+
// Fallback to extension's markdown if getting raw HTML fails
10298
}
103-
} else {
104-
// For "raw" or "text", call extension directly
105-
const result = (await page.evaluate(
106-
(opts) => {
107-
return (window as any).sentience.read(opts);
108-
},
109-
{ format }
110-
)) as ReadResult;
111-
112-
return result;
11399
}
100+
101+
// If not enhanced markdown, or fallback, call extension with requested format
102+
const result = (await page.evaluate(
103+
(opts) => {
104+
return (window as any).sentience.read(opts);
105+
},
106+
{ format }
107+
)) as ReadResult;
108+
109+
return result;
114110
}

src/screenshot.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* Screenshot functionality - standalone screenshot capture
3+
*/
4+
5+
import { SentienceBrowser } from './browser';
6+
7+
/** Options accepted by `screenshot`. */
export interface ScreenshotOptions {
  /** Image format; defaults to `'png'` when omitted. */
  format?: 'png' | 'jpeg';
  /** JPEG quality, 1-100. Only used when `format` is `'jpeg'`. */
  quality?: number;
}

/**
 * Capture a screenshot of the current page.
 *
 * @param browser - SentienceBrowser instance
 * @param options - Screenshot options
 * @returns Base64-encoded screenshot data URL (e.g., "data:image/png;base64,...")
 * @throws Error if `format` is `'jpeg'` and `quality` is not a number between 1 and 100
 */
export async function screenshot(
  browser: SentienceBrowser,
  options: ScreenshotOptions = {}
): Promise<string> {
  const page = browser.getPage();
  const format = options.format ?? 'png';
  const quality = options.quality;

  // The range test is negated so that NaN is rejected as well.
  if (format === 'jpeg' && quality !== undefined && !(quality >= 1 && quality <= 100)) {
    throw new Error('Quality must be between 1 and 100 for JPEG format');
  }

  // Playwright's page.screenshot() has no `encoding` option (that is Puppeteer's API);
  // it resolves to a Buffer holding the raw image bytes.
  const screenshotOptions: { type: 'png' | 'jpeg'; quality?: number } = { type: format };

  // Quality is only forwarded for JPEG captures.
  if (format === 'jpeg' && quality !== undefined) {
    screenshotOptions.quality = quality;
  }

  // Capture the raw image and encode it explicitly. Interpolating the Buffer
  // directly would decode the bytes as UTF-8 and corrupt the data URL.
  const imageBuffer = await page.screenshot(screenshotOptions);
  const base64Data = imageBuffer.toString('base64');

  // Return as data URL
  const mimeType = format === 'png' ? 'image/png' : 'image/jpeg';
  return `data:${mimeType};base64,${base64Data}`;
}
50+

0 commit comments

Comments
 (0)