@@ -58,98 +58,76 @@ result = tool.invoke({
5858print (result)
5959```
6060
61- <details >
62- <summary >🔍 Using Output Schemas with SmartscraperTool</summary >
63-
64- You can define the structure of the output using Pydantic models:
61+ ### 🌐 SearchScraperTool
62+ Search and extract structured information from the web using natural language prompts.
6563
6664``` python
67- from typing import List
68- from pydantic import BaseModel, Field
69- from langchain_scrapegraph.tools import SmartscraperTool
65+ from langchain_scrapegraph.tools import SearchScraperTool
7066
71- class WebsiteInfo (BaseModel ):
72- title: str = Field(description = " The main title of the webpage" )
73- description: str = Field(description = " The main description or first paragraph" )
74- urls: List[str ] = Field(description = " The URLs inside the webpage" )
75-
76- # Initialize with schema
77- tool = SmartscraperTool(llm_output_schema = WebsiteInfo)
67+ # Initialize the tool (uses SGAI_API_KEY from environment)
68+ tool = SearchScraperTool()
7869
79- # The output will conform to the WebsiteInfo schema
70+ # Search and extract information using natural language
8071result = tool.invoke({
81- " website_url" : " https://www.example.com" ,
82- " user_prompt" : " Extract the website information"
72+ " user_prompt" : " What are the key features and pricing of ChatGPT Plus?"
8373})
8474
8575print (result)
8676# {
87- # "title": "Example Domain",
88- # "description": "This domain is for use in illustrative examples...",
89- # "urls": ["https://www.iana.org/domains/example"]
77+ # "product": {
78+ # "name": "ChatGPT Plus",
79+ # "description": "Premium version of ChatGPT..."
80+ # },
81+ # "features": [...],
82+ # "pricing": {...},
83+ # "reference_urls": [
84+ # "https://openai.com/chatgpt",
85+ # ...
86+ # ]
9087# }
9188```
92- </details >
93-
94- ### 💻 LocalscraperTool
95- Extract information from HTML content using AI.
96-
97- ``` python
98- from langchain_scrapegraph.tools import LocalscraperTool
99-
100- tool = LocalscraperTool()
101- result = tool.invoke({
102- " user_prompt" : " Extract all contact information" ,
103- " website_html" : " <html>...</html>"
104- })
105-
106- print (result)
107- ```
10889
10990<details >
110- <summary >🔍 Using Output Schemas with LocalscraperTool </summary >
91+ <summary >🔍 Using Output Schemas with SearchScraperTool </summary >
11192
11293You can define the structure of the output using Pydantic models:
11394
11495``` python
115- from typing import Optional
96+ from typing import Any, Dict, List
11697from pydantic import BaseModel, Field
117- from langchain_scrapegraph.tools import LocalscraperTool
98+ from langchain_scrapegraph.tools import SearchScraperTool
11899
119- class CompanyInfo (BaseModel ):
120- name: str = Field(description = " The company name" )
121- description: str = Field(description = " The company description " )
122- email: Optional [str ] = Field(description = " Contact email if available " )
123- phone: Optional [str ] = Field(description = " Contact phone if available " )
100+ class ProductInfo (BaseModel ):
101+ name: str = Field(description = " Product name" )
102+ features: List[ str ] = Field(description = " List of product features " )
103+ pricing: Dict [str , Any ] = Field(description = " Pricing information " )
104+ reference_urls: List [str ] = Field(description = " Source URLs for the information " )
124105
125106# Initialize with schema
126- tool = LocalscraperTool(llm_output_schema = CompanyInfo)
127-
128- html_content = """
129- <html>
130- <body>
131- <h1>TechCorp Solutions</h1>
132- <p>We are a leading AI technology company.</p>
133- <div class="contact">
134- <p>Email: contact@techcorp.com</p>
135- <p>Phone: (555) 123-4567</p>
136- </div>
137- </body>
138- </html>
139- """
140-
141- # The output will conform to the CompanyInfo schema
107+ tool = SearchScraperTool(llm_output_schema = ProductInfo)
108+
109+ # The output will conform to the ProductInfo schema
142110result = tool.invoke({
143- " website_html" : html_content,
144- " user_prompt" : " Extract the company information"
111+ " user_prompt" : " What are the key features and pricing of ChatGPT Plus?"
145112})
146113
147114print (result)
148115# {
149- # "name": "TechCorp Solutions",
150- # "description": "We are a leading AI technology company.",
151- # "email": "contact@techcorp.com",
152- # "phone": "(555) 123-4567"
116+ # "name": "ChatGPT Plus",
117+ # "features": [
118+ # "GPT-4 access",
119+ # "Faster response speed",
120+ # ...
121+ # ],
122+ # "pricing": {
123+ # "amount": 20,
124+ # "currency": "USD",
125+ # "period": "monthly"
126+ # },
127+ # "reference_urls": [
128+ # "https://openai.com/chatgpt",
129+ # ...
130+ # ]
153131# }
154132```
155133</details >
0 commit comments