Python SDK
Official Python client for Refyne
The official Python SDK for Refyne with async support.
Installation
pip install refyne
# or
poetry add refyne

Quick Start
The Refyne Python SDK is fully async. All methods must be called with await:
import asyncio
import os

from refyne import Refyne


async def main():
    """Extract structured data from one page, then release the client."""
    client = Refyne(api_key=os.environ["REFYNE_API_KEY"])
    schema = {
        "name": "string",
        "price": "number",
        "description": "string",
    }
    result = await client.extract(url="https://example.com/product", schema=schema)
    print(result.data)
    # {"name": "Product Name", "price": 29.99, "description": "..."}
    await client.close()  # no context manager here, so close explicitly
asyncio.run(main())

Context Manager
Use the async context manager for automatic cleanup:
async def main():
    """Extract structured data with cleanup handled by the context manager."""
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        extraction = await client.extract(
            url="https://example.com/product",
            schema={"name": "string", "price": "number"},
        )
        print(extraction.data)
asyncio.run(main())

Site Analysis
Analyze a website to discover structure and suggested schemas:
async def main():
async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
analysis = await client.analyze(
url="https://example.com/products",
depth=1,
)
print(analysis.suggested_schema)
# {"name": "string", "price": "number", "description": "string"}
print(analysis.follow_patterns)
# ["a.product-link", ".pagination a", "a[href*='/product/']"]
# Use the analysis results for a crawl
job = await client.crawl(
url="https://example.com/products",
schema=analysis.suggested_schema,
options={
"follow_selector": ", ".join(analysis.follow_patterns),
"max_pages": 50,
}
)

Crawling
import asyncio
import os

from refyne import Refyne


async def main():
    """Start a crawl job, poll until it reaches a terminal state, then fetch results.

    Raises:
        Exception: if the job finishes in the "failed" state.
    """
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        # Start a crawl job
        job = await client.crawl(
            url="https://example.com/products",
            schema={"name": "string", "price": "number"},
            options={
                "follow_selector": "a.product-link",
                "max_pages": 20,
            }
        )
        print(f"Job started: {job.job_id}")

        # Poll for completion. Note: asyncio is imported at module level
        # (not inside main) so asyncio.run(main()) also works at module scope.
        while True:
            status = await client.jobs.get(job.job_id)
            if status.status.value == "completed":
                break
            if status.status.value == "failed":
                raise Exception(f"Job failed: {status.error_message}")
            await asyncio.sleep(2)

        # Get per-page results
        results = await client.jobs.get_results(job.job_id)
        # Get merged results
        merged = await client.jobs.get_results_merged(job.job_id)
asyncio.run(main())

Error Handling
from refyne import RefyneError, RateLimitError, ValidationError
import asyncio
async def main():
async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
try:
result = await client.extract(...)
except RateLimitError as e:
print(f"Rate limited. Retry after {e.retry_after} seconds")
await asyncio.sleep(e.retry_after)
except ValidationError as e:
print(f"Validation error: {e.message}")
except RefyneError as e:
print(f"API error: {e.message} (status {e.status})")

Configuration
client = Refyne(
api_key=os.environ["REFYNE_API_KEY"],
base_url="https://api.refyne.uk", # Optional
timeout=30.0, # Optional, in seconds
max_retries=3, # Optional
cache_enabled=True, # Optional
verify_ssl=True, # Optional
)

Sub-Clients
The client provides sub-clients for different operations:
# Jobs: list, inspect, and fetch results of extraction/crawl jobs
jobs = await client.jobs.list()
job = await client.jobs.get("job-id")
results = await client.jobs.get_results("job-id")
# Schemas: manage saved extraction schemas
schemas = await client.schemas.list()
schema = await client.schemas.get("schema-id")
schema = await client.schemas.create(name="My Schema", schema_yaml="...")
# Sites: list sites known to the account
sites = await client.sites.list()
# API Keys: list keys for this account
keys = await client.keys.list()
# LLM Configuration: inspect available providers
providers = await client.llm.list_providers()
models = await client.llm.list_models("openai")

API Reference
For detailed documentation, see refyne.readthedocs.io.