Python SDK

The official Python SDK for Refyne, with full async/await support.

Installation

pip install refyne
# or
poetry add refyne

Quick Start

The Refyne Python SDK is fully async. All methods must be called with await:

import asyncio
import os
from refyne import Refyne

async def main():
    client = Refyne(api_key=os.environ["REFYNE_API_KEY"])

    result = await client.extract(
        url="https://example.com/product",
        schema={
            "name": "string",
            "price": "number",
            "description": "string",
        }
    )

    print(result.data)
    # {"name": "Product Name", "price": 29.99, "description": "..."}

    await client.close()

asyncio.run(main())

Context Manager

Use the async context manager for automatic cleanup:

async def main():
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        result = await client.extract(
            url="https://example.com/product",
            schema={"name": "string", "price": "number"}
        )
        print(result.data)

asyncio.run(main())
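
The same context-managed client can serve several requests at once, since every call is a coroutine. Below is a minimal sketch using asyncio.gather with the extract call from above; the URLs are placeholders:

import asyncio
import os
from refyne import Refyne

async def main():
    urls = [
        "https://example.com/product/1",
        "https://example.com/product/2",
        "https://example.com/product/3",
    ]

    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        # Run the extractions concurrently on the shared client
        results = await asyncio.gather(*(
            client.extract(url=url, schema={"name": "string", "price": "number"})
            for url in urls
        ))

        for url, result in zip(urls, results):
            print(url, result.data)

asyncio.run(main())

If you fan out over many pages, consider capping concurrency (for example with asyncio.Semaphore) to stay within your rate limits.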

Site Analysis

Analyze a website to discover structure and suggested schemas:

async def main():
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        analysis = await client.analyze(
            url="https://example.com/products",
            depth=1,
        )

        print(analysis.suggested_schema)
        # {"name": "string", "price": "number", "description": "string"}

        print(analysis.follow_patterns)
        # ["a.product-link", ".pagination a", "a[href*='/product/']"]

        # Use the analysis results for a crawl
        job = await client.crawl(
            url="https://example.com/products",
            schema=analysis.suggested_schema,
            options={
                "follow_selector": ", ".join(analysis.follow_patterns),
                "max_pages": 50,
            }
        )

Crawling

async def main():
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        # Start a crawl job
        job = await client.crawl(
            url="https://example.com/products",
            schema={"name": "string", "price": "number"},
            options={
                "follow_selector": "a.product-link",
                "max_pages": 20,
            }
        )

        print(f"Job started: {job.job_id}")

        # Poll for completion
        while True:
            status = await client.jobs.get(job.job_id)
            if status.status.value == "completed":
                break
            if status.status.value == "failed":
                raise Exception(f"Job failed: {status.error_message}")
            await asyncio.sleep(2)

        # Get results
        results = await client.jobs.get_results(job.job_id)

        # Get merged results
        merged = await client.jobs.get_results_merged(job.job_id)

asyncio.run(main())
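
Polling in an open-ended while loop works, but a small helper with a timeout is often safer. The sketch below builds only on the client.jobs.get call shown above; wait_for_job and its parameters are illustrative, not part of the SDK:

import asyncio
import time

async def wait_for_job(client, job_id, timeout=300.0, interval=2.0):
    # Illustrative helper (not part of the SDK): poll until the job
    # completes or fails, or give up after `timeout` seconds.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        status = await client.jobs.get(job_id)
        if status.status.value == "completed":
            return status
        if status.status.value == "failed":
            raise RuntimeError(f"Job failed: {status.error_message}")
        await asyncio.sleep(interval)
    raise TimeoutError(f"Job {job_id} did not complete within {timeout} seconds")

Call it in place of the polling loop, then fetch results as before with client.jobs.get_results(job.job_id).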

Error Handling

import asyncio
import os
from refyne import Refyne, RefyneError, RateLimitError, ValidationError

async def main():
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        try:
            result = await client.extract(...)
        except RateLimitError as e:
            print(f"Rate limited. Retry after {e.retry_after} seconds")
            await asyncio.sleep(e.retry_after)
        except ValidationError as e:
            print(f"Validation error: {e.message}")
        except RefyneError as e:
            print(f"API error: {e.message} (status {e.status})")

Configuration

client = Refyne(
    api_key=os.environ["REFYNE_API_KEY"],
    base_url="https://api.refyne.uk",  # Optional
    timeout=30.0,                       # Optional, in seconds
    max_retries=3,                      # Optional
    cache_enabled=True,                 # Optional
    verify_ssl=True,                    # Optional
)

Sub-Clients

The client provides sub-clients for different operations:

# Jobs
jobs = await client.jobs.list()
job = await client.jobs.get("job-id")
results = await client.jobs.get_results("job-id")

# Schemas
schemas = await client.schemas.list()
schema = await client.schemas.get("schema-id")
schema = await client.schemas.create(name="My Schema", schema_yaml="...")

# Sites
sites = await client.sites.list()

# API Keys
keys = await client.keys.list()

# LLM Configuration
providers = await client.llm.list_providers()
models = await client.llm.list_models("openai")
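
Like every other call in the SDK, sub-client methods are coroutines, so they belong inside an async function with an open client. For example, listing recent jobs (the exact shape of each job object beyond job_id and status is an assumption here):

import asyncio
import os
from refyne import Refyne

async def main():
    async with Refyne(api_key=os.environ["REFYNE_API_KEY"]) as client:
        jobs = await client.jobs.list()
        for job in jobs:
            # job_id and status appear elsewhere in this guide; other fields may differ
            print(job.job_id, job.status.value)

asyncio.run(main())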

API Reference

For detailed documentation, see refyne.readthedocs.io.