Sometimes you have a huge number of documents to parse. Reducto autoscales to handle even the largest jobs — millions of documents — with ease.

Ensure you have our SDK installed first (see quickstart).

from reducto import AsyncReducto
from pathlib import Path
import asyncio
from tqdm.asyncio import tqdm

# Async client; reads the API key from the environment (REDUCTO_API_KEY by convention
# for this SDK — confirm against the quickstart if your setup differs).
client = AsyncReducto()

# Cap on simultaneous in-flight parse jobs; the semaphore in main() enforces it.
MAX_CONCURRENCY = 1000
# Every PDF directly inside ./docs (non-recursive glob).
FILES_TO_PARSE = list(Path("docs").glob("*.pdf"))


async def main():
    """Parse every PDF in FILES_TO_PARSE concurrently.

    Each document is uploaded, parsed, and its result written next to the
    input file as ``<name>.reducto.json``. At most MAX_CONCURRENCY documents
    are in flight at once, and tqdm renders overall progress.
    """
    limiter = asyncio.Semaphore(MAX_CONCURRENCY)

    async def _process(pdf_path: Path):
        # Hold a concurrency slot for the full upload -> parse -> write cycle,
        # so MAX_CONCURRENCY bounds total in-flight work.
        async with limiter:
            uploaded = await client.upload(file=pdf_path)
            parsed = await client.parse.run(document_url=uploaded)
            destination = pdf_path.with_suffix(".reducto.json")
            destination.write_text(parsed.model_dump_json())

    tasks = [_process(pdf) for pdf in FILES_TO_PARSE]
    await tqdm.gather(*tasks, desc="Parsing documents")


# Script entry point: run the async pipeline to completion.
if __name__ == "__main__":
    asyncio.run(main())