from reducto import AsyncReducto
from pathlib import Path
import asyncio
from tqdm.asyncio import tqdm
# Upper bound on how many documents are processed at the same time.
MAX_CONCURRENCY = 1000

# Every "*.pdf" entry directly inside the relative "docs" directory
# (non-recursive pattern), collected eagerly into a list at import time.
FILES_TO_PARSE = [*Path("docs").glob("*.pdf")]

# Single asynchronous Reducto client shared by the whole script; it is
# constructed with no explicit arguments.
client = AsyncReducto()
async def main():
    """Parse every file in ``FILES_TO_PARSE`` concurrently and save the results.

    Each document is uploaded with ``client.upload`` and parsed with
    ``client.parse.run``; the parse result is then written as JSON next to
    the source file, with the file's suffix replaced by ``.reducto.json``.
    At most ``MAX_CONCURRENCY`` documents are processed at the same time,
    and overall progress is reported through ``tqdm.gather``.

    Raises:
        Any exception raised while uploading, parsing, or writing a
        document propagates out of the awaited ``tqdm.gather`` call.
    """
    # One semaphore shared by all tasks so the cap applies to the whole batch.
    sem = asyncio.Semaphore(MAX_CONCURRENCY)

    async def parse_document(path: Path) -> None:
        """Upload, parse, and persist the result for a single document."""
        async with sem:
            upload = await client.upload(file=path)
            result = await client.parse.run(document_url=upload)
            output_path = path.with_suffix(".reducto.json")
            # Without an explicit encoding, write_text() falls back to the
            # platform's locale encoding, which can raise UnicodeEncodeError
            # or yield non-UTF-8 JSON when the result holds non-ASCII text.
            output_path.write_text(result.model_dump_json(), encoding="utf-8")

    await tqdm.gather(
        *(parse_document(path) for path in FILES_TO_PARSE),
        desc="Parsing documents",
    )
if __name__ == "__main__":
    # Only start the batch when executed as a script, not when imported.
    entry_point = main()
    asyncio.run(entry_point)