Split
import requests
url = "https://platform.reducto.ai/split"
payload = {
"split_description": [
{
"name": "<string>",
"description": "<string>",
"partition_key": "<string>"
}
],
"document_url": "<string>",
"options": {
"ocr_mode": "standard",
"extraction_mode": "ocr",
"chunking": {
"chunk_mode": "variable",
"chunk_overlap": 0
},
"table_summary": { "enabled": False },
"figure_summary": {
"advanced_chart_agent": False,
"enabled": False,
"enhanced": False,
"override": False
},
"filter_blocks": [],
"force_url_result": False
},
"advanced_options": {
"ocr_system": "highres",
"table_output_format": "html",
"merge_tables": False,
"include_formula_information": False,
"include_color_information": False,
"include_dropdown_information": False,
"continue_hierarchy": True,
"keep_line_breaks": False,
"page_range": {},
"large_table_chunking": {
"enabled": True,
"size": 50
},
"spreadsheet_table_clustering": "default",
"add_page_markers": False,
"remove_text_formatting": False,
"return_ocr_data": False,
"filter_line_numbers": False,
"read_comments": False,
"persist_results": False,
"exclude_hidden_sheets": False,
"exclude_hidden_rows_cols": False,
"enable_change_tracking": False,
"enable_highlight_detection": False,
"ignore_watermarks": False,
"track_offsets": False,
"track_word_offsets": False,
"track_line_offsets": False
},
"experimental_options": {
"use_fast_inference": False,
"use_gpu_ocr": False,
"max_batch_size": 10,
"num_ocr_crops": 2,
"timeout": 1800,
"extra_metadata": {},
"summarize_all_figures": False,
"overrides": {},
"enrich": {
"enabled": False,
"mode": "standard"
},
"layout_enrichment": False,
"native_docx_parsing": False,
"native_office_conversion": False,
"disable_office_external_links": True,
"enable_checkboxes": False,
"enable_equations": False,
"rotate_pages": False,
"rotate_figures": False,
"enable_scripts": False,
"return_figure_images": False,
"return_table_images": False,
"return_page_images": False,
"layout_model": "default",
"embed_text_metadata_pdf": False,
"embed_pdf_metadata_dpi": 100,
"fast_embed_pdf_metadata": True,
"detect_signatures": False,
"agentic_layout": False,
"danger_filter_wide_boxes": False,
"chunk_table_blocks": False,
"use_streaq_parse": False,
"use_streaq_cpu": False,
"use_single_batch": False,
"use_reducto_lite": False,
"use_local_ocr_pipeline": False,
"use_gemini_experimental_model": False,
"latency_sensitive": False,
"promptable_agentic_text_on_regular_blocks": False,
"deprio_kv_regions": False,
"postprocess_v2": False,
"fast_agentic_tables": True,
"auto_formatting": False,
"shadow_tag": "",
"shadow_original_job_id": "",
"shadow_original_org_id": "",
"shadow_original_org_name": "",
"shadow_affected_domains": [],
"force_save_layout_intermediates": False
},
"split_rules": "Split the document into the applicable sections. Sections may only overlap at their first and last page if at all.",
"priority": False,
"deep_split": False,
"include_confidence": False,
"split_options": {
"table_cutoff": "truncate",
"allow_page_overlap": True
}
}
headers = {
"Authorization": "Bearer <token>",
"Content-Type": "application/json"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
{
"usage": {
"num_pages": 123,
"credits": 123,
"credit_breakdown": {},
"page_billing_breakdown": {}
},
"result": {
"section_mapping": {},
"splits": [
{
"name": "<string>",
"pages": [
123
],
"conf": "low",
"partitions": [
{
"name": "<string>",
"pages": [
123
],
"conf": "low"
}
]
}
]
},
"response_type": "split"
}
Document Processing
Split
POST /split
Split
import requests
url = "https://platform.reducto.ai/split"
payload = {
"split_description": [
{
"name": "<string>",
"description": "<string>",
"partition_key": "<string>"
}
],
"document_url": "<string>",
"options": {
"ocr_mode": "standard",
"extraction_mode": "ocr",
"chunking": {
"chunk_mode": "variable",
"chunk_overlap": 0
},
"table_summary": { "enabled": False },
"figure_summary": {
"advanced_chart_agent": False,
"enabled": False,
"enhanced": False,
"override": False
},
"filter_blocks": [],
"force_url_result": False
},
"advanced_options": {
"ocr_system": "highres",
"table_output_format": "html",
"merge_tables": False,
"include_formula_information": False,
"include_color_information": False,
"include_dropdown_information": False,
"continue_hierarchy": True,
"keep_line_breaks": False,
"page_range": {},
"large_table_chunking": {
"enabled": True,
"size": 50
},
"spreadsheet_table_clustering": "default",
"add_page_markers": False,
"remove_text_formatting": False,
"return_ocr_data": False,
"filter_line_numbers": False,
"read_comments": False,
"persist_results": False,
"exclude_hidden_sheets": False,
"exclude_hidden_rows_cols": False,
"enable_change_tracking": False,
"enable_highlight_detection": False,
"ignore_watermarks": False,
"track_offsets": False,
"track_word_offsets": False,
"track_line_offsets": False
},
"experimental_options": {
"use_fast_inference": False,
"use_gpu_ocr": False,
"max_batch_size": 10,
"num_ocr_crops": 2,
"timeout": 1800,
"extra_metadata": {},
"summarize_all_figures": False,
"overrides": {},
"enrich": {
"enabled": False,
"mode": "standard"
},
"layout_enrichment": False,
"native_docx_parsing": False,
"native_office_conversion": False,
"disable_office_external_links": True,
"enable_checkboxes": False,
"enable_equations": False,
"rotate_pages": False,
"rotate_figures": False,
"enable_scripts": False,
"return_figure_images": False,
"return_table_images": False,
"return_page_images": False,
"layout_model": "default",
"embed_text_metadata_pdf": False,
"embed_pdf_metadata_dpi": 100,
"fast_embed_pdf_metadata": True,
"detect_signatures": False,
"agentic_layout": False,
"danger_filter_wide_boxes": False,
"chunk_table_blocks": False,
"use_streaq_parse": False,
"use_streaq_cpu": False,
"use_single_batch": False,
"use_reducto_lite": False,
"use_local_ocr_pipeline": False,
"use_gemini_experimental_model": False,
"latency_sensitive": False,
"promptable_agentic_text_on_regular_blocks": False,
"deprio_kv_regions": False,
"postprocess_v2": False,
"fast_agentic_tables": True,
"auto_formatting": False,
"shadow_tag": "",
"shadow_original_job_id": "",
"shadow_original_org_id": "",
"shadow_original_org_name": "",
"shadow_affected_domains": [],
"force_save_layout_intermediates": False
},
"split_rules": "Split the document into the applicable sections. Sections may only overlap at their first and last page if at all.",
"priority": False,
"deep_split": False,
"include_confidence": False,
"split_options": {
"table_cutoff": "truncate",
"allow_page_overlap": True
}
}
headers = {
"Authorization": "Bearer <token>",
"Content-Type": "application/json"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
{
"usage": {
"num_pages": 123,
"credits": 123,
"credit_breakdown": {},
"page_billing_breakdown": {}
},
"result": {
"section_mapping": {},
"splits": [
{
"name": "<string>",
"pages": [
123
],
"conf": "low",
"partitions": [
{
"name": "<string>",
"pages": [
123
],
"conf": "low"
}
]
}
]
},
"response_type": "split"
}
Authorizations
Bearer authentication header of the form Bearer <token>, where <token> is your auth token.
Headers
Body
application/json
The configuration options for processing the document.
Show child attributes
Show child attributes
The URL of the document to be processed. You can provide one of the following:
- A publicly available URL
- A presigned S3 URL
- A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
- A job_id (jobid://) or a list of job_ids (jobid://) obtained from a previous call to the /parse endpoint
Show child attributes
Show child attributes
Show child attributes
Show child attributes
Show child attributes
Show child attributes
split_rules
string
default: Split the document into the applicable sections. Sections may only overlap at their first and last page if at all.
The prompt that describes rules for splitting the document.
If True, attempts to process the job with priority if the user has priority processing budget available; by default, sync jobs are prioritized above async jobs.
Show child attributes
Show child attributes
Was this page helpful?
⌘I