> ## Documentation Index
> Fetch the complete documentation index at: https://docs.reducto.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Pipeline



## OpenAPI

````yaml /openapi.json post /pipeline
# OpenAPI 3.1 description of the POST /pipeline operation.
openapi: 3.1.0
info:
  version: dev
  title: Reducto API
servers:
  - url: https://platform.reducto.ai
# No document-wide security requirement; the operation declares its own.
security: []
paths:
  /pipeline:
    post:
      operationId: pipeline_pipeline_post
      summary: Pipeline
      # Bearer scheme defined under components.securitySchemes.
      security:
        - SkippableHTTPBearer: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3PipelineConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PipelineResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
components:
  schemas:
    # Request body for POST /pipeline: the document input (or edit
    # instructions), the pipeline to run, and optional setting overrides.
    V3PipelineConfig:
      properties:
        input:
          # Accepts a single string, a list of strings, or an UploadResponse
          # object.
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar: keeps the numbered list and the closing
          # paragraph on their own lines with no stray leading indentation or
          # trailing whitespace in the rendered description.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be
            processed. You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint
               after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse
               invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit
            instructions.
        pipeline_id:
          type: string
          title: Pipeline Id
          description: The ID of the pipeline to use for the document.
        settings:
          $ref: '#/components/schemas/PipelineSettings'
          default: {}
      type: object
      required:
        - input
        - pipeline_id
      title: V3PipelineConfig
    # Body of the 200 response from POST /pipeline.
    PipelineResponse:
      title: PipelineResponse
      type: object
      required:
        - job_id
        - usage
        - result
      properties:
        job_id:
          title: Job Id
          type: string
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          $ref: '#/components/schemas/PipelineResult'
    # Body of the 422 response from POST /pipeline: a list of ValidationError
    # entries under `detail`.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # Object form accepted by V3PipelineConfig.input; only file_id is required.
    UploadResponse:
      title: UploadResponse
      type: object
      required:
        - file_id
      properties:
        file_id:
          title: File Id
          type: string
        presigned_url:
          title: Presigned Url
          anyOf:
            - type: string
            - type: 'null'
    # Referenced by V3PipelineConfig.settings. Unknown keys are rejected
    # (additionalProperties: false).
    PipelineSettings:
      title: PipelineSettings
      description: Settings for pipeline execution that override pipeline defaults.
      type: object
      additionalProperties: false
      properties:
        document_password:
          title: Document Password
          description: Password to decrypt password-protected documents.
          anyOf:
            - type: string
            - type: 'null'
    # Usage record shared by the pipeline, parse, split and edit responses in
    # this spec; only num_pages is required.
    ParseUsage:
      title: ParseUsage
      type: object
      required:
        - num_pages
      properties:
        num_pages:
          title: Num Pages
          type: integer
        credits:
          title: Credits
          anyOf:
            - type: number
            - type: 'null'
        credit_breakdown:
          title: Credit Breakdown
          # Either a string-keyed map of numbers or null.
          anyOf:
            - type: object
              additionalProperties:
                type: number
            - type: 'null'
    # Referenced by PipelineResponse.result. Each key is nullable; `edit` is
    # the only key not listed under `required`.
    PipelineResult:
      title: PipelineResult
      type: object
      required:
        - parse
        - extract
        - split
      properties:
        parse:
          title: Parse
          # A single ParseResponse, a list of them, or null.
          anyOf:
            - $ref: '#/components/schemas/ParseResponse'
            - type: array
              items:
                $ref: '#/components/schemas/ParseResponse'
            - type: 'null'
        extract:
          title: Extract
          # A list of ExtractSplitResponse, a single ExtractResponse or
          # V3ExtractResponse, or null.
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/ExtractSplitResponse'
            - $ref: '#/components/schemas/ExtractResponse'
            - $ref: '#/components/schemas/V3ExtractResponse'
            - type: 'null'
        split:
          anyOf:
            - $ref: '#/components/schemas/SplitResponse'
            - type: 'null'
        edit:
          anyOf:
            - $ref: '#/components/schemas/EditResponse'
            - type: 'null'
    # Element type of HTTPValidationError.detail.
    ValidationError:
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        loc:
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
        # No type constraint: any JSON value is allowed here.
        input:
          title: Input
        ctx:
          title: Context
          type: object
    # Parse step output, referenced by PipelineResult.parse. `result` is either
    # a FullResult or a UrlResult.
    ParseResponse:
      title: ParseResponse
      type: object
      required:
        - job_id
        - duration
        - usage
        - result
      properties:
        job_id:
          title: Job Id
          type: string
        duration:
          title: Duration
          description: The duration of the parse request in seconds.
          type: number
        pdf_url:
          title: Pdf Url
          description: The storage URL of the converted PDF file.
          anyOf:
            - type: string
            - type: 'null'
        studio_link:
          title: Studio Link
          description: The link to the studio pipeline for the document.
          anyOf:
            - type: string
            - type: 'null'
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: >-
            The response from the document processing service. Note that there
            can be two types of responses, Full Result and URL Result. This is
            due to limitations on the max return size on HTTPS. If the response
            is too large, it will be returned as a presigned URL in the URL
            response. You should handle this in your application.
          anyOf:
            - $ref: '#/components/schemas/FullResult'
            - $ref: '#/components/schemas/UrlResult'
    # Element type of the list variant of PipelineResult.extract: one
    # extraction result per named split.
    ExtractSplitResponse:
      title: ExtractSplitResponse
      description: This is the response format for Extract -> Split Pipelines
      type: object
      required:
        - split_name
        - page_range
        - result
      properties:
        split_name:
          title: Split Name
          type: string
        page_range:
          title: Page Range
          type: array
          items:
            type: integer
        partition:
          title: Partition
          anyOf:
            - type: string
            - type: 'null'
        result:
          title: Result
          anyOf:
            - $ref: '#/components/schemas/ExtractResponse'
            - $ref: '#/components/schemas/V3ExtractResponse'
    # Extract step output with a list-valued `result` and a required (but
    # nullable) `citations` key.
    ExtractResponse:
      title: ExtractResponse
      type: object
      required:
        - usage
        - result
        - citations
      properties:
        job_id:
          title: Job Id
          anyOf:
            - type: string
            - type: 'null'
        usage:
          $ref: '#/components/schemas/ExtractUsage'
        studio_link:
          title: Studio Link
          description: The link to the studio pipeline for the document.
          anyOf:
            - type: string
            - type: 'null'
        result:
          title: Result
          description: >-
            The extracted response in your provided schema. This is a list of
            dictionaries. If disable_chunking is True (default), then it will be
            a list of length one.
          type: array
          items: {}
        citations:
          title: Citations
          description: The citations corresponding to the extracted response.
          anyOf:
            - type: array
              items: {}
            - type: 'null'
    # Extract step output without a `citations` key; compare ExtractResponse.
    V3ExtractResponse:
      title: V3ExtractResponse
      type: object
      required:
        - usage
        - result
      properties:
        job_id:
          title: Job Id
          anyOf:
            - type: string
            - type: 'null'
        usage:
          $ref: '#/components/schemas/ExtractUsage'
        studio_link:
          title: Studio Link
          description: The link to the studio pipeline for the document.
          anyOf:
            - type: string
            - type: 'null'
        result:
          title: Result
          # NOTE(review): the description says "a list of dictionaries", but
          # the first anyOf branch ({}) admits any value — confirm which is
          # intended.
          description: >-
            The extracted response in your provided schema. This is a list of
            dictionaries. If disable_chunking is True (default), then it will be
            a list of length one.
          anyOf:
            - {}
            - type: array
              items: {}
    # Split step output, referenced by PipelineResult.split. `result` is either
    # a SplitResult or a DeepSplitResult.
    SplitResponse:
      title: SplitResponse
      type: object
      required:
        - usage
        - result
      properties:
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: The split result.
          anyOf:
            - $ref: '#/components/schemas/SplitResult'
            - $ref: '#/components/schemas/DeepSplitResult'
    # Edit step output, referenced by PipelineResult.edit; only document_url is
    # required.
    EditResponse:
      title: EditResponse
      type: object
      required:
        - document_url
      properties:
        document_url:
          title: Document Url
          description: Presigned URL to download the edited document.
          type: string
        form_schema:
          title: Form Schema
          description: >-
            Form schema for PDF forms. List of widgets with their types,
            descriptions, and bounding boxes.
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/EditWidget'
            - type: 'null'
        usage:
          description: >-
            Usage information for the edit operation, including number of pages
            and credits charged.
          anyOf:
            - $ref: '#/components/schemas/ParseUsage'
            - type: 'null'
    # Variant of ParseResponse.result whose `type` is the constant "full".
    FullResult:
      title: FullResult
      type: object
      required:
        - type
        - chunks
      properties:
        type:
          title: Type
          description: type = 'full'
          type: string
          const: full
        chunks:
          title: Chunks
          type: array
          items:
            $ref: '#/components/schemas/ParseChunk'
        ocr:
          anyOf:
            - $ref: '#/components/schemas/OCRResult'
            - type: 'null'
        custom:
          title: Custom
          anyOf:
            - {}
            - type: 'null'
    # Variant of ParseResponse.result whose `type` is the constant "url".
    UrlResult:
      title: UrlResult
      type: object
      required:
        - type
        - url
        - result_id
      properties:
        type:
          title: Type
          description: type = 'url'
          type: string
          const: url
        url:
          title: Url
          type: string
        result_id:
          title: Result Id
          type: string
    # Usage record referenced by ExtractResponse.usage and
    # V3ExtractResponse.usage.
    ExtractUsage:
      title: ExtractUsage
      type: object
      required:
        - num_pages
        - num_fields
      properties:
        num_pages:
          title: Num Pages
          type: integer
        num_fields:
          title: Num Fields
          type: integer
        credits:
          title: Credits
          anyOf:
            - type: number
            - type: 'null'
        extract_mode:
          title: Extract Mode
          # One of the three listed mode names, or null.
          anyOf:
            - type: string
              enum:
                - super_agent
                - extract
                - spreadsheet_agent
            - type: 'null'
    # Variant of SplitResponse.result carrying Split entries.
    SplitResult:
      title: SplitResult
      type: object
      required:
        - section_mapping
        - splits
      properties:
        section_mapping:
          title: Section Mapping
          # Either a string-keyed map of integer lists or null.
          anyOf:
            - type: object
              additionalProperties:
                type: array
                items:
                  type: integer
            - type: 'null'
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/Split'
    # Variant of SplitResponse.result carrying DeepSplit entries.
    DeepSplitResult:
      title: DeepSplitResult
      type: object
      required:
        - splits
      properties:
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/DeepSplit'
    # Element type of EditResponse.form_schema: one form widget.
    EditWidget:
      title: EditWidget
      type: object
      required:
        - bbox
        - description
        - type
      properties:
        bbox:
          description: Bounding box coordinates of the widget
          $ref: '#/components/schemas/BoundingBox'
        description:
          title: Description
          description: Description of the widget extracted from the document
          type: string
        type:
          title: Type
          description: Type of the form widget
          type: string
          enum:
            - text
            - checkbox
            - radio
            - dropdown
            - barcode
        fill:
          title: Fill
          description: >-
            If True (default), the system will attempt to fill this widget. If
            False, the widget will be created but intentionally left unfilled.
          type: boolean
          default: true
        value:
          title: Value
          description: >-
            If provided, this value will be used directly instead of attempting
            to intelligently determine the field value.
          anyOf:
            - type: string
            - type: 'null'
        font_size:
          title: Font Size
          description: >-
            Font size in points for this specific field. Takes priority over the
            global font_size in EditOptions. If not set, falls back to the
            global font_size, then to auto-calculated sizing.
          # Numeric values are bounded to the range 1..72.
          anyOf:
            - type: number
              minimum: 1
              maximum: 72
            - type: 'null'
    # Element type of FullResult.chunks.
    ParseChunk:
      title: ParseChunk
      type: object
      required:
        - content
        - embed
        - enriched
        - blocks
      properties:
        content:
          title: Content
          description: The content of the chunk extracted from the document.
          type: string
        embed:
          title: Embed
          description: Chunk content optimized for embedding and retrieval.
          type: string
        enriched:
          title: Enriched
          description: The enriched content of the chunk extracted from the document.
          anyOf:
            - type: string
            - type: 'null'
        enrichment_success:
          title: Enrichment Success
          description: Whether the enrichment was successful.
          type: boolean
          default: false
        blocks:
          title: Blocks
          type: array
          items:
            $ref: '#/components/schemas/ParseBlock'
    # Referenced by FullResult.ocr: word-level and line-level OCR entries.
    OCRResult:
      title: OCRResult
      type: object
      required:
        - words
        - lines
      properties:
        words:
          title: Words
          type: array
          items:
            $ref: '#/components/schemas/OCRWord'
        lines:
          title: Lines
          type: array
          items:
            $ref: '#/components/schemas/OCRLine'
    # Element type of SplitResult.splits.
    Split:
      title: Split
      type: object
      required:
        - name
        - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
            - high
            - low
          default: low
        partitions:
          title: Partitions
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/SplitPartition'
            - type: 'null'
    # Element type of DeepSplitResult.splits; pages are DeepSplitPageEvidence
    # objects rather than plain integers.
    DeepSplit:
      title: DeepSplit
      type: object
      required:
        - name
        - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
        partitions:
          title: Partitions
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/DeepSplitPartition'
            - type: 'null'
    # Rectangle on a page, shared by EditWidget, ParseBlock, OCRWord and
    # OCRLine. original_page is the only optional field.
    BoundingBox:
      title: BoundingBox
      type: object
      required:
        - left
        - top
        - width
        - height
        - page
      properties:
        left:
          title: Left
          type: number
        top:
          title: Top
          type: number
        width:
          title: Width
          type: number
        height:
          title: Height
          type: number
        page:
          title: Page
          description: The page number of the bounding box (1-indexed).
          type: integer
        original_page:
          title: Original Page
          description: >-
            The page number in the original document of the bounding box
            (1-indexed).
          type: integer
    # Element type of ParseChunk.blocks: one typed block with its bounding box
    # and content.
    ParseBlock:
      title: ParseBlock
      type: object
      required:
        - type
        - bbox
        - content
      properties:
        type:
          title: Type
          description: The type of block extracted from the document.
          type: string
          enum:
            - Header
            - Footer
            - Title
            - Section Header
            - Page Number
            - List Item
            - Figure
            - Table
            - Key Value
            - Text
            - Comment
            - Signature
        bbox:
          description: The bounding box of the block extracted from the document.
          $ref: '#/components/schemas/BoundingBox'
        content:
          title: Content
          description: The content of the block extracted from the document.
          type: string
        image_url:
          title: Image Url
          description: (Experimental) The URL of the image associated with the block.
          anyOf:
            - type: string
            - type: 'null'
        chart_data:
          title: Chart Data
          # NOTE(review): the schema is an array of strings while the
          # description speaks of a single URL/link — confirm which is intended.
          description: >-
            (Experimental) The URL/link to chart data JSON for figure blocks
            processed by chart agent.
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        confidence:
          title: Confidence
          description: >-
            The confidence for the block. It is either low or high and takes
            into account factors like OCR and table structure
          anyOf:
            - type: string
            - type: 'null'
          default: low
        granular_confidence:
          description: >-
            Granular confidence scores for the block. It is a dictionary of
            confidence scores for the block. The confidence scores will not be
            None if the user has enabled numeric confidence scores.
          anyOf:
            - $ref: '#/components/schemas/GranularConfidence'
            - type: 'null'
        extra:
          title: Extra
          description: >-
            Extra metadata fields for the block. Fields like 'is_chart' will
            only appear when set to True.
          anyOf:
            - type: object
              additionalProperties: true
            - type: 'null'
    # Element type of OCRResult.words.
    OCRWord:
      title: OCRWord
      type: object
      required:
        - text
        - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: >-
            OCR confidence score between 0 and 1, where 1 indicates highest
            confidence
          anyOf:
            - type: number
            - type: 'null'
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the word belongs to.
          anyOf:
            - type: integer
            - type: 'null'
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          anyOf:
            - type: integer
            - type: 'null'
    # Element type of OCRResult.lines; same field set as OCRWord.
    OCRLine:
      title: OCRLine
      type: object
      required:
        - text
        - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: >-
            OCR confidence score between 0 and 1, where 1 indicates highest
            confidence
          anyOf:
            - type: number
            - type: 'null'
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the line belongs to.
          anyOf:
            - type: integer
            - type: 'null'
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          anyOf:
            - type: integer
            - type: 'null'
    # Element type of Split.partitions.
    SplitPartition:
      title: SplitPartition
      type: object
      required:
        - name
        - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
            - high
            - low
          default: low
    # Element type of the `pages` arrays in DeepSplit and DeepSplitPartition.
    DeepSplitPageEvidence:
      title: DeepSplitPageEvidence
      type: object
      required:
        - page_number
        - evidence
      properties:
        page_number:
          title: Page Number
          type: integer
        evidence:
          title: Evidence
          type: string
        confidence:
          title: Confidence
          # One of high / medium / low, or null.
          anyOf:
            - type: string
              enum:
                - high
                - medium
                - low
            - type: 'null'
    # Element type of DeepSplit.partitions.
    DeepSplitPartition:
      title: DeepSplitPartition
      type: object
      required:
        - name
        - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
    # Referenced by ParseBlock.granular_confidence; both scores are optional
    # and nullable.
    GranularConfidence:
      title: GranularConfidence
      type: object
      properties:
        extract_confidence:
          title: Extract Confidence
          anyOf:
            - type: number
            - type: 'null'
        parse_confidence:
          title: Parse Confidence
          anyOf:
            - type: number
            - type: 'null'
  # HTTP bearer scheme referenced by the POST /pipeline security requirement.
  securitySchemes:
    SkippableHTTPBearer:
      scheme: bearer
      type: http

````