Skip to main content
The Extract.Run() method pulls specific fields from documents as structured JSON. You define a JSON schema with the fields you need, and Extract returns values matching that schema.

Basic Usage

package main

import (
    "context"
    "fmt"
    "io"
    "os"
    
    reducto "github.com/reductoai/reducto-go-sdk"
    "github.com/reductoai/reducto-go-sdk/option"
    "github.com/reductoai/reducto-go-sdk/shared"
)

// main uploads invoice.pdf, extracts the invoice number and total from it
// using a JSON schema, and prints the extracted values and usage counters.
// Every step that can fail reports its error and stops, so later steps never
// run against a nil file or a nil upload response.
func main() {
    client := reducto.NewClient(option.WithAPIKey(os.Getenv("REDUCTO_API_KEY")))
    ctx := context.Background()

    // Upload
    file, err := os.Open("invoice.pdf")
    if err != nil {
        fmt.Printf("Open error: %v\n", err)
        return
    }
    defer file.Close()

    upload, err := client.Upload(ctx, reducto.UploadParams{
        File: reducto.F[io.Reader](file),
    })
    if err != nil {
        fmt.Printf("Upload error: %v\n", err)
        return
    }

    // Define schema: each property names a field to extract; the description
    // is sent along with the schema as part of the extraction request.
    schema := map[string]interface{}{
        "type": "object",
        "properties": map[string]interface{}{
            "invoice_number": map[string]interface{}{
                "type":        "string",
                "description": "The invoice number, typically at the top",
            },
            "total": map[string]interface{}{
                "type":        "number",
                "description": "The total amount due",
            },
        },
    }

    // Extract
    result, err := client.Extract.Run(ctx, reducto.ExtractRunParams{
        ExtractConfig: reducto.ExtractConfigParam{
            DocumentURL: reducto.F[reducto.ExtractConfigDocumentURLUnionParam](
                shared.UnionString(upload.FileID),
            ),
            Schema: reducto.F[interface{}](schema),
        },
    })
    if err != nil {
        fmt.Printf("Extract error: %v\n", err)
        return
    }

    // Access extracted values
    // Result is []interface{} where each element is a map[string]interface{}
    if len(result.Result) > 0 {
        // Use the comma-ok form so an unexpected element type is reported
        // instead of panicking.
        if extracted, ok := result.Result[0].(map[string]interface{}); ok {
            fmt.Printf("Invoice Number: %v\n", extracted["invoice_number"])
            fmt.Printf("Total: %v\n", extracted["total"])
        } else {
            fmt.Printf("Unexpected result type: %T\n", result.Result[0])
        }
    }

    // Usage information
    fmt.Printf("Pages processed: %d\n", result.Usage.NumPages)
    fmt.Printf("Fields extracted: %d\n", result.Usage.NumFields)
}

Method Signatures

Synchronous Extract

// Run performs a synchronous extract: it takes the extract parameters in body
// (plus optional per-request options) and returns the extract response, or an
// error if the call fails.
func (s *ExtractService) Run(
    ctx context.Context,
    body ExtractRunParams,
    opts ...option.RequestOption,
) (*shared.ExtractResponse, error)

Asynchronous Extract

// RunJob starts an asynchronous extract. The response carries a JobID that
// can be passed to client.Job.Get() to retrieve the results.
func (s *ExtractService) RunJob(
    ctx context.Context,
    body ExtractRunJobParams,
    opts ...option.RequestOption,
) (*ExtractRunJobResponse, error)
The RunJob method returns a JobID that you can use with client.Job.Get() to retrieve results.

Array Extraction

For documents with repeating data (line items, transactions), enable array extraction:
// Schema with a single "line_items" property: an array whose items are
// objects with a string description and numeric quantity and price.
schema := map[string]interface{}{
    "type": "object",
    "properties": map[string]interface{}{
        "line_items": map[string]interface{}{
            "type": "array",
            "items": map[string]interface{}{
                "type": "object",
                "properties": map[string]interface{}{
                    "description": map[string]interface{}{"type": "string"},
                    "quantity":    map[string]interface{}{"type": "number"},
                    "price":       map[string]interface{}{"type": "number"},
                },
            },
        },
    },
}

// upload is the response from client.Upload, as in the Basic Usage example.
// Check err before reading result.
result, err := client.Extract.Run(context.Background(), reducto.ExtractRunParams{
    ExtractConfig: reducto.ExtractConfigParam{
        DocumentURL: reducto.F[reducto.ExtractConfigDocumentURLUnionParam](
            shared.UnionString(upload.FileID),
        ),
        Schema: reducto.F[interface{}](schema),
        // Turn array extraction on and select the "auto" mode constant.
        ArrayExtract: reducto.F(shared.ArrayExtractConfigParam{
            Enabled: reducto.F(true),
            Mode:    reducto.F(shared.ArrayExtractConfigModeAuto),
        }),
    },
})

Citations

Enable citations to get source locations for each extracted value:
// upload and schema are defined as in the Basic Usage example.
// Check err before reading result.
result, err := client.Extract.Run(context.Background(), reducto.ExtractRunParams{
    ExtractConfig: reducto.ExtractConfigParam{
        DocumentURL: reducto.F[reducto.ExtractConfigDocumentURLUnionParam](
            shared.UnionString(upload.FileID),
        ),
        Schema:            reducto.F[interface{}](schema),
        // Request citations alongside the extracted values.
        GenerateCitations: reducto.F(true),
    },
})

System Prompt

Add a system prompt to guide extraction:
// upload and schema are defined as in the Basic Usage example.
// Check err before reading result.
result, err := client.Extract.Run(context.Background(), reducto.ExtractRunParams{
    ExtractConfig: reducto.ExtractConfigParam{
        DocumentURL: reducto.F[reducto.ExtractConfigDocumentURLUnionParam](
            shared.UnionString(upload.FileID),
        ),
        Schema:       reducto.F[interface{}](schema),
        // Free-text instruction sent with the request to guide extraction.
        SystemPrompt: reducto.F("Extract financial data from this investment statement."),
    },
})

Next Steps