Struktur
Examples

Enrich Records from URLs

Fetch URLs and extract structured data to enrich existing records.

The pattern

You have records with URLs (e.g., customer records with contract URLs). You want to fetch each URL and extract additional data.

CLI approach

# Stream one compact JSON record per line, enrich each from its contract URL,
# then collect the results back into a single JSON array.
jq -c '.[]' customers.json | while IFS= read -r row; do
  # Here-strings avoid `echo`, which may rewrite backslashes in the JSON.
  url=$(jq -r '.contract_url' <<<"$row")

  # -f: exit non-zero on HTTP errors so an error page is never sent to the
  #     extractor; -sS: no progress meter, but still print real errors.
  # pipefail: make a curl failure fail the whole pipeline, not just its first stage.
  # NOTE(review): assumes struktur exits non-zero when extraction fails — confirm.
  if extracted=$(
       set -o pipefail
       curl -fsS "$url" | struktur --stdin \
         --schema-json '{"type":"object","properties":{"start_date":{"type":"string"},"value":{"type":"number"}},"required":["start_date","value"],"additionalProperties":false}' \
         --model openai/gpt-4o-mini
     ) && merged=$(jq -c --argjson orig "$row" '$orig + .' <<<"$extracted") && [ -n "$merged" ]; then
    # Extracted fields override same-named fields of the original record.
    printf '%s\n' "$merged"
  else
    # Keep the original record on failure so no input row is silently dropped.
    echo "Failed to enrich: $url" >&2
    printf '%s\n' "$row"
  fi
done | jq -s '.'

SDK approach

import { extract, simple } from "@struktur/sdk";
import { openai } from "@ai-sdk/openai";
import { parse } from "@struktur/sdk";

// Fields to extract from each fetched document, keyed by property name.
const contractFields = {
  start_date: { type: "string" },
  value: { type: "number" },
};

// JSON Schema passed to the extractor: every field above is required and
// no properties beyond them are allowed.
const schema = {
  type: "object",
  properties: contractFields,
  required: Object.keys(contractFields),
  additionalProperties: false,
};

/**
 * Enrich each record with fields extracted from the document at its
 * `contract_url`.
 *
 * Records are processed one at a time, in input order. A record whose fetch or
 * extraction fails is kept unchanged, so the output always has exactly one
 * entry per input record.
 *
 * @param {Array<object>} records - Records carrying a `contract_url` property.
 * @returns {Promise<Array<object>>} The records, merged with the extracted
 *   fields wherever enrichment succeeded.
 */
async function enrichRecords(records) {
  const enriched = [];

  for (const record of records) {
    try {
      // Fetch URL content
      const response = await fetch(record.contract_url);

      // fetch() only rejects on network failures. Without this check a 404/500
      // error page would be handed to the model as if it were the contract.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} for ${record.contract_url}`);
      }
      const text = await response.text();

      // Parse as artifact
      const artifacts = await parse({ kind: "text", text });

      // Extract
      const result = await extract({
        artifacts,
        schema,
        strategy: simple({ model: openai("gpt-4o-mini") }),
      });

      // Merge back; extracted fields override same-named fields on the record.
      enriched.push({ ...record, ...result.data });
    } catch (error) {
      // Log the cause as well as the URL so failures can be diagnosed.
      console.error(`Failed to enrich: ${record.contract_url}`, error);
      enriched.push(record); // Keep original on failure
    }
  }

  return enriched;
}

// Sample input: each customer record points at the contract document to read.
const customers = [
  {
    name: "Acme Corp",
    contract_url: "https://example.com/contracts/1",
  },
  {
    name: "Globex",
    contract_url: "https://example.com/contracts/2",
  },
];

// Enrich every record, then print the resulting array.
const result = await enrichRecords(customers);
console.log(result);

With concurrency

import { extract, parallel } from "@struktur/sdk";

/**
 * Enrich records concurrently, with at most `concurrency` requests in flight.
 *
 * A fixed pool of workers pulls records off a shared cursor, so the limit is
 * enforced directly rather than approximated by delaying each request.
 * Results are returned in input order. A record whose fetch or extraction
 * fails is returned unchanged, so one bad URL does not discard the whole batch.
 *
 * @param {Array<object>} records - Records carrying a `contract_url` property.
 * @param {number} [concurrency=5] - Maximum simultaneous enrichments; values
 *   below 1 or non-numeric values are treated as 1.
 * @returns {Promise<Array<object>>} One entry per input record, in input order.
 */
async function enrichRecordsParallel(records, concurrency = 5) {
  // Fetch, parse and extract a single record; falls back to the original on error.
  async function enrichOne(record) {
    try {
      const response = await fetch(record.contract_url);

      // fetch() resolves on 4xx/5xx; don't send an error page to the model.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} for ${record.contract_url}`);
      }
      const text = await response.text();
      const artifacts = await parse({ kind: "text", text });

      const result = await extract({
        artifacts,
        schema,
        strategy: simple({ model: openai("gpt-4o-mini") }),
      });

      return { ...record, ...result.data };
    } catch (error) {
      console.error(`Failed to enrich: ${record.contract_url}`, error);
      return record;
    }
  }

  const results = new Array(records.length);
  const workerCount = Math.min(
    Math.max(1, Math.floor(concurrency) || 1),
    records.length,
  );
  let nextIndex = 0;

  // Each worker claims the next unprocessed index until none remain. Claiming
  // is synchronous (no await between the check and the increment), so no
  // record is processed twice.
  async function worker() {
    while (nextIndex < records.length) {
      const index = nextIndex;
      nextIndex += 1;
      results[index] = await enrichOne(records[index]);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return results;
}

See also

On this page