POST /api/endpoints/{id}/completions
curl --request POST \
  --url https://app.baseplate.ai/api/endpoints/{id}/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
  "values": "<any>",
  "prompt": [
    {}
  ],
  "messages": [
    {}
  ],
  "stream": true,
  "user": "<string>"
}'

Generates a completion

Generates a completion from your deployed endpoint. Your endpoint ID can be found in the API tab of your endpoint or in the URL.

Parameters

Path Parameters

id
string
required

Baseplate endpoint ID

Headers

Authorization
string
required

Baseplate API key. Must be in the format "Bearer $BASEPLATE_API_KEY"

Content-Type
string

Use application/json

Body

values
json

A JSON object with keys matching the variables of your deployed template (see the request sketch after this list)

prompt
array

A prompt for Anthropic, ada, babbage, curie, and davinci models. One of messages or prompt is required.

messages
array

An array of the chat history, for gpt-3.5 and gpt-4 models. One of messages or prompt is required.

stream
boolean

If true, the response will be sent back as Server-Sent Events. A [DONE] message is sent at the end of the completion, and the search results (if applicable) are sent after the [DONE]. See below for an example.

user
string

A user string to use for OpenAI’s API.
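For instance, a minimal non-streaming request from TypeScript might look like the following. This is a sketch, not a canonical example: the endpoint ID and the "productName" values key are placeholders you should replace with your own endpoint ID and template variables, and BASEPLATE_API_KEY is assumed to be set in your environment.

// A sketch of a non-streaming completion request. The endpoint ID and
// the "productName" values key are placeholders; substitute your own.
const endpointId = "<your-endpoint-id>";

const res = await fetch(
  `https://app.baseplate.ai/api/endpoints/${endpointId}/completions`,
  {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.BASEPLATE_API_KEY}`,
    },
    body: JSON.stringify({
      // keys must match the variables of your deployed template
      values: { productName: "Jacket" },
      messages: [{ role: "user", content: "Write a product description." }],
      stream: false,
    }),
  }
);
const completion = await res.json();
// Completion models return the text under choices[0].text (see the
// sample response below); chat models may return it under
// choices[0].message.content instead.
console.log(completion.choices[0].text);

A successful non-streaming response looks like this: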

{
  "id": "cmpl-6htWn45Ch0G37ZZFI48zt29k5atNe",
  "object": "text_completion",
  "created": 1675919573,
  "model": "text-davinci-003",
  "choices": [
    {
      "text": "\n\nThis Jacket is the ultimate in shoe fashion. It's sleek, stylish",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length",
      "usage_id": "89b572c6-f0c9-40b5-bdb4-dc5cb6a99144"
    }
  ],
  "usage": {
    "prompt_tokens": 23,
    "completion_tokens": 16,
    "total_tokens": 39
  },
  "search_results": [
    {
      "data": {
        "text": "Example text."
      },
      "embedding": "Example text.",
      "image_paths": null,
      "confidence": 0.84870479,
      "metadata": {
        "documentId": "007c8ab6-4972-4457-9a1b-244fdefb22bc",
        "rowId": 571724,
        "url": "/5274962e-04c3-4a23-b3d5-1d7e1ea6a230/text.pdf"
      },
      "variable": "context",
      "last_accessed": "2023-05-03T00:03:40.525+00:00",
      "query": "example query"
    }
  ],
  "prompt_cost": 0.00046,
  "completion_cost": 0.00032,
  "baseplate_model_name": "text-davinci-003"
}
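For reference, here is a rough TypeScript shape for the non-streaming response, derived from the field names in the example above. This is a sketch, not an official type definition; optionality is a best guess (per the stream parameter notes, search_results only appears when applicable).

// Field names taken from the sample response above; optionality is
// a best guess, not an official contract.
interface CompletionResponse {
  id: string;
  object: string;
  created: number;
  model: string;
  choices: {
    text: string;
    index: number;
    logprobs: unknown | null;
    finish_reason: string;
    usage_id: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  search_results?: {
    data: { text: string };
    embedding: string;
    image_paths: string[] | null;
    confidence: number;
    metadata: Record<string, unknown>;
    variable: string;
    last_accessed: string;
    query: string;
  }[];
  prompt_cost: number;
  completion_cost: number;
  baseplate_model_name: string;
}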

Streaming Example (Next.js 13)

Here's an example if you're using Next.js 13/React:

Backend (/api/completions/chat/route.ts):

import {
  createParser,
  ParsedEvent,
  ReconnectInterval,
} from "eventsource-parser";

export async function POST(request: Request) {
  const body = await request.json();
  const encoder = new TextEncoder();
  const decoder = new TextDecoder();
const res = await fetch("https://app.baseplate.ai/api/endpoints/${endpoint-id}/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.BASEPLATE_API_KEY}`,
    },
    body: JSON.stringify({
      messages: body.messages,
      stream: true,
    }),
  });
  const stream = new ReadableStream({
    async start(controller) {
      // callback
      function onParse(event: ParsedEvent | ReconnectInterval) {
        if (event.type === "event") {
          const data = event.data;
          // https://beta.openai.com/docs/api-reference/completions/create#completions/create-stream
          if (data === "[DONE]") {
            // Note: if your endpoint returns search results, they are
            // sent after the [DONE] message, so closing the stream here
            // will drop them.
            controller.close();
            return;
          }
          try {
            const json = JSON.parse(data);
            const text =
              json.choices[0].delta?.content || json.choices[0].text || "";
            const queue = encoder.encode(text);
            controller.enqueue(queue);
          } catch (e) {
            // maybe parse error
            controller.error(e);
          }
        }
      }

      // the stream response (SSE) from Baseplate may be fragmented into multiple chunks
      // this ensures we properly read chunks and invoke an event for each SSE event stream
      const parser = createParser(onParse);
      // https://web.dev/streams/#asynchronous-iteration
      for await (const chunk of res.body as any) {
        parser.feed(decoder.decode(chunk));
      }
    },
  });
  return new Response(stream);
}
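Because this route calls Baseplate from the server, your BASEPLATE_API_KEY stays in a server-side environment variable and is never exposed to the browser; the client only ever talks to your own /api/completions/chat route.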

Front end (inside some function):

let res: Response;
try {
  res = await fetch(`/api/completions/chat`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      messages: [{ role: "user", content: "Hello" }],
    }),
  });
} catch (e: any) {
  console.error(e);
  return;
}
setGenerating(false);
if (!res.ok) {
  const error = await res.json();
  console.error(`Error generating: ${error.message}`);
  return;
}
const stream = res.body;
if (!stream) {
  return;
}
const reader = stream.getReader();
const decoder = new TextDecoder();
let done = false;
// read streamed chunks until the server closes the stream
while (!done) {
  const { value, done: doneReading } = await reader.read();
  done = doneReading;
  const chunkValue = decoder.decode(value);
  console.log(chunkValue);
}