Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/major-teeth-greet.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-openai': minor
---

Introduces a single-source-of-truth model registry for all OpenAI models, preventing silent drift between capability declarations and runtime validation. Significantly expands model coverage across text, image, video, and audio categories.
9 changes: 5 additions & 4 deletions packages/typescript/ai-openai/src/adapters/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ import {
validateNumberOfImages,
validatePrompt,
} from '../image/image-provider-options'
import type { OpenAIImageModel } from '../model-meta'
import type {
OpenAIImageModel,
OpenAIImageModelProviderOptionsByName,
OpenAIImageModelSizeByName,
OpenAIImageProviderOptions,
} from '../image/image-provider-options'
} from '../model-meta'
import type { OpenAIImageProviderOptions } from '../image/image-provider-options'
import type {
GeneratedImage,
ImageGenerationOptions,
Expand All @@ -32,7 +32,8 @@ export interface OpenAIImageConfig extends OpenAIClientConfig {}
* OpenAI Image Generation Adapter
*
* Tree-shakeable adapter for OpenAI image generation functionality.
* Supports gpt-image-1, gpt-image-1-mini, dall-e-3, and dall-e-2 models.
* Supports gpt-image-1.5, chatgpt-image-latest, gpt-image-1, gpt-image-1-mini,
* dall-e-3, and dall-e-2 models.
*
* Features:
* - Model-specific type-safe provider options
Expand Down
12 changes: 5 additions & 7 deletions packages/typescript/ai-openai/src/adapters/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import type {
TextOptions,
} from '@tanstack/ai'
import type {
ExternalTextProviderOptions,
InternalTextProviderOptions,
} from '../text/text-provider-options'
import type {
Expand All @@ -47,20 +46,19 @@ export interface OpenAITextConfig extends OpenAIClientConfig {}
/**
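* Provider options for an OpenAI chat model, looked up by model name in
* OpenAIChatModelProviderOptionsByName. Without a type argument it resolves
* to the union of options across all chat models.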
*/
export type OpenAITextProviderOptions = ExternalTextProviderOptions
export type OpenAITextProviderOptions<
TModel extends OpenAIChatModel = OpenAIChatModel,
> = OpenAIChatModelProviderOptionsByName[TModel]

// ===========================
// Type Resolution Helpers
// ===========================

/**
* Resolve provider options for a specific model by indexing the per-model
* options map. The model must be a known OpenAIChatModel; there is no
* fallback to a base options type.
*/
type ResolveProviderOptions<TModel extends string> =
TModel extends keyof OpenAIChatModelProviderOptionsByName
? OpenAIChatModelProviderOptionsByName[TModel]
: OpenAITextProviderOptions
type ResolveProviderOptions<TModel extends OpenAIChatModel> =
OpenAIChatModelProviderOptionsByName[TModel]

/**
* Resolve input modalities for a specific model.
Expand Down
32 changes: 25 additions & 7 deletions packages/typescript/ai-openai/src/adapters/transcription.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ export class OpenAITranscriptionAdapter<
const file = this.prepareAudioFile(audio)

// Build request
const request: OpenAI_SDK.Audio.TranscriptionCreateParams = {
const requestBase: Omit<
OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming,
'response_format'
> = {
model,
file,
language,
prompt,
response_format: this.mapResponseFormat(responseFormat),
...modelOptions,
}

Expand All @@ -69,9 +71,14 @@ export class OpenAITranscriptionAdapter<
(!responseFormat && model !== 'whisper-1')

if (useVerbose) {
const verboseRequest: OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming<'verbose_json'> =
{
...requestBase,
response_format: 'verbose_json',
stream: false,
}
const response = await this.client.audio.transcriptions.create({
...request,
response_format: 'verbose_json',
...verboseRequest,
})

return {
Expand All @@ -96,12 +103,23 @@ export class OpenAITranscriptionAdapter<
})),
}
} else {
const request: OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming =
{
...requestBase,
response_format: this.mapResponseFormat(responseFormat),
stream: false,
}
const response = await this.client.audio.transcriptions.create(request)

return {
id: generateId(this.name),
model,
text: typeof response === 'string' ? response : response.text,
text:
typeof response === 'string'
? response
: 'text' in response
? response.text
: '',
language,
}
}
Expand Down Expand Up @@ -157,9 +175,9 @@ export class OpenAITranscriptionAdapter<

private mapResponseFormat(
format?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt',
): OpenAI_SDK.Audio.TranscriptionCreateParams['response_format'] {
): OpenAI_SDK.Audio.AudioResponseFormat {
if (!format) return 'json'
return format as OpenAI_SDK.Audio.TranscriptionCreateParams['response_format']
return format
}
}

Expand Down
2 changes: 1 addition & 1 deletion packages/typescript/ai-openai/src/adapters/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export interface OpenAITTSConfig extends OpenAIClientConfig {}
* OpenAI Text-to-Speech Adapter
*
* Tree-shakeable adapter for OpenAI TTS functionality.
* Supports tts-1, tts-1-hd, and gpt-4o-audio-preview models.
* Supports gpt-4o-mini-tts, tts-1, and tts-1-hd models.
*
* Features:
* - Multiple voice options: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, verse
Expand Down
13 changes: 8 additions & 5 deletions packages/typescript/ai-openai/src/adapters/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import {
validateVideoSize,
} from '../video/video-provider-options'
import type { VideoModel } from 'openai/resources'
import type { OpenAIVideoModel } from '../model-meta'
import type {
OpenAIVideoModel,
OpenAIVideoModelProviderOptionsByName,
OpenAIVideoModelSizeByName,
OpenAIVideoProviderOptions,
} from '../video/video-provider-options'
} from '../model-meta'
import type { OpenAIVideoProviderOptions } from '../video/video-provider-options'
import type {
VideoGenerationOptions,
VideoJobResult,
Expand Down Expand Up @@ -53,9 +53,11 @@ export class OpenAIVideoAdapter<
readonly name = 'openai' as const

private client: OpenAI_SDK
private readonly clientConfig: OpenAIVideoConfig

constructor(config: OpenAIVideoConfig, model: TModel) {
super(config, model)
this.clientConfig = config
this.client = createOpenAIClient(config)
}

Expand Down Expand Up @@ -212,8 +214,9 @@ export class OpenAIVideoAdapter<
// Option 3: Return a proxy URL through our server

// Let's try fetching and returning a data URL for now
const baseUrl = this.config.baseUrl || 'https://api.openai.com/v1'
const apiKey = this.config.apiKey
const baseUrl =
this.clientConfig.baseURL || 'https://api.openai.com/v1'
const apiKey = this.clientConfig.apiKey

const contentResponse = await fetch(
`${baseUrl}/videos/${jobId}/content`,
Expand Down
35 changes: 31 additions & 4 deletions packages/typescript/ai-openai/src/audio/audio-provider-options.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { TTS_MODELS } from '../models/audio'

export interface AudioProviderOptions {
/**
* The text to generate audio for. The maximum length is 4096 characters.
Expand Down Expand Up @@ -46,13 +48,28 @@ export interface AudioProviderOptions {
stream_format?: 'sse' | 'audio'
}

/**
* Validates the requested stream format against the selected TTS model.
*/
export const validateStreamFormat = (options: AudioProviderOptions) => {
const unsupportedModels = ['tts-1', 'tts-1-hd']
if (options.stream_format && unsupportedModels.includes(options.model)) {
if (!Object.hasOwn(TTS_MODELS, options.model)) {
if (options.stream_format) {
console.warn(
`Unknown TTS model: ${options.model}. stream_format may not be supported.`,
)
}
return
}

const modelMeta = TTS_MODELS[options.model as keyof typeof TTS_MODELS]
if (options.stream_format && !modelMeta.supportsStreaming) {
throw new Error(`The model ${options.model} does not support streaming.`)
}
}

/**
* Validates that the requested speech speed falls within OpenAI's supported range.
*/
export const validateSpeed = (options: AudioProviderOptions) => {
if (options.speed) {
if (options.speed < 0.25 || options.speed > 4.0) {
Expand All @@ -61,13 +78,23 @@ export const validateSpeed = (options: AudioProviderOptions) => {
}
}

/**
* Validates that the selected TTS model supports voice instructions.
*/
export const validateInstructions = (options: AudioProviderOptions) => {
const unsupportedModels = ['tts-1', 'tts-1-hd']
if (options.instructions && unsupportedModels.includes(options.model)) {
if (!Object.hasOwn(TTS_MODELS, options.model)) {
throw new Error(`Unknown TTS model: ${options.model}`)
}

const modelMeta = TTS_MODELS[options.model as keyof typeof TTS_MODELS]
if (options.instructions && !modelMeta.supportsInstructions) {
throw new Error(`The model ${options.model} does not support instructions.`)
}
}

/**
* Validates the maximum input length for text-to-speech requests.
*/
export const validateAudioInput = (options: AudioProviderOptions) => {
if (options.input.length > 4096) {
throw new Error('Input text exceeds maximum length of 4096 characters.')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ export interface OpenAITranscriptionProviderOptions {
* Additional information to include in the transcription response. logprobs will return the log probabilities
* of the tokens in the response to understand the model's confidence in the transcription.
* logprobs only works with response_format set to json and only with the models gpt-4o-transcribe,
* gpt-4o-mini-transcribe, and gpt-4o-mini-transcribe-2025-12-15.
* gpt-4o-mini-transcribe, gpt-4o-mini-transcribe-2025-12-15, and
* gpt-4o-mini-transcribe-2025-03-20.
* This field is not supported when using gpt-4o-transcribe-diarize.
*/
include?: OpenAI.Audio.TranscriptionCreateParams['include']
Expand Down
89 changes: 28 additions & 61 deletions packages/typescript/ai-openai/src/image/image-provider-options.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { IMAGE_MODELS } from '../models/image'

/**
* OpenAI Image Generation Provider Options
*
Expand Down Expand Up @@ -176,27 +178,6 @@ export type OpenAIImageProviderOptions =
| DallE3ProviderOptions
| DallE2ProviderOptions

/**
* Type-only map from model name to its specific provider options.
* Used by the core AI types to narrow providerOptions based on the selected model.
*/
export type OpenAIImageModelProviderOptionsByName = {
'gpt-image-1': GptImage1ProviderOptions
'gpt-image-1-mini': GptImage1MiniProviderOptions
'dall-e-3': DallE3ProviderOptions
'dall-e-2': DallE2ProviderOptions
}

/**
* Type-only map from model name to its supported sizes.
*/
export type OpenAIImageModelSizeByName = {
'gpt-image-1': GptImageSize
'gpt-image-1-mini': GptImageSize
'dall-e-3': DallE3Size
'dall-e-2': DallE2Size
}

/**
* Internal options interface for validation
*/
Expand All @@ -206,6 +187,14 @@ interface ImageValidationOptions {
background?: 'transparent' | 'opaque' | 'auto' | null
}

/**
 * Looks up the registry entry for an image model.
 *
 * Only own keys of IMAGE_MODELS count as known models, so inherited
 * property names such as "toString" are rejected rather than resolved.
 *
 * @param model - Model name to look up in IMAGE_MODELS.
 * @returns The IMAGE_MODELS entry stored under `model`.
 * @throws Error when `model` is not an own key of IMAGE_MODELS.
 */
function getImageModelMeta(model: string) {
  const isRegistered = Object.hasOwn(IMAGE_MODELS, model)

  if (isRegistered) {
    // The own-key check above is what justifies narrowing the plain string.
    const registryKey = model as keyof typeof IMAGE_MODELS
    return IMAGE_MODELS[registryKey]
  }

  throw new Error(`Unknown image model: ${model}`)
}

/**
* Validates that the provided size is supported by the model.
* Throws a descriptive error if the size is not supported.
Expand All @@ -216,19 +205,10 @@ export function validateImageSize(
): void {
if (!size || size === 'auto') return

const validSizes: Record<string, Array<string>> = {
'gpt-image-1': ['1024x1024', '1536x1024', '1024x1536', 'auto'],
'gpt-image-1-mini': ['1024x1024', '1536x1024', '1024x1536', 'auto'],
'dall-e-3': ['1024x1024', '1792x1024', '1024x1792'],
'dall-e-2': ['256x256', '512x512', '1024x1024'],
}
const modelMeta = getImageModelMeta(model)
const modelSizes = modelMeta.sizes

const modelSizes = validSizes[model]
if (!modelSizes) {
throw new Error(`Unknown image model: ${model}`)
}

if (!modelSizes.includes(size)) {
if (!(modelSizes as ReadonlyArray<string>).includes(size)) {
throw new Error(
`Size "${size}" is not supported by model "${model}". ` +
`Supported sizes: ${modelSizes.join(', ')}`,
Expand All @@ -245,53 +225,40 @@ export function validateNumberOfImages(
): void {
if (numberOfImages === undefined) return

// dall-e-3 only supports n=1
if (model === 'dall-e-3' && numberOfImages !== 1) {
throw new Error(
`Model "dall-e-3" only supports generating 1 image at a time. ` +
`Requested: ${numberOfImages}`,
)
}
const modelMeta = getImageModelMeta(model)

// Other models support 1-10
if (numberOfImages < 1 || numberOfImages > 10) {
if (numberOfImages < 1 || numberOfImages > modelMeta.maxImages) {
throw new Error(
`Number of images must be between 1 and 10. Requested: ${numberOfImages}`,
`Number of images must be between 1 and ${modelMeta.maxImages}. Requested: ${numberOfImages}`,
)
}
}

/**
* Validates that the selected image model supports background control.
*/
export const validateBackground = (options: ImageValidationOptions) => {
if (options.background) {
const supportedModels = ['gpt-image-1', 'gpt-image-1-mini']
if (!supportedModels.includes(options.model)) {
if (options.background != null) {
const modelMeta = getImageModelMeta(options.model)
if (!('supportsBackground' in modelMeta)) {
throw new Error(
`The model ${options.model} does not support background option.`,
)
}
}
}

/**
* Validates prompt presence and model-specific prompt length limits.
*/
export const validatePrompt = (options: ImageValidationOptions) => {
if (options.prompt.length === 0) {
throw new Error('Prompt cannot be empty.')
}
if (
(options.model === 'gpt-image-1' || options.model === 'gpt-image-1-mini') &&
options.prompt.length > 32000
) {
throw new Error(
'For gpt-image-1/gpt-image-1-mini, prompt length must be less than or equal to 32000 characters.',
)
}
if (options.model === 'dall-e-2' && options.prompt.length > 1000) {
throw new Error(
'For dall-e-2, prompt length must be less than or equal to 1000 characters.',
)
}
if (options.model === 'dall-e-3' && options.prompt.length > 4000) {
const modelMeta = getImageModelMeta(options.model)
if (options.prompt.length > modelMeta.maxPromptLength) {
throw new Error(
'For dall-e-3, prompt length must be less than or equal to 4000 characters.',
`For ${options.model}, prompt length must be less than or equal to ${modelMeta.maxPromptLength} characters.`,
)
}
}
Loading