-
Notifications
You must be signed in to change notification settings - Fork 2.3k
feat: add GLM-4.5 and OpenAI gpt-oss models to Fireworks provider #6784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,10 @@ export type FireworksModelId = | |
| "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct" | ||
| "accounts/fireworks/models/deepseek-r1-0528" | ||
| "accounts/fireworks/models/deepseek-v3" | ||
| "accounts/fireworks/models/glm-4p5" | ||
| "accounts/fireworks/models/glm-4p5-air" | ||
| "accounts/fireworks/models/gpt-oss-20b" | ||
| "accounts/fireworks/models/gpt-oss-120b" | ||
|
||
export const fireworksDefaultModelId: FireworksModelId = "accounts/fireworks/models/kimi-k2-instruct" | ||
|
||
|
@@ -58,4 +62,44 @@ export const fireworksModels = { | |
description: | ||
"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.", | ||
}, | ||
"accounts/fireworks/models/glm-4p5": { | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.55, | ||
outputPrice: 2.19, | ||
description: | ||
"Z.ai GLM-4.5 with 355B total parameters and 32B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.", | ||
}, | ||
"accounts/fireworks/models/glm-4p5-air": { | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.55, | ||
outputPrice: 2.19, | ||
description: | ||
"Z.ai GLM-4.5-Air with 106B total parameters and 12B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.", | ||
}, | ||
"accounts/fireworks/models/gpt-oss-20b": { | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.07, | ||
outputPrice: 0.3, | ||
description: | ||
"OpenAI gpt-oss-20b: Compact model for local/edge deployments. Optimized for low-latency and resource-constrained environments with chain-of-thought output, adjustable reasoning, and agentic workflows.", | ||
}, | ||
"accounts/fireworks/models/gpt-oss-120b": { | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.15, | ||
outputPrice: 0.6, | ||
description: | ||
"OpenAI gpt-oss-120b: Production-grade, general-purpose model that fits on a single H100 GPU. Features complex reasoning, configurable effort, full chain-of-thought transparency, and supports function calling, tool use, and structured outputs.", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The description mentions function calling and tool use support. Should we consider adding a boolean for this to the model info for clarity, similar to the existing `supportsImages` flag? |
||
}, | ||
} as const satisfies Record<string, ModelInfo> |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -179,6 +179,90 @@ describe("FireworksHandler", () => { | |
) | ||
}) | ||
|
||
it("should return GLM-4.5 model with correct configuration", () => { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While the configuration tests are thorough, could we add tests that verify these models actually work with the API? Even mock tests simulating API responses would increase confidence that the integration works correctly. |
||
const testModelId: FireworksModelId = "accounts/fireworks/models/glm-4p5" | ||
const handlerWithModel = new FireworksHandler({ | ||
apiModelId: testModelId, | ||
fireworksApiKey: "test-fireworks-api-key", | ||
}) | ||
const model = handlerWithModel.getModel() | ||
expect(model.id).toBe(testModelId) | ||
expect(model.info).toEqual( | ||
expect.objectContaining({ | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.55, | ||
outputPrice: 2.19, | ||
description: expect.stringContaining("Z.ai GLM-4.5 with 355B total parameters"), | ||
}), | ||
) | ||
}) | ||
|
||
it("should return GLM-4.5-Air model with correct configuration", () => { | ||
const testModelId: FireworksModelId = "accounts/fireworks/models/glm-4p5-air" | ||
const handlerWithModel = new FireworksHandler({ | ||
apiModelId: testModelId, | ||
fireworksApiKey: "test-fireworks-api-key", | ||
}) | ||
const model = handlerWithModel.getModel() | ||
expect(model.id).toBe(testModelId) | ||
expect(model.info).toEqual( | ||
expect.objectContaining({ | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.55, | ||
outputPrice: 2.19, | ||
description: expect.stringContaining("Z.ai GLM-4.5-Air with 106B total parameters"), | ||
}), | ||
) | ||
}) | ||
|
||
it("should return gpt-oss-20b model with correct configuration", () => { | ||
const testModelId: FireworksModelId = "accounts/fireworks/models/gpt-oss-20b" | ||
const handlerWithModel = new FireworksHandler({ | ||
apiModelId: testModelId, | ||
fireworksApiKey: "test-fireworks-api-key", | ||
}) | ||
const model = handlerWithModel.getModel() | ||
expect(model.id).toBe(testModelId) | ||
expect(model.info).toEqual( | ||
expect.objectContaining({ | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.07, | ||
outputPrice: 0.3, | ||
description: expect.stringContaining("OpenAI gpt-oss-20b: Compact model for local/edge deployments"), | ||
}), | ||
) | ||
}) | ||
|
||
it("should return gpt-oss-120b model with correct configuration", () => { | ||
const testModelId: FireworksModelId = "accounts/fireworks/models/gpt-oss-120b" | ||
const handlerWithModel = new FireworksHandler({ | ||
apiModelId: testModelId, | ||
fireworksApiKey: "test-fireworks-api-key", | ||
}) | ||
const model = handlerWithModel.getModel() | ||
expect(model.id).toBe(testModelId) | ||
expect(model.info).toEqual( | ||
expect.objectContaining({ | ||
maxTokens: 16384, | ||
contextWindow: 128000, | ||
supportsImages: false, | ||
supportsPromptCache: false, | ||
inputPrice: 0.15, | ||
outputPrice: 0.6, | ||
description: expect.stringContaining("OpenAI gpt-oss-120b: Production-grade, general-purpose model"), | ||
}), | ||
) | ||
}) | ||
|
||
it("completePrompt method should return text from Fireworks API", async () => { | ||
const expectedResponse = "This is a test response from Fireworks" | ||
mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] }) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The descriptions are good, but could we add more technical details? For example, mentioning that the GLM models use a Mixture-of-Experts (MoE) architecture, or noting specific optimization techniques, would help users choose the right model.