Skip to content

Commit c64030a

Browse files
committed
feat: add Avian as a cloud LLM inference provider
Add Avian (https://avian.io) as a Go backend that proxies requests to the Avian OpenAI-compatible API at https://api.avian.io/v1. Backend implementation: - Go gRPC backend at backend/go/avian/ following the huggingface backend pattern - Supports chat completions with structured messages and streaming (SSE) - Authentication via AVIAN_API_KEY environment variable - Configurable base URL via AVIAN_API_BASE environment variable Gallery models: - deepseek/deepseek-v3.2: 164K context, $0.26/$0.38 per 1M tokens - moonshotai/kimi-k2.5: 131K context, $0.45/$2.20 per 1M tokens - z-ai/glm-5: 131K context, $0.30/$2.55 per 1M tokens - minimax/minimax-m2.5: 1M context, $0.30/$1.10 per 1M tokens Build infrastructure: - Backend definition in Makefile (golang backend) - CI workflow entries for Linux (amd64/arm64) and macOS (metal) - Backend index.yaml entries with OCI image references Signed-off-by: Kyle D <deximia@hotmail.com>
1 parent 00abf1b commit c64030a

File tree

10 files changed

+528
-2
lines changed

10 files changed

+528
-2
lines changed

.github/workflows/backend.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,6 +1795,20 @@ jobs:
17951795
dockerfile: "./backend/Dockerfile.golang"
17961796
context: "./"
17971797
ubuntu-version: '2404'
1798+
# avian
1799+
- build-type: ''
1800+
cuda-major-version: ""
1801+
cuda-minor-version: ""
1802+
platforms: 'linux/amd64,linux/arm64'
1803+
tag-latest: 'auto'
1804+
tag-suffix: '-avian'
1805+
runs-on: 'ubuntu-latest'
1806+
base-image: "ubuntu:24.04"
1807+
skip-drivers: 'false'
1808+
backend: "avian"
1809+
dockerfile: "./backend/Dockerfile.golang"
1810+
context: "./"
1811+
ubuntu-version: '2404'
17981812
# rfdetr
17991813
- build-type: ''
18001814
cuda-major-version: ""
@@ -2089,6 +2103,10 @@ jobs:
20892103
tag-suffix: "-metal-darwin-arm64-huggingface"
20902104
build-type: "metal"
20912105
lang: "go"
2106+
- backend: "avian"
2107+
tag-suffix: "-metal-darwin-arm64-avian"
2108+
build-type: "metal"
2109+
lang: "go"
20922110
with:
20932111
backend: ${{ matrix.backend }}
20942112
build-type: ${{ matrix.build-type }}

Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Disable parallel execution for backend builds
2-
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral
2+
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/avian backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral
33

44
GOCMD=go
55
GOTEST=$(GOCMD) test
@@ -452,6 +452,7 @@ BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
452452
BACKEND_PIPER = piper|golang|.|false|true
453453
BACKEND_LOCAL_STORE = local-store|golang|.|false|true
454454
BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
455+
BACKEND_AVIAN = avian|golang|.|false|true
455456
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
456457
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
457458
BACKEND_WHISPER = whisper|golang|.|false|true
@@ -507,6 +508,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
507508
$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
508509
$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
509510
$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
511+
$(eval $(call generate-docker-build-target,$(BACKEND_AVIAN)))
510512
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
511513
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
512514
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
@@ -539,7 +541,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP)))
539541
docker-save-%: backend-images
540542
docker save local-ai-backend:$* -o backend-images/$*.tar
541543

542-
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral
544+
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral docker-build-avian
543545

544546
########################################################
545547
### Mock Backend for E2E Tests

backend/go/avian/Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
GOCMD=go

# package, build and clean do not produce files with their own names;
# declare them phony so a stray file named e.g. "build" cannot mask them.
.PHONY: package build clean

avian:
	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o avian ./

package:
	bash package.sh

build: avian package

clean:
	rm -f avian

backend/go/avian/avian.go

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
package main
2+
3+
import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
15+
16+
const (
	// defaultBaseURL is the Avian OpenAI-compatible endpoint used when
	// AVIAN_API_BASE is not set.
	defaultBaseURL = "https://api.avian.io/v1"
)

// Avian implements the LocalAI gRPC backend interface by proxying
// prediction calls to the Avian cloud API, which speaks the
// OpenAI-compatible chat-completions protocol.
type Avian struct {
	base.SingleThread

	apiKey  string // bearer token taken from AVIAN_API_KEY at Load time
	baseURL string // API root: AVIAN_API_BASE, or defaultBaseURL if unset
	model   string // remote model identifier supplied in the model options
}

// chatMessage represents an OpenAI-compatible chat message.
type chatMessage struct {
	Role       string `json:"role"`
	Content    string `json:"content"`
	Name       string `json:"name,omitempty"`
	ToolCallID string `json:"tool_call_id,omitempty"`
}

// chatRequest represents an OpenAI-compatible chat completion request.
// Optional sampling fields are omitted when zero; Stream is always
// serialized so the API receives an explicit true/false.
type chatRequest struct {
	Model       string        `json:"model"`
	Messages    []chatMessage `json:"messages"`
	MaxTokens   int           `json:"max_tokens,omitempty"`
	Temperature float32       `json:"temperature,omitempty"`
	TopP        float32       `json:"top_p,omitempty"`
	Stream      bool          `json:"stream"`
	Stop        []string      `json:"stop,omitempty"`
}

// chatChoice represents a single choice in a chat completion response.
type chatChoice struct {
	Index        int         `json:"index"`
	Message      chatMessage `json:"message"`
	FinishReason string      `json:"finish_reason"`
}

// chatUsage represents token usage in a chat completion response.
type chatUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// chatResponse represents an OpenAI-compatible chat completion response.
// Only the fields this backend reads are modeled.
type chatResponse struct {
	Choices []chatChoice `json:"choices"`
	Usage   chatUsage    `json:"usage"`
}

// streamDelta represents the delta in a streaming response chunk.
type streamDelta struct {
	Role    string `json:"role,omitempty"`
	Content string `json:"content,omitempty"`
}

// streamChoice represents a choice in a streaming response chunk.
// FinishReason is a pointer because the API sends JSON null until the
// final chunk of the stream.
type streamChoice struct {
	Index        int         `json:"index"`
	Delta        streamDelta `json:"delta"`
	FinishReason *string     `json:"finish_reason"`
}

// streamChunk represents a single chunk in a streaming response.
type streamChunk struct {
	Choices []streamChoice `json:"choices"`
	Usage   *chatUsage     `json:"usage,omitempty"`
}
85+
86+
func (a *Avian) Load(opts *pb.ModelOptions) error {
87+
a.apiKey = os.Getenv("AVIAN_API_KEY")
88+
if a.apiKey == "" {
89+
return fmt.Errorf("AVIAN_API_KEY environment variable is required")
90+
}
91+
92+
a.baseURL = os.Getenv("AVIAN_API_BASE")
93+
if a.baseURL == "" {
94+
a.baseURL = defaultBaseURL
95+
}
96+
97+
a.model = opts.Model
98+
if a.model == "" {
99+
return fmt.Errorf("model name is required")
100+
}
101+
102+
return nil
103+
}
104+
105+
func (a *Avian) buildMessages(opts *pb.PredictOptions) []chatMessage {
106+
// If structured messages are provided (from chat completions), use them directly
107+
if len(opts.Messages) > 0 {
108+
messages := make([]chatMessage, len(opts.Messages))
109+
for i, msg := range opts.Messages {
110+
messages[i] = chatMessage{
111+
Role: msg.Role,
112+
Content: msg.Content,
113+
Name: msg.Name,
114+
ToolCallID: msg.ToolCallId,
115+
}
116+
}
117+
return messages
118+
}
119+
120+
// Fall back to using the prompt as a single user message
121+
return []chatMessage{
122+
{Role: "user", Content: opts.Prompt},
123+
}
124+
}
125+
126+
func (a *Avian) Predict(opts *pb.PredictOptions) (string, error) {
127+
reqBody := chatRequest{
128+
Model: a.model,
129+
Messages: a.buildMessages(opts),
130+
Stream: false,
131+
}
132+
133+
if opts.Tokens > 0 {
134+
reqBody.MaxTokens = int(opts.Tokens)
135+
}
136+
if opts.Temperature > 0 {
137+
reqBody.Temperature = opts.Temperature
138+
}
139+
if opts.TopP > 0 {
140+
reqBody.TopP = opts.TopP
141+
}
142+
if len(opts.StopPrompts) > 0 {
143+
reqBody.Stop = opts.StopPrompts
144+
}
145+
146+
jsonBody, err := json.Marshal(reqBody)
147+
if err != nil {
148+
return "", fmt.Errorf("failed to marshal request: %w", err)
149+
}
150+
151+
req, err := http.NewRequest("POST", a.baseURL+"/chat/completions", bytes.NewReader(jsonBody))
152+
if err != nil {
153+
return "", fmt.Errorf("failed to create request: %w", err)
154+
}
155+
156+
req.Header.Set("Content-Type", "application/json")
157+
req.Header.Set("Authorization", "Bearer "+a.apiKey)
158+
159+
resp, err := http.DefaultClient.Do(req)
160+
if err != nil {
161+
return "", fmt.Errorf("request failed: %w", err)
162+
}
163+
defer resp.Body.Close()
164+
165+
body, err := io.ReadAll(resp.Body)
166+
if err != nil {
167+
return "", fmt.Errorf("failed to read response: %w", err)
168+
}
169+
170+
if resp.StatusCode != http.StatusOK {
171+
return "", fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(body))
172+
}
173+
174+
var chatResp chatResponse
175+
if err := json.Unmarshal(body, &chatResp); err != nil {
176+
return "", fmt.Errorf("failed to parse response: %w", err)
177+
}
178+
179+
if len(chatResp.Choices) == 0 {
180+
return "", fmt.Errorf("no choices in response")
181+
}
182+
183+
return chatResp.Choices[0].Message.Content, nil
184+
}
185+
186+
func (a *Avian) PredictStream(opts *pb.PredictOptions, results chan string) error {
187+
reqBody := chatRequest{
188+
Model: a.model,
189+
Messages: a.buildMessages(opts),
190+
Stream: true,
191+
}
192+
193+
if opts.Tokens > 0 {
194+
reqBody.MaxTokens = int(opts.Tokens)
195+
}
196+
if opts.Temperature > 0 {
197+
reqBody.Temperature = opts.Temperature
198+
}
199+
if opts.TopP > 0 {
200+
reqBody.TopP = opts.TopP
201+
}
202+
if len(opts.StopPrompts) > 0 {
203+
reqBody.Stop = opts.StopPrompts
204+
}
205+
206+
jsonBody, err := json.Marshal(reqBody)
207+
if err != nil {
208+
close(results)
209+
return fmt.Errorf("failed to marshal request: %w", err)
210+
}
211+
212+
req, err := http.NewRequest("POST", a.baseURL+"/chat/completions", bytes.NewReader(jsonBody))
213+
if err != nil {
214+
close(results)
215+
return fmt.Errorf("failed to create request: %w", err)
216+
}
217+
218+
req.Header.Set("Content-Type", "application/json")
219+
req.Header.Set("Authorization", "Bearer "+a.apiKey)
220+
req.Header.Set("Accept", "text/event-stream")
221+
222+
go func() {
223+
defer close(results)
224+
225+
resp, err := http.DefaultClient.Do(req)
226+
if err != nil {
227+
fmt.Fprintf(os.Stderr, "avian: stream request failed: %v\n", err)
228+
return
229+
}
230+
defer resp.Body.Close()
231+
232+
if resp.StatusCode != http.StatusOK {
233+
body, _ := io.ReadAll(resp.Body)
234+
fmt.Fprintf(os.Stderr, "avian: API returned status %d: %s\n", resp.StatusCode, string(body))
235+
return
236+
}
237+
238+
// Read SSE stream
239+
buf := make([]byte, 4096)
240+
var lineBuf strings.Builder
241+
242+
for {
243+
n, err := resp.Body.Read(buf)
244+
if n > 0 {
245+
lineBuf.Write(buf[:n])
246+
247+
// Process complete lines
248+
for {
249+
text := lineBuf.String()
250+
idx := strings.Index(text, "\n")
251+
if idx < 0 {
252+
break
253+
}
254+
255+
line := strings.TrimSpace(text[:idx])
256+
lineBuf.Reset()
257+
lineBuf.WriteString(text[idx+1:])
258+
259+
if line == "" || line == "data: [DONE]" {
260+
continue
261+
}
262+
263+
if !strings.HasPrefix(line, "data: ") {
264+
continue
265+
}
266+
267+
data := strings.TrimPrefix(line, "data: ")
268+
269+
var chunk streamChunk
270+
if jsonErr := json.Unmarshal([]byte(data), &chunk); jsonErr != nil {
271+
continue
272+
}
273+
274+
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
275+
results <- chunk.Choices[0].Delta.Content
276+
}
277+
}
278+
}
279+
280+
if err != nil {
281+
if err != io.EOF {
282+
fmt.Fprintf(os.Stderr, "avian: stream read error: %v\n", err)
283+
}
284+
break
285+
}
286+
}
287+
}()
288+
289+
return nil
290+
}

backend/go/avian/main.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package main

// Avian backend - proxies requests to the Avian API (https://api.avian.io/v1)
// Avian provides an OpenAI-compatible API for LLM inference.

import (
	"flag"

	grpc "github.com/mudler/LocalAI/pkg/grpc"
)

var (
	// addr is the listen address for the backend's gRPC server; the parent
	// LocalAI process passes it via the -addr flag when spawning the backend.
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

// main parses flags and blocks serving the Avian backend over gRPC,
// panicking if the server cannot start (the process is supervised by
// the parent and restarted on failure).
func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Avian{}); err != nil {
		panic(err)
	}
}

0 commit comments

Comments
 (0)