|
| 1 | +package main |
| 2 | + |
import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
| 15 | + |
// defaultBaseURL is the API root used when the AVIAN_API_BASE environment
// variable is not set. Endpoint paths such as "/chat/completions" are appended
// to it directly.
const defaultBaseURL = "https://api.avian.io/v1"
| 19 | + |
| 20 | +type Avian struct { |
| 21 | + base.SingleThread |
| 22 | + |
| 23 | + apiKey string |
| 24 | + baseURL string |
| 25 | + model string |
| 26 | +} |
| 27 | + |
// chatMessage is one entry of an OpenAI-compatible conversation. It is used
// both for the messages sent in a request and for the message returned inside
// a non-streaming response choice.
type chatMessage struct {
	// Role and Content are always serialized, even when empty.
	Role    string `json:"role"`
	Content string `json:"content"`
	// Name and ToolCallID are left out of the JSON when empty.
	Name       string `json:"name,omitempty"`
	ToolCallID string `json:"tool_call_id,omitempty"`
}
| 35 | + |
| 36 | +// chatRequest represents an OpenAI-compatible chat completion request. |
| 37 | +type chatRequest struct { |
| 38 | + Model string `json:"model"` |
| 39 | + Messages []chatMessage `json:"messages"` |
| 40 | + MaxTokens int `json:"max_tokens,omitempty"` |
| 41 | + Temperature float32 `json:"temperature,omitempty"` |
| 42 | + TopP float32 `json:"top_p,omitempty"` |
| 43 | + Stream bool `json:"stream"` |
| 44 | + Stop []string `json:"stop,omitempty"` |
| 45 | +} |
| 46 | + |
| 47 | +// chatChoice represents a single choice in a chat completion response. |
| 48 | +type chatChoice struct { |
| 49 | + Index int `json:"index"` |
| 50 | + Message chatMessage `json:"message"` |
| 51 | + FinishReason string `json:"finish_reason"` |
| 52 | +} |
| 53 | + |
// chatUsage carries the token accounting reported by the API for a completion.
type chatUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}
| 60 | + |
| 61 | +// chatResponse represents an OpenAI-compatible chat completion response. |
| 62 | +type chatResponse struct { |
| 63 | + Choices []chatChoice `json:"choices"` |
| 64 | + Usage chatUsage `json:"usage"` |
| 65 | +} |
| 66 | + |
// streamDelta is the incremental payload of a streaming chunk. Both fields are
// optional on the wire and are omitted from the JSON when empty.
type streamDelta struct {
	Role    string `json:"role,omitempty"`
	Content string `json:"content,omitempty"`
}
| 72 | + |
| 73 | +// streamChoice represents a choice in a streaming response chunk. |
| 74 | +type streamChoice struct { |
| 75 | + Index int `json:"index"` |
| 76 | + Delta streamDelta `json:"delta"` |
| 77 | + FinishReason *string `json:"finish_reason"` |
| 78 | +} |
| 79 | + |
| 80 | +// streamChunk represents a single chunk in a streaming response. |
| 81 | +type streamChunk struct { |
| 82 | + Choices []streamChoice `json:"choices"` |
| 83 | + Usage *chatUsage `json:"usage,omitempty"` |
| 84 | +} |
| 85 | + |
| 86 | +func (a *Avian) Load(opts *pb.ModelOptions) error { |
| 87 | + a.apiKey = os.Getenv("AVIAN_API_KEY") |
| 88 | + if a.apiKey == "" { |
| 89 | + return fmt.Errorf("AVIAN_API_KEY environment variable is required") |
| 90 | + } |
| 91 | + |
| 92 | + a.baseURL = os.Getenv("AVIAN_API_BASE") |
| 93 | + if a.baseURL == "" { |
| 94 | + a.baseURL = defaultBaseURL |
| 95 | + } |
| 96 | + |
| 97 | + a.model = opts.Model |
| 98 | + if a.model == "" { |
| 99 | + return fmt.Errorf("model name is required") |
| 100 | + } |
| 101 | + |
| 102 | + return nil |
| 103 | +} |
| 104 | + |
| 105 | +func (a *Avian) buildMessages(opts *pb.PredictOptions) []chatMessage { |
| 106 | + // If structured messages are provided (from chat completions), use them directly |
| 107 | + if len(opts.Messages) > 0 { |
| 108 | + messages := make([]chatMessage, len(opts.Messages)) |
| 109 | + for i, msg := range opts.Messages { |
| 110 | + messages[i] = chatMessage{ |
| 111 | + Role: msg.Role, |
| 112 | + Content: msg.Content, |
| 113 | + Name: msg.Name, |
| 114 | + ToolCallID: msg.ToolCallId, |
| 115 | + } |
| 116 | + } |
| 117 | + return messages |
| 118 | + } |
| 119 | + |
| 120 | + // Fall back to using the prompt as a single user message |
| 121 | + return []chatMessage{ |
| 122 | + {Role: "user", Content: opts.Prompt}, |
| 123 | + } |
| 124 | +} |
| 125 | + |
| 126 | +func (a *Avian) Predict(opts *pb.PredictOptions) (string, error) { |
| 127 | + reqBody := chatRequest{ |
| 128 | + Model: a.model, |
| 129 | + Messages: a.buildMessages(opts), |
| 130 | + Stream: false, |
| 131 | + } |
| 132 | + |
| 133 | + if opts.Tokens > 0 { |
| 134 | + reqBody.MaxTokens = int(opts.Tokens) |
| 135 | + } |
| 136 | + if opts.Temperature > 0 { |
| 137 | + reqBody.Temperature = opts.Temperature |
| 138 | + } |
| 139 | + if opts.TopP > 0 { |
| 140 | + reqBody.TopP = opts.TopP |
| 141 | + } |
| 142 | + if len(opts.StopPrompts) > 0 { |
| 143 | + reqBody.Stop = opts.StopPrompts |
| 144 | + } |
| 145 | + |
| 146 | + jsonBody, err := json.Marshal(reqBody) |
| 147 | + if err != nil { |
| 148 | + return "", fmt.Errorf("failed to marshal request: %w", err) |
| 149 | + } |
| 150 | + |
| 151 | + req, err := http.NewRequest("POST", a.baseURL+"/chat/completions", bytes.NewReader(jsonBody)) |
| 152 | + if err != nil { |
| 153 | + return "", fmt.Errorf("failed to create request: %w", err) |
| 154 | + } |
| 155 | + |
| 156 | + req.Header.Set("Content-Type", "application/json") |
| 157 | + req.Header.Set("Authorization", "Bearer "+a.apiKey) |
| 158 | + |
| 159 | + resp, err := http.DefaultClient.Do(req) |
| 160 | + if err != nil { |
| 161 | + return "", fmt.Errorf("request failed: %w", err) |
| 162 | + } |
| 163 | + defer resp.Body.Close() |
| 164 | + |
| 165 | + body, err := io.ReadAll(resp.Body) |
| 166 | + if err != nil { |
| 167 | + return "", fmt.Errorf("failed to read response: %w", err) |
| 168 | + } |
| 169 | + |
| 170 | + if resp.StatusCode != http.StatusOK { |
| 171 | + return "", fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(body)) |
| 172 | + } |
| 173 | + |
| 174 | + var chatResp chatResponse |
| 175 | + if err := json.Unmarshal(body, &chatResp); err != nil { |
| 176 | + return "", fmt.Errorf("failed to parse response: %w", err) |
| 177 | + } |
| 178 | + |
| 179 | + if len(chatResp.Choices) == 0 { |
| 180 | + return "", fmt.Errorf("no choices in response") |
| 181 | + } |
| 182 | + |
| 183 | + return chatResp.Choices[0].Message.Content, nil |
| 184 | +} |
| 185 | + |
| 186 | +func (a *Avian) PredictStream(opts *pb.PredictOptions, results chan string) error { |
| 187 | + reqBody := chatRequest{ |
| 188 | + Model: a.model, |
| 189 | + Messages: a.buildMessages(opts), |
| 190 | + Stream: true, |
| 191 | + } |
| 192 | + |
| 193 | + if opts.Tokens > 0 { |
| 194 | + reqBody.MaxTokens = int(opts.Tokens) |
| 195 | + } |
| 196 | + if opts.Temperature > 0 { |
| 197 | + reqBody.Temperature = opts.Temperature |
| 198 | + } |
| 199 | + if opts.TopP > 0 { |
| 200 | + reqBody.TopP = opts.TopP |
| 201 | + } |
| 202 | + if len(opts.StopPrompts) > 0 { |
| 203 | + reqBody.Stop = opts.StopPrompts |
| 204 | + } |
| 205 | + |
| 206 | + jsonBody, err := json.Marshal(reqBody) |
| 207 | + if err != nil { |
| 208 | + close(results) |
| 209 | + return fmt.Errorf("failed to marshal request: %w", err) |
| 210 | + } |
| 211 | + |
| 212 | + req, err := http.NewRequest("POST", a.baseURL+"/chat/completions", bytes.NewReader(jsonBody)) |
| 213 | + if err != nil { |
| 214 | + close(results) |
| 215 | + return fmt.Errorf("failed to create request: %w", err) |
| 216 | + } |
| 217 | + |
| 218 | + req.Header.Set("Content-Type", "application/json") |
| 219 | + req.Header.Set("Authorization", "Bearer "+a.apiKey) |
| 220 | + req.Header.Set("Accept", "text/event-stream") |
| 221 | + |
| 222 | + go func() { |
| 223 | + defer close(results) |
| 224 | + |
| 225 | + resp, err := http.DefaultClient.Do(req) |
| 226 | + if err != nil { |
| 227 | + fmt.Fprintf(os.Stderr, "avian: stream request failed: %v\n", err) |
| 228 | + return |
| 229 | + } |
| 230 | + defer resp.Body.Close() |
| 231 | + |
| 232 | + if resp.StatusCode != http.StatusOK { |
| 233 | + body, _ := io.ReadAll(resp.Body) |
| 234 | + fmt.Fprintf(os.Stderr, "avian: API returned status %d: %s\n", resp.StatusCode, string(body)) |
| 235 | + return |
| 236 | + } |
| 237 | + |
| 238 | + // Read SSE stream |
| 239 | + buf := make([]byte, 4096) |
| 240 | + var lineBuf strings.Builder |
| 241 | + |
| 242 | + for { |
| 243 | + n, err := resp.Body.Read(buf) |
| 244 | + if n > 0 { |
| 245 | + lineBuf.Write(buf[:n]) |
| 246 | + |
| 247 | + // Process complete lines |
| 248 | + for { |
| 249 | + text := lineBuf.String() |
| 250 | + idx := strings.Index(text, "\n") |
| 251 | + if idx < 0 { |
| 252 | + break |
| 253 | + } |
| 254 | + |
| 255 | + line := strings.TrimSpace(text[:idx]) |
| 256 | + lineBuf.Reset() |
| 257 | + lineBuf.WriteString(text[idx+1:]) |
| 258 | + |
| 259 | + if line == "" || line == "data: [DONE]" { |
| 260 | + continue |
| 261 | + } |
| 262 | + |
| 263 | + if !strings.HasPrefix(line, "data: ") { |
| 264 | + continue |
| 265 | + } |
| 266 | + |
| 267 | + data := strings.TrimPrefix(line, "data: ") |
| 268 | + |
| 269 | + var chunk streamChunk |
| 270 | + if jsonErr := json.Unmarshal([]byte(data), &chunk); jsonErr != nil { |
| 271 | + continue |
| 272 | + } |
| 273 | + |
| 274 | + if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" { |
| 275 | + results <- chunk.Choices[0].Delta.Content |
| 276 | + } |
| 277 | + } |
| 278 | + } |
| 279 | + |
| 280 | + if err != nil { |
| 281 | + if err != io.EOF { |
| 282 | + fmt.Fprintf(os.Stderr, "avian: stream read error: %v\n", err) |
| 283 | + } |
| 284 | + break |
| 285 | + } |
| 286 | + } |
| 287 | + }() |
| 288 | + |
| 289 | + return nil |
| 290 | +} |
0 commit comments