KnowledgeRefinery/daemon-go/internal/lmstudio/client.go
oho 38a99476d6 Knowledge Refinery: local-first semantic search & 3D concept visualization
macOS app for corpus ingestion, semantic search, and concept universe
visualization powered by local LLMs via LM Studio.

Architecture:
- Go daemon (17MB single binary, zero dependencies)
  - chi router, pure-Go SQLite, tiktoken tokenizer
  - 6-stage pipeline: scan → extract → chunk → embed → annotate → conceptualize
  - Brute-force cosine vector search in memory
  - 89 tests across 8 packages
- SwiftUI app (macOS 15+)
  - Multi-workspace management with auto-start daemons
  - Live pipeline progress, search, concept browser
  - WebGPU 3D universe renderer with Canvas2D fallback
  - Custom crystal app icon
2026-02-13 18:09:46 +01:00

345 lines
8.1 KiB
Go

package lmstudio
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strings"
	"time"
	"unicode/utf8"
)
// Client communicates with LM Studio's OpenAI-compatible API.
//
// It keeps two views of the server address: baseURL for the
// OpenAI-compatible endpoints and rootURL for LM Studio's native endpoints.
// Create one with NewClient.
type Client struct {
	baseURL       string       // OpenAI-compatible base, no trailing slash, e.g. http://127.0.0.1:1234/v1
	rootURL       string       // server root, e.g. http://127.0.0.1:1234 (for native API)
	httpClient    *http.Client // client carrying the caller-configured timeout
	contextLength *int         // cached context window size; nil until GetContextLength stores a value
}

// NewClient builds a Client for the LM Studio server at baseURL.
//
// baseURL is the OpenAI-compatible base address, for example
// "http://127.0.0.1:1234/v1". Trailing slashes are removed before it is
// stored, so endpoint paths appended later ("/models", "/embeddings", ...)
// never produce a double slash. The native-API root is derived by also
// dropping a trailing "/v1".
//
// timeout is the HTTP client timeout in seconds; fractional values are
// honoured.
func NewClient(baseURL string, timeout float64) *Client {
	base := strings.TrimRight(baseURL, "/")
	return &Client{
		baseURL: base,
		rootURL: strings.TrimSuffix(base, "/v1"),
		httpClient: &http.Client{
			Timeout: time.Duration(timeout * float64(time.Second)),
		},
	}
}
// -- Model types --
// modelEntry is one element of the "data" array in a models listing
// (see modelsResponse).
type modelEntry struct {
// ID is read from the "id" JSON field; the model-selection helpers in this
// file match keywords against it and return it as the model identifier.
ID string `json:"id"`
// Object is read from the "object" JSON field.
Object string `json:"object"`
}
// modelsResponse is the JSON envelope decoded from the "/models" endpoint.
type modelsResponse struct {
// Data is read from the "data" JSON field and holds one entry per model.
Data []modelEntry `json:"data"`
}
// embeddingItem is one element of the "data" array in an embeddings response
// (see embeddingsResponse).
type embeddingItem struct {
// Embedding is the vector read from the "embedding" JSON field.
Embedding []float64 `json:"embedding"`
}
// embeddingsResponse is the JSON envelope decoded from the "/embeddings"
// endpoint.
type embeddingsResponse struct {
// Data is read from the "data" JSON field; Embed copies one vector out of
// each element, in the order they appear.
Data []embeddingItem `json:"data"`
}
// ChatMessage is a single message of a chat conversation. It is used both in
// the request sent to the chat completions endpoint and in the decoded reply.
type ChatMessage struct {
// Role is serialized as the "role" JSON field; this file uses the values
// "system" and "user" when building requests.
Role string `json:"role"`
// Content is serialized as the "content" JSON field and carries the
// message text.
Content string `json:"content"`
}
// ChatMsg is a convenience constructor that returns a ChatMessage carrying
// the given role and content.
func ChatMsg(role, content string) ChatMessage {
	var msg ChatMessage
	msg.Role, msg.Content = role, content
	return msg
}
// chatChoice is one element of the "choices" array in a chat completions
// response (see chatResponse).
type chatChoice struct {
// Message is read from the "message" JSON field.
Message ChatMessage `json:"message"`
}
// chatResponse is the JSON envelope decoded from the "/chat/completions"
// endpoint.
type chatResponse struct {
// Choices is read from the "choices" JSON field; Chat only looks at the
// first element.
Choices []chatChoice `json:"choices"`
}
// HealthCheck reports whether LM Studio currently lists at least one model.
// It is a thin wrapper around ListModels, so whenever ListModels yields no
// entries the result is false.
func (c *Client) HealthCheck() bool {
	return len(c.ListModels()) != 0
}
// ListModels returns the models reported by GET {baseURL}/models.
//
// Failures are not surfaced to the caller: a transport error, a non-200
// status, or an undecodable body is logged as a warning and yields nil.
func (c *Client) ListModels() []modelEntry {
	resp, err := c.httpClient.Get(c.baseURL + "/models")
	if err != nil {
		slog.Warn("LM Studio health check failed", "error", err)
		return nil
	}
	defer resp.Body.Close()

	// An error status carries an error payload rather than a model list, so
	// it must not be decoded as one.
	if resp.StatusCode != http.StatusOK {
		slog.Warn("LM Studio models request returned unexpected status", "status", resp.StatusCode)
		return nil
	}

	var result modelsResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Warn("Failed to decode models response", "error", err)
		return nil
	}
	return result.Data
}
// GetContextLength returns the context window size, in tokens, that LM Studio
// reports through its native GET {rootURL}/api/v0/models endpoint.
//
// Model resolution: when modelID is non-nil its value is looked up by "id";
// otherwise the result of GetChatModel is used. If no entry matches, the
// first entry whose "type" is "llm" is used instead.
//
// The first value actually read from a model entry is cached on the Client
// and returned by every later call. The cache holds a single value and is not
// keyed by model, so after it is set modelID no longer influences the result.
//
// When the endpoint cannot be reached, answers with a non-200 status, returns
// an undecodable body, or lists no usable model, 4096 is returned. That
// fallback is deliberately not cached: such failures are typically transient
// (server still starting, no model loaded yet), and caching it would pin the
// client to 4096 for its whole lifetime.
func (c *Client) GetContextLength(modelID *string) int {
	const fallback = 4096

	if c.contextLength != nil {
		return *c.contextLength
	}

	// A dedicated short timeout is used here instead of c.httpClient, whose
	// timeout is whatever the caller passed to NewClient.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(c.rootURL + "/api/v0/models")
	if err != nil {
		slog.Warn("Failed to query context length", "error", err)
		return fallback
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		slog.Warn("Failed to query context length", "status", resp.StatusCode)
		return fallback
	}

	var result struct {
		Data []map[string]any `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Warn("Failed to decode context length response", "error", err)
		return fallback
	}

	target := ""
	if modelID != nil {
		target = *modelID
	} else if m := c.GetChatModel(); m != nil {
		target = *m
	}

	// Preferred: the entry for the requested (or auto-selected) model.
	for _, m := range result.Data {
		if id, ok := m["id"].(string); ok && id == target {
			ctx := getContextFromModel(m)
			c.contextLength = &ctx
			slog.Info("LM Studio context window", "model", target, "tokens", ctx)
			return ctx
		}
	}

	// Fallback: first LLM model.
	for _, m := range result.Data {
		if t, ok := m["type"].(string); ok && t == "llm" {
			ctx := getContextFromModel(m)
			c.contextLength = &ctx
			return ctx
		}
	}

	return fallback
}
// getContextFromModel extracts a context window size from one decoded model
// entry. It prefers "loaded_context_length" and then "max_context_length",
// accepting the first that is a JSON number greater than zero, and returns
// 4096 when neither qualifies.
func getContextFromModel(m map[string]any) int {
	for _, key := range [...]string{"loaded_context_length", "max_context_length"} {
		if n, isNumber := m[key].(float64); isNumber && n > 0 {
			return int(n)
		}
	}
	return 4096
}
// GetEmbeddingModel picks the model to use for embeddings from ListModels.
//
// The first model whose ID contains, case-insensitively, one of the markers
// "embed", "e5", "bge", "gte" or "nomic" is chosen. When none matches, the
// first listed model is returned as a fallback. The result is nil only when
// no models are listed at all.
func (c *Client) GetEmbeddingModel() *string {
	available := c.ListModels()
	if len(available) == 0 {
		return nil
	}

	markers := [...]string{"embed", "e5", "bge", "gte", "nomic"}
	for _, entry := range available {
		name := strings.ToLower(entry.ID)
		for _, marker := range markers {
			if strings.Contains(name, marker) {
				id := entry.ID
				return &id
			}
		}
	}
	return &available[0].ID
}
// GetChatModel picks the model to use for chat completions from ListModels.
//
// A model is skipped when its ID contains, case-insensitively, one of the
// markers "embed", "e5", "bge", "gte", "nomic" or "whisper"; the first model
// that is not skipped is chosen. When every model is skipped, the first
// listed model is returned as a fallback. The result is nil only when no
// models are listed at all.
func (c *Client) GetChatModel() *string {
	available := c.ListModels()
	nonChat := [...]string{"embed", "e5", "bge", "gte", "nomic", "whisper"}

candidates:
	for _, entry := range available {
		name := strings.ToLower(entry.ID)
		for _, marker := range nonChat {
			if strings.Contains(name, marker) {
				continue candidates
			}
		}
		id := entry.ID
		return &id
	}

	if len(available) == 0 {
		return nil
	}
	return &available[0].ID
}
// Embed sends texts to POST {baseURL}/embeddings and returns one vector per
// input, in input order.
//
// model selects the embedding model; when nil, GetEmbeddingModel chooses one.
// An empty texts slice returns (nil, nil) without contacting the server.
//
// An error is returned when no model is available, the request cannot be
// encoded or sent, the server answers with a non-200 status (the response
// body is included in the message), the body cannot be decoded, or the
// number of returned vectors differs from len(texts).
func (c *Client) Embed(texts []string, model *string) ([][]float64, error) {
	if len(texts) == 0 {
		return nil, nil
	}

	if model == nil {
		model = c.GetEmbeddingModel()
	}
	if model == nil {
		return nil, fmt.Errorf("no embedding model available in LM Studio")
	}

	payload, err := json.Marshal(map[string]any{
		"model": *model,
		"input": texts,
	})
	if err != nil {
		return nil, fmt.Errorf("encode embed request: %w", err)
	}

	resp, err := c.httpClient.Post(c.baseURL+"/embeddings", "application/json", bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("embed request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("embed failed (status %d): %s", resp.StatusCode, string(b))
	}

	var result embeddingsResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("decode embeddings: %w", err)
	}

	// Callers pair vectors with inputs by position, so a short or long
	// result would silently misalign them.
	if len(result.Data) != len(texts) {
		return nil, fmt.Errorf("embed returned %d vectors for %d inputs", len(result.Data), len(texts))
	}

	vectors := make([][]float64, len(result.Data))
	for i, item := range result.Data {
		vectors[i] = item.Embedding
	}
	return vectors, nil
}
// EmbedSingle embeds one text by delegating to Embed with a one-element
// batch. Errors from Embed are passed through unchanged; an empty result is
// reported as an error.
func (c *Client) EmbedSingle(text string, model *string) ([]float64, error) {
	batch, err := c.Embed([]string{text}, model)
	switch {
	case err != nil:
		return nil, err
	case len(batch) == 0:
		return nil, fmt.Errorf("no embedding returned")
	}
	return batch[0], nil
}
// Chat sends messages to POST {baseURL}/chat/completions and returns the
// content of the first choice.
//
// model selects the chat model; when nil, GetChatModel chooses one.
// temperature and maxTokens are forwarded as "temperature" and "max_tokens".
//
// Reasoning output is removed from the reply: if the content contains
// "</think>", only the trimmed text after its first occurrence is returned;
// if the content (ignoring leading whitespace) starts with "<think>" but is
// never closed, the empty string is returned. Content without think tags is
// returned unmodified.
//
// A response with no choices yields ("", nil). An error is returned when no
// model is available, the request cannot be encoded (for example a NaN
// temperature) or sent, the server answers with a non-200 status (the
// response body is included in the message), or the body cannot be decoded.
func (c *Client) Chat(messages []ChatMessage, model *string, temperature float64, maxTokens int) (string, error) {
	if model == nil {
		model = c.GetChatModel()
	}
	if model == nil {
		return "", fmt.Errorf("no chat model available in LM Studio")
	}

	payload, err := json.Marshal(map[string]any{
		"model":       *model,
		"messages":    messages,
		"temperature": temperature,
		"max_tokens":  maxTokens,
	})
	if err != nil {
		// Without this check a failed encode would be sent as an empty body.
		return "", fmt.Errorf("encode chat request: %w", err)
	}

	resp, err := c.httpClient.Post(c.baseURL+"/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("chat request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		b, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("chat failed (status %d): %s", resp.StatusCode, string(b))
	}

	var result chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("decode chat response: %w", err)
	}
	if len(result.Choices) == 0 {
		return "", nil
	}

	text := result.Choices[0].Message.Content

	// Strip <think>...</think> blocks from thinking models.
	if _, answer, closed := strings.Cut(text, "</think>"); closed {
		text = strings.TrimSpace(answer)
	} else if strings.HasPrefix(strings.TrimSpace(text), "<think>") {
		// The reply ended while the model was still reasoning, so there is
		// no answer to return.
		text = ""
	}
	return text, nil
}
// AnnotateChunk asks the chat model to annotate chunkText, using
// promptTemplate as the system message and the (possibly truncated) chunk as
// the user message.
//
// The chunk is limited to max(400, (contextLength-2000)*3) bytes, where
// contextLength comes from GetContextLength(model) — presumably reserving
// about 2000 tokens for the prompt and reply at roughly 3 bytes per token.
// Truncation never splits a multi-byte UTF-8 character: the cut moves back to
// the previous character boundary.
//
// The request is made through Chat with temperature 0.1 and a 2048-token
// reply limit. If the trimmed reply starts with a Markdown code fence, every
// line that begins with "```" is dropped and the remainder is trimmed again.
// Errors from Chat are returned unchanged.
func (c *Client) AnnotateChunk(chunkText, promptTemplate string, model *string) (string, error) {
	ctxTokens := c.GetContextLength(model)

	maxChunkChars := (ctxTokens - 2000) * 3
	if maxChunkChars < 400 {
		maxChunkChars = 400
	}

	truncated := chunkText
	if len(truncated) > maxChunkChars {
		cut := maxChunkChars
		// A UTF-8 sequence is at most utf8.UTFMax bytes long, so the nearest
		// character boundary is at most UTFMax-1 bytes back. The bound also
		// keeps malformed input from shrinking the chunk without limit.
		for steps := 1; steps < utf8.UTFMax && !utf8.RuneStart(truncated[cut]); steps++ {
			cut--
		}
		truncated = truncated[:cut]
	}

	messages := []ChatMessage{
		{Role: "system", Content: promptTemplate},
		{Role: "user", Content: truncated},
	}
	raw, err := c.Chat(messages, model, 0.1, 2048)
	if err != nil {
		return "", err
	}

	// Strip markdown code fences.
	text := strings.TrimSpace(raw)
	if strings.HasPrefix(text, "```") {
		var kept []string
		for _, line := range strings.Split(text, "\n") {
			if !strings.HasPrefix(strings.TrimSpace(line), "```") {
				kept = append(kept, line)
			}
		}
		text = strings.TrimSpace(strings.Join(kept, "\n"))
	}
	return text, nil
}
// max returns the larger of a and b. Being declared at package level, it
// takes precedence over the built-in max within this package.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}