Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,6 @@ src/training/cache_embeddings/aws/vllm-inventory-*.ini
grafana/
grafana-data/
prometheus-data/

# Local dev makefile (not committed)
Makefile.local
10 changes: 10 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,16 @@ global:
embedding_model: all-MiniLM-L6-v2
request_timeout_seconds: 30
search_type: hybrid
valkey:
host: valkey
port: 6379
database: 0
password: valkey-secret
connect_timeout: 5
collection_prefix: "vsr_vs_"
metric_type: COSINE
index_m: 16
index_ef_construction: 200

integrations:
tools:
Expand Down
157 changes: 157 additions & 0 deletions deploy/addons/valkey/valkey-vectorstore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package main

import (
"context"
"fmt"
"log"
"time"

candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/vectorstore"
)

// doc is one sample text chunk used by this example.
type doc struct {
	id       string // chunk identifier, e.g. "c1"
	fileID   string // identifier of the file the chunk belongs to, e.g. "f1"
	filename string // name of that file, e.g. "france.txt"
	content  string // chunk text that gets embedded and stored
}

// sampleDocs is the fixed corpus the example embeds and inserts: ten chunks
// (c1-c10), two per file, across five country files (f1-f5).
var sampleDocs = []doc{
	// Europe
	{id: "c1", fileID: "f1", filename: "france.txt", content: "The capital of France is Paris. It is known for the Eiffel Tower."},
	{id: "c2", fileID: "f1", filename: "france.txt", content: "France is a country in Western Europe with a rich cultural heritage."},
	{id: "c3", fileID: "f2", filename: "germany.txt", content: "Berlin is the capital of Germany. It is famous for the Brandenburg Gate."},
	{id: "c4", fileID: "f2", filename: "germany.txt", content: "Germany is the largest economy in Europe and a leader in engineering."},
	// Asia
	{id: "c5", fileID: "f3", filename: "japan.txt", content: "Tokyo is the capital of Japan. It is one of the most populous cities in the world."},
	{id: "c6", fileID: "f3", filename: "japan.txt", content: "Japan is an island nation in East Asia known for its technology and cuisine."},
	{id: "c7", fileID: "f4", filename: "india.txt", content: "New Delhi is the capital of India. Mumbai is the most populated city in India."},
	{id: "c8", fileID: "f4", filename: "india.txt", content: "India is the most populous country in the world with over 1.4 billion people."},
	{id: "c9", fileID: "f5", filename: "china.txt", content: "Beijing is the capital of China. Shanghai is the largest city by population."},
	{id: "c10", fileID: "f5", filename: "china.txt", content: "China has the second largest economy in the world and a long history of innovation."},
}

// main runs the Valkey vector store demo end to end: initialize the backend,
// create a uniquely named collection, embed and insert the sample documents,
// run plain and filtered searches, then delete the collection.
//
// NOTE(review): the helpers called here abort with log.Fatalf on error, which
// exits the process without running deferred calls, so on a failure the
// collection is not deleted and the backend is not closed.
func main() {
	fmt.Print("Valkey Vector Store Backend Example\n" +
		"====================================\n")

	store := initBackend()
	defer store.Close()

	ctx := context.Background()

	// A nanosecond timestamp keeps the collection name distinct between runs.
	collectionID := fmt.Sprintf("demo_%d", time.Now().UnixNano())
	createCollection(ctx, store, collectionID)
	defer cleanupCollection(ctx, store, collectionID)

	embedAndInsert(ctx, store, collectionID)

	// Presumably gives the server time to index the new chunks before they
	// are queried — TODO confirm against the backend's indexing behavior.
	const settleDelay = 500 * time.Millisecond
	time.Sleep(settleDelay)

	runSearches(ctx, store, collectionID)
	runFilteredSearch(ctx, store, collectionID)

	fmt.Println("\n✓ Example completed successfully!")
}

// initBackend loads the embedding model and opens a connection to a Valkey
// server at localhost:6379, returning the connected backend.
//
// Any failure is fatal: the process exits via log.Fatalf.
func initBackend() *vectorstore.ValkeyBackend {
	fmt.Println("\n1. Initializing embedding model...")
	err := candle_binding.InitModel("sentence-transformers/all-MiniLM-L6-v2", true)
	if err != nil {
		log.Fatalf("Failed to initialize embedding model: %v", err)
	}
	fmt.Println("✓ Embedding model initialized")

	fmt.Println("\n2. Connecting to Valkey...")
	// Connection settings are hard-coded for a local, unauthenticated server.
	cfg := vectorstore.ValkeyBackendConfig{
		Host:             "localhost",
		Port:             6379,
		CollectionPrefix: "example_vs_",
		MetricType:       "COSINE",
		ConnectTimeout:   5,
	}
	client, err := vectorstore.NewValkeyBackend(cfg)
	if err != nil {
		log.Fatalf("Failed to connect to Valkey: %v", err)
	}
	fmt.Println("✓ Connected to Valkey")
	return client
}

// createCollection creates the collection storeID on backend with a vector
// dimension of 384. A creation error is fatal (log.Fatalf).
func createCollection(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
	// Presumably the output size of the embedding model used by this
	// example — TODO confirm against the model.
	const dimension = 384

	fmt.Printf("\n3. Creating collection %q (dimension=%d)...\n", storeID, dimension)
	err := backend.CreateCollection(ctx, storeID, dimension)
	if err != nil {
		log.Fatalf("Failed to create collection: %v", err)
	}
	fmt.Println("✓ Collection created")
}

// cleanupCollection deletes the collection storeID from backend.
//
// Deletion is best-effort: a failure is logged as a warning and does not
// terminate the program.
func cleanupCollection(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
	fmt.Printf("\n7. Cleaning up collection %q...\n", storeID)
	err := backend.DeleteCollection(ctx, storeID)
	if err == nil {
		fmt.Println("✓ Collection deleted")
		return
	}
	log.Printf("Warning: cleanup failed: %v", err)
}

// embedAndInsert computes an embedding for every entry in sampleDocs and
// inserts all resulting chunks into collection storeID in a single call.
//
// Each chunk's ChunkIndex is its position in sampleDocs. An embedding or
// insert error is fatal (log.Fatalf).
func embedAndInsert(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
	fmt.Println("\n4. Embedding and inserting documents...")

	batch := make([]vectorstore.EmbeddedChunk, len(sampleDocs))
	for idx := range sampleDocs {
		src := sampleDocs[idx]
		vec, err := candle_binding.GetEmbedding(src.content, 0)
		if err != nil {
			log.Fatalf("Failed to embed document %d: %v", idx, err)
		}
		batch[idx] = vectorstore.EmbeddedChunk{
			ID:            src.id,
			FileID:        src.fileID,
			Filename:      src.filename,
			Content:       src.content,
			Embedding:     vec,
			ChunkIndex:    idx,
			VectorStoreID: storeID,
		}
	}

	insertErr := backend.InsertChunks(ctx, storeID, batch)
	if insertErr != nil {
		log.Fatalf("Failed to insert chunks: %v", insertErr)
	}
	fmt.Printf("✓ Inserted %d chunks\n", len(batch))
}

// runSearches embeds three fixed natural-language queries and, for each one,
// prints up to three hits from collection storeID that pass the 0.80
// threshold handed to Search. A query with no hits prints a placeholder line.
//
// An embedding or search error is fatal (log.Fatalf).
func runSearches(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
	const (
		topK     = 3
		minScore = 0.80
		previewW = 70 // max content length shown per hit
	)

	fmt.Println("\n5. Searching for similar documents (threshold=0.80)...")

	questions := []string{
		"What is the capital of France?",
		"Tell me about German engineering",
		"Most populated city in Asia",
	}
	for _, q := range questions {
		fmt.Printf("\n Query: %q\n", q)

		vec, err := candle_binding.GetEmbedding(q, 0)
		if err != nil {
			log.Fatalf("Failed to embed query: %v", err)
		}

		hits, err := backend.Search(ctx, storeID, vec, topK, minScore, nil)
		if err != nil {
			log.Fatalf("Search failed: %v", err)
		}

		if len(hits) == 0 {
			fmt.Println(" (no results above threshold)")
			continue
		}
		for i, hit := range hits {
			fmt.Printf(" #%d [%.4f] %s: %s\n", i+1, hit.Score, hit.Filename, truncate(hit.Content, previewW))
		}
	}
}

// runFilteredSearch embeds the query "capital city" and searches collection
// storeID for up to five hits, restricted by a file_id filter to "f2" (the
// germany.txt chunks in sampleDocs). The threshold passed to Search is 0.0.
//
// Every hit is printed with its rank, score, filename and a truncated content
// preview. When the search returns no hits an explicit placeholder line is
// printed, matching runSearches, so an empty result is distinguishable from a
// step that silently produced no output.
//
// An embedding or search error is fatal (log.Fatalf).
func runFilteredSearch(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
	fmt.Println("\n6. Searching with file_id filter (only germany.txt)...")
	qEmb, err := candle_binding.GetEmbedding("capital city", 0)
	if err != nil {
		log.Fatalf("Failed to embed query: %v", err)
	}

	filter := map[string]interface{}{"file_id": "f2"}
	results, err := backend.Search(ctx, storeID, qEmb, 5, 0.0, filter)
	if err != nil {
		log.Fatalf("Filtered search failed: %v", err)
	}

	if len(results) == 0 {
		fmt.Println(" (no results matched the filter)")
		return
	}
	for rank, r := range results {
		fmt.Printf(" #%d [%.4f] %s: %s\n", rank+1, r.Score, r.Filename, truncate(r.Content, 70))
	}
}

// truncate shortens s to at most maxLen bytes for display.
//
// Behavior:
//   - If s already fits in maxLen bytes it is returned unchanged.
//   - If maxLen <= 0 the result is "".
//   - If maxLen >= 3 the removed tail is replaced with "...", and the result
//     is at most maxLen bytes long including the ellipsis.
//   - If 0 < maxLen < 3 there is no room for the ellipsis, so a bare prefix
//     of at most maxLen bytes is returned instead of panicking on a negative
//     slice bound.
//
// The cut point is moved backwards off any UTF-8 continuation byte so that a
// multi-byte character is never split in half.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 0 {
		return ""
	}

	const ellipsis = "..."
	cut, suffix := maxLen-len(ellipsis), ellipsis
	if cut < 0 {
		// Too short to fit the ellipsis: fall back to a plain prefix.
		cut, suffix = maxLen, ""
	}

	// len(s) > maxLen >= cut, so s[cut] is always in range. Bytes of the form
	// 10xxxxxx are UTF-8 continuation bytes; step back to the rune's first byte.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + suffix
}
102 changes: 102 additions & 0 deletions deploy/examples/runtime/vector-store/valkey.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Valkey Vector Store Configuration
# This configuration file contains settings for using Valkey as the vector store backend.
# To use this configuration:
# 1. Set backend_type: "valkey" in your main config.yaml under global.stores.vector_store
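# 2. Inline the Valkey settings from this file into your canonical config.yaml
#    under global.stores.vector_store.valkey. The canonical config uses flat
#    keys (host, port, database, password, connect_timeout, collection_prefix,
#    metric_type, index_m, index_ef_construction), so translate the nested
#    names used below (e.g. connection.timeout -> connect_timeout,
#    index.params.M -> index_m, index.params.efConstruction -> index_ef_construction).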
# 3. Ensure Valkey is running with the valkey-search module enabled
# 4. The valkey-bundle:latest image includes the valkey-search module
#
# Quick start:
# docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle:latest
#
# Note: CI and local dev targets use valkey-bundle:unstable because
# valkey-search 1.2.0-rc3+ (text search support) is only in that tag for now.

# Valkey connection settings
connection:
# Valkey server host (change for production deployment)
host: "localhost"

# Valkey server port
port: 6379

# Database number (0-15)
database: 0

# Password for Valkey authentication (empty for no auth)
password: ""

# Connection timeout in seconds
timeout: 5

# Index settings for HNSW vector index
index:
# Prefix for all vector store keys/indexes in Valkey
collection_prefix: "vsr_vs_"

# Metric type for similarity calculation
# Options: COSINE (default), L2, IP (Inner Product)
metric_type: COSINE

# HNSW index parameters
params:
M: 16 # Number of bi-directional links per node
efConstruction: 200 # Search scope during index construction

# Search configuration
search:
# Number of top results to retrieve
topk: 10

# Development and debugging settings
development:
# Drop indexes on startup (WARNING: deletes all indexed data)
drop_index_on_startup: false

# Create indexes automatically if they don't exist
auto_create_index: true

# Print detailed error messages
verbose_errors: true

# Logging settings
logging:
# Log level for Valkey client operations (debug, info, warn, error)
level: info

# Enable query/search logging for debugging
enable_query_log: false

# Enable performance metrics collection
enable_metrics: true

# Example configurations for different environments:
#
# Local Development (Docker):
# docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle:latest
# connection:
# host: "localhost"
# port: 6379
# development:
# drop_index_on_startup: true
# verbose_errors: true
#
# Production:
# connection:
# host: "valkey.production.svc.cluster.local"
# port: 6379
# password: "${VALKEY_PASSWORD}"
# timeout: 10
# development:
# drop_index_on_startup: false
# auto_create_index: false
# verbose_errors: false
# logging:
# level: warn
# enable_query_log: false
#
# Kubernetes Deployment:
# connection:
# host: "valkey-service.valkey-system.svc.cluster.local"
# port: 6379
# timeout: 10
15 changes: 15 additions & 0 deletions src/semantic-router/cmd/runtime_bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,21 @@ func buildVectorStoreBackendConfigs(cfg *config.RouterConfig) vectorstore.Backen
SearchType: lsCfg.SearchType,
},
}
case "valkey":
vCfg := cfg.VectorStore.Valkey
return vectorstore.BackendConfigs{
Valkey: vectorstore.ValkeyBackendConfig{
Host: vCfg.Host,
Port: vCfg.Port,
Password: vCfg.Password,
Database: vCfg.Database,
CollectionPrefix: vCfg.CollectionPrefix,
MetricType: vCfg.MetricType,
IndexM: vCfg.IndexM,
IndexEf: vCfg.IndexEfConstruction,
ConnectTimeout: vCfg.ConnectTimeout,
},
}
default:
return vectorstore.BackendConfigs{}
}
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ require (
github.com/stretchr/testify v1.11.1
github.com/tidwall/gjson v1.18.0
github.com/tidwall/sjson v1.2.5
github.com/valkey-io/valkey-glide/go/v2 v2.2.7
github.com/valkey-io/valkey-glide/go/v2 v2.3.0
github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
github.com/vllm-project/semantic-router/ml-binding v0.0.0-00010101000000-000000000000
github.com/vllm-project/semantic-router/nlp-binding v0.0.0-00010101000000-000000000000
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,8 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
github.com/valkey-io/valkey-glide/go/v2 v2.2.7 h1:xOl37intKSQ1pty1tE4a+kQ5GWrX0Fk0OmYpfo2eVTk=
github.com/valkey-io/valkey-glide/go/v2 v2.2.7/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho=
github.com/valkey-io/valkey-glide/go/v2 v2.3.0 h1:oer4fOteJYJv3MY3T2t9RIdZHuZzuJYcKyFOyZksFNM=
github.com/valkey-io/valkey-glide/go/v2 v2.3.0/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func assertReferenceConfigVectorStoreCoverage(t testingT, vectorStore map[string
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "memory"), reflect.TypeOf(VectorStoreMemoryConfig{}), "global.stores.vector_store.memory")
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "llama_stack"), reflect.TypeOf(LlamaStackVectorStoreConfig{}), "global.stores.vector_store.llama_stack")
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "milvus"), reflect.TypeOf(MilvusConfig{}), "global.stores.vector_store.milvus")
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "valkey"), reflect.TypeOf(ValkeyVectorStoreConfig{}), "global.stores.vector_store.valkey")
}

func assertReferenceConfigIntegrationGlobalCoverage(t testingT, integrations map[string]interface{}) {
Expand Down
Loading
Loading