111 lines
2.4 KiB
Go
111 lines
2.4 KiB
Go
package rag
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
|
)
|
|
|
|
// Service provides RAG (Retrieval-Augmented Generation) functionality
|
|
type Service interface {
|
|
Retrieve(ctx context.Context, query string, tenantID string, topK int) ([]RetrievedDoc, error)
|
|
Ingest(ctx context.Context, doc *Document) error
|
|
}
|
|
|
|
// RetrievedDoc represents a single document returned by Service.Retrieve.
type RetrievedDoc struct {
	// ID is the document identifier (the id column of knowledge_base).
	ID string
	// Title is the document title.
	Title string
	// Content is the full stored text of the document.
	Content string
	// URL is read from the "url" key of the stored metadata. It is the empty
	// string when that key is absent or null.
	URL string
	// Score is the relevance score computed by the retrieval query; results
	// are ordered by it, highest first.
	Score float64
}
|
|
|
|
// Document represents a document to be ingested into the knowledge base.
type Document struct {
	// ID identifies the document. Ingest uses it as the upsert key, so
	// ingesting a second document with the same ID replaces the first.
	ID string
	// TenantID is the tenant the document belongs to; Retrieve only returns
	// documents whose tenant matches.
	TenantID string
	// Title is the document title.
	Title string
	// Content is the document text that retrieval searches over.
	Content string
	// URL is stored by Ingest under the "url" key of the persisted metadata.
	URL string
	// Metadata holds additional key/value pairs persisted with the document.
	// Ingest merges these over the "url" entry, so a "url" key here takes
	// precedence over the URL field.
	Metadata map[string]interface{}
}
|
|
|
|
// RAGService implements RAG using pgvector
|
|
type RAGService struct {
|
|
db *pgxpool.Pool
|
|
}
|
|
|
|
// NewRAGService creates a new RAG service
|
|
func NewRAGService(db *pgxpool.Pool) *RAGService {
|
|
return &RAGService{
|
|
db: db,
|
|
}
|
|
}
|
|
|
|
// Retrieve retrieves relevant documents
|
|
func (s *RAGService) Retrieve(ctx context.Context, query string, tenantID string, topK int) ([]RetrievedDoc, error) {
|
|
if topK <= 0 {
|
|
topK = 5
|
|
}
|
|
|
|
// TODO: Generate embedding for query
|
|
// For now, use simple text search
|
|
querySQL := `
|
|
SELECT id, title, content, metadata->>'url' as url,
|
|
ts_rank(to_tsvector('english', content), plainto_tsquery('english', $1)) as score
|
|
FROM knowledge_base
|
|
WHERE tenant_id = $2
|
|
ORDER BY score DESC
|
|
LIMIT $3
|
|
`
|
|
|
|
rows, err := s.db.Query(ctx, querySQL, query, tenantID, topK)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to query: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var docs []RetrievedDoc
|
|
for rows.Next() {
|
|
var doc RetrievedDoc
|
|
var url *string
|
|
if err := rows.Scan(&doc.ID, &doc.Title, &doc.Content, &url, &doc.Score); err != nil {
|
|
continue
|
|
}
|
|
if url != nil {
|
|
doc.URL = *url
|
|
}
|
|
docs = append(docs, doc)
|
|
}
|
|
|
|
return docs, nil
|
|
}
|
|
|
|
// Ingest ingests a document into the knowledge base
|
|
func (s *RAGService) Ingest(ctx context.Context, doc *Document) error {
|
|
// TODO: Generate embedding for document content
|
|
// For now, just insert without embedding
|
|
query := `
|
|
INSERT INTO knowledge_base (id, tenant_id, title, content, metadata)
|
|
VALUES ($1, $2, $3, $4, $5)
|
|
ON CONFLICT (id) DO UPDATE SET
|
|
title = $3,
|
|
content = $4,
|
|
metadata = $5,
|
|
updated_at = NOW()
|
|
`
|
|
|
|
metadata := map[string]interface{}{
|
|
"url": doc.URL,
|
|
}
|
|
for k, v := range doc.Metadata {
|
|
metadata[k] = v
|
|
}
|
|
|
|
_, err := s.db.Exec(ctx, query, doc.ID, doc.TenantID, doc.Title, doc.Content, metadata)
|
|
return err
|
|
}
|
|
|