222 lines
4.7 KiB
Go
222 lines
4.7 KiB
Go
package audiocap
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/generative-ai-go/genai"
|
|
"google.golang.org/api/option"
|
|
)
|
|
|
|
// ANSI escape sequences used to style terminal output. Each constant is
// written before the text it should affect; Reset is written afterwards to
// end the styling (see the Printf calls in GetResponse).
const (
	Reset  = "\033[0m"
	Dim    = "\033[2m"
	Red    = "\033[31m"
	Green  = "\033[32m"
	Yellow = "\033[33m"
	Cyan   = "\033[36m"
)
|
|
|
|
// Responder accumulates question text in a buffer, sends it to a generative
// model and optionally hands the reply to a speech client.
type Responder struct {
	// tts speaks the model reply; GetResponse skips speaking when it is nil.
	tts *Client

	// BufferedResponse holds the text appended via Set that has not yet been
	// consumed. Access it through Set/Get/GetAndClear/Clear, which take mu.
	BufferedResponse string

	// mu guards BufferedResponse and running.
	mu sync.Mutex

	// running is true while a GetResponse goroutine is processing a request.
	running bool

	// genaiClient is the client the model was created from.
	genaiClient *genai.Client

	// model is the configured generative model used by callGemini.
	model *genai.GenerativeModel
}
|
|
|
|
func NewResponder(ctx context.Context, ttsClient *Client) (*Responder, error) {
|
|
apiKey := os.Getenv("GEMINI_API_KEY")
|
|
if apiKey == "" {
|
|
return nil, errors.New("GEMINI_API_KEY not set")
|
|
}
|
|
|
|
client, err := genai.NewClient(ctx, option.WithAPIKey(apiKey))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("genai client: %w", err)
|
|
}
|
|
|
|
model := client.GenerativeModel("gemini-2.5-flash")
|
|
model.SetTemperature(0.43)
|
|
model.SystemInstruction = &genai.Content{
|
|
Parts: []genai.Part{genai.Text(fmt.Sprintf(sysPrompt, userCV))},
|
|
}
|
|
model.SafetySettings = []*genai.SafetySetting{
|
|
{Category: genai.HarmCategoryHarassment, Threshold: genai.HarmBlockNone},
|
|
{Category: genai.HarmCategoryHateSpeech, Threshold: genai.HarmBlockNone},
|
|
{Category: genai.HarmCategorySexuallyExplicit, Threshold: genai.HarmBlockNone},
|
|
{Category: genai.HarmCategoryDangerousContent, Threshold: genai.HarmBlockNone},
|
|
}
|
|
|
|
return &Responder{
|
|
tts: ttsClient,
|
|
genaiClient: client,
|
|
model: model,
|
|
}, nil
|
|
}
|
|
|
|
func (r *Responder) ResponseDaemon(ctx context.Context) {
|
|
ticker := time.NewTicker(2 * time.Second)
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
puffMsg := r.Get() // In case a final text was transmitted during speak. 2s is fine.
|
|
if puffMsg != "" {
|
|
r.GetResponse("")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sysPrompt is the format string for the model's system instruction. Its
// single %s verb is filled with the CV text (NewResponder passes userCV).
const sysPrompt = `You are a user at an interview, give one short suitable and good response that fit into a job interview.
Respond directly with the answer just if you are spoken directly to.

This is YOUR CV:
%s`
|
|
|
|
func (r *Responder) Set(message string) {
|
|
r.mu.Lock()
|
|
r.BufferedResponse = r.BufferedResponse + message
|
|
r.mu.Unlock()
|
|
}
|
|
func (r *Responder) GetAndClear() string {
|
|
r.mu.Lock()
|
|
res := r.BufferedResponse
|
|
r.BufferedResponse = ""
|
|
r.mu.Unlock()
|
|
return res
|
|
}
|
|
|
|
func (r *Responder) Get() string {
|
|
r.mu.Lock()
|
|
res := r.BufferedResponse
|
|
r.mu.Unlock()
|
|
return res
|
|
}
|
|
|
|
func (r *Responder) Clear() {
|
|
r.mu.Lock()
|
|
r.BufferedResponse = ""
|
|
r.mu.Unlock()
|
|
}
|
|
|
|
// HistoryMessage is one entry of the conversation history.
type HistoryMessage struct {
	Role    string // label printed before the message by GetLastN
	Message string // text of the entry
}

// HistoryT is an append-only conversation log. Use Set and GetLastN, which
// take the mutex, rather than touching History directly.
type HistoryT struct {
	History []HistoryMessage
	my      sync.Mutex // guards History
}

// History is the package-wide conversation log.
var History HistoryT

// Set appends a new entry with the given role and message to the log.
func (h *HistoryT) Set(role, message string) {
	h.my.Lock()
	defer h.my.Unlock()

	h.History = append(h.History, HistoryMessage{Role: role, Message: message})
}

// GetLastN renders the most recent n entries, oldest first, as one
// "role: message" line per entry (each line ends with a newline).
//
// If fewer than n entries exist, all of them are returned. A non-positive n
// yields the empty string; a negative n previously produced a start index
// beyond the end of the slice and panicked.
func (h *HistoryT) GetLastN(n int) string {
	if n <= 0 {
		return ""
	}

	h.my.Lock()
	defer h.my.Unlock()

	start := len(h.History) - n
	if start < 0 {
		start = 0
	}

	var sb strings.Builder
	for _, m := range h.History[start:] {
		fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Message)
	}
	return sb.String()
}
|
|
|
|
func buildPrompt(question string) string {
|
|
return fmt.Sprintf(`Last 5 messages:
|
|
%s
|
|
|
|
CURRENT QUESTION: %s`, History.GetLastN(5), question)
|
|
}
|
|
|
|
func (r *Responder) GetResponse(question string) {
|
|
|
|
go func() {
|
|
if question != "" {
|
|
r.Set(question + " ")
|
|
}
|
|
|
|
r.mu.Lock()
|
|
if r.running {
|
|
r.mu.Unlock()
|
|
return
|
|
}
|
|
r.running = true
|
|
r.mu.Unlock()
|
|
|
|
finalQuestion := r.GetAndClear()
|
|
prompt := buildPrompt(finalQuestion) // Wenn sie laufen soll, den Buffer leeren.
|
|
|
|
res, err := r.callGemini(prompt)
|
|
if err != nil {
|
|
log.Printf("❌ gemini: %v", err)
|
|
r.mu.Lock()
|
|
r.running = false
|
|
r.mu.Unlock()
|
|
return
|
|
}
|
|
fmt.Printf("%s🤖 %s%s\n", Green, res, Reset)
|
|
|
|
History.Set("user", finalQuestion)
|
|
History.Set("model", res)
|
|
|
|
if r.tts != nil {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
|
defer cancel()
|
|
|
|
fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak Start!", Reset)
|
|
if err := r.tts.Speak(ctx, res); err != nil {
|
|
log.Printf("%s❌ tts: %v%s", Red, err, Reset)
|
|
} else {
|
|
fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak End!", Reset)
|
|
}
|
|
}
|
|
r.mu.Lock()
|
|
r.running = false
|
|
r.mu.Unlock()
|
|
|
|
}()
|
|
}
|
|
|
|
func (r *Responder) callGemini(prompt string) (string, error) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
|
defer cancel()
|
|
|
|
resp, err := r.model.GenerateContent(ctx, genai.Text(prompt))
|
|
if err != nil {
|
|
return "", fmt.Errorf("generate: %w", err)
|
|
}
|
|
if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 {
|
|
return "", errors.New("no content")
|
|
}
|
|
text, ok := resp.Candidates[0].Content.Parts[0].(genai.Text)
|
|
if !ok {
|
|
return "", errors.New("unexpected content type")
|
|
}
|
|
return string(text), nil
|
|
}
|