This commit is contained in:
Karl Breuer 2026-04-09 10:21:20 +02:00
parent 4c2345c176
commit ecebf9f5c1
10 changed files with 1040 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
.env
responder
loadenv.sh

View File

@ -1,2 +1,48 @@
# responder
**Responder** automatically sits through AI-powered job interviews for you.
It is **not** designed to fool humans, and it is not capable of doing so. It handles the audio-only, fully automated "AI interviewer" screenings that some companies now put in front of candidates — the ones where no human is on the other end, where you talk to a bot, and where a model decides whether you proceed.
If a bot can do your side of the conversation too, maybe the format was never doing what it claimed to.
🤖↔️🤖
## The point
Automated AI interviews are disrespectful to candidates. They waste your time, they reduce a conversation to a one-sided interrogation by a system that cannot actually listen, and they hide the fact that no one at the company cared enough to show up. Responder is a small demonstration that this format is hollow: if both sides can be automated, the "interview" is just two language models exchanging tokens.
Use a real human. Or don't be surprised when candidates stop showing up as humans either.
## How it works
Responder captures the system audio of your call (what the bot says), transcribes it via Google Speech-to-Text, generates a response with Gemini based on your CV, and speaks the answer back through a virtual microphone that you select as the input device in your call app.
## Requirements
- Linux with PipeWire or PulseAudio (uses `parec` and a virtual sink/source)
- A Google Cloud project with Speech-to-Text v2 and Text-to-Speech enabled
- A Gemini API key
## Setup
1. Set up Google credentials for Speech-to-Text and Gemini. The onboarding is rough, but at least it is all from one vendor.
```sh
export GEMINI_API_KEY=YOUR_KEY
export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.config/gcloud/YOUR_CONFIG.json
```
2. Replace the `userCV` text in `pkg/audiocap/cv.go` with your own CV as plain text.
3. Build and run:
```sh
go build && ./responder
```
4. Start the browser, join the call with the bot, and select the new virtual microphone (`CheaterMic`) as your audio input.
## Disclaimer
This project is a demonstration and a statement. No responsibility is assumed for any use or consequences thereof. You are responsible for complying with the terms of service, laws, and ethical norms that apply to you.

46
go.mod Normal file
View File

@ -0,0 +1,46 @@
module gitea.karlbreuer.com/karl/responder
go 1.25.0
require cloud.google.com/go/speech v1.31.0
require (
cloud.google.com/go v0.123.0 // indirect
cloud.google.com/go/ai v0.8.0 // indirect
cloud.google.com/go/auth v0.20.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
cloud.google.com/go/compute/metadata v0.9.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect
github.com/googleapis/gax-go/v2 v2.21.0 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
go.opentelemetry.io/otel v1.43.0 // indirect
go.opentelemetry.io/otel/metric v1.43.0 // indirect
go.opentelemetry.io/otel/trace v1.43.0 // indirect
golang.org/x/crypto v0.49.0 // indirect
golang.org/x/oauth2 v0.36.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/time v0.15.0 // indirect
google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect
)
require (
cloud.google.com/go/longrunning v0.8.0 // indirect
cloud.google.com/go/texttospeech v1.17.0
github.com/google/generative-ai-go v0.20.1
golang.org/x/net v0.52.0 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect
google.golang.org/api v0.275.0
google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
google.golang.org/grpc v1.80.0
google.golang.org/protobuf v1.36.11 // indirect
)

100
go.sum Normal file
View File

@ -0,0 +1,100 @@
cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE=
cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU=
cloud.google.com/go/ai v0.8.0 h1:rXUEz8Wp2OlrM8r1bfmpF2+VKqc1VJpafE3HgzRnD/w=
cloud.google.com/go/ai v0.8.0/go.mod h1:t3Dfk4cM61sytiggo2UyGsDVW3RF1qGZaUKDrZFyqkE=
cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA=
cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q=
cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7EhfW8=
cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk=
cloud.google.com/go/speech v1.31.0 h1:+K91seSmxnmk7lKzzUjn7H0NwMfs4BBfLBvJgeHGl6U=
cloud.google.com/go/speech v1.31.0/go.mod h1:8U80+TvXb17d7L/5jDLC3BGGKTrVqR7jFX0SvTkXs30=
cloud.google.com/go/texttospeech v1.17.0 h1:zwqHUyaxtzsmcIYMAygBHFuC/5/C31iBhRTvFYe8mvM=
cloud.google.com/go/texttospeech v1.17.0/go.mod h1:qXUktcF2pHd2Gh9GcY2NFEtZPnIeC+kKVPC3wes7LSk=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w=
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA=
github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g=
github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98=
github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4=
github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/generative-ai-go v0.20.1 h1:6dEIujpgN2V0PgLhr6c/M1ynRdc7ARtiIDPFzj45uNQ=
github.com/google/generative-ai-go v0.20.1/go.mod h1:TjOnZJmZKzarWbjUJgy+r3Ee7HGBRVLhOIgupnwR4Bg=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8=
github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg=
github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl/wMbiI=
github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 h1:yI1/OhfEPy7J9eoa6Sj051C7n5dvpj0QX8g4sRchg04=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0/go.mod h1:NoUCKYWK+3ecatC4HjkRktREheMeEtrXoQxrqYFeHSc=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg=
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs=
golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
google.golang.org/api v0.275.0 h1:vfY5d9vFVJeWEZT65QDd9hbndr7FyZ2+6mIzGAh71NI=
google.golang.org/api v0.275.0/go.mod h1:Fnag/EWUPIcJXuIkP1pjoTgS5vdxlk3eeemL7Do6bvw=
google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0=
google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I=
google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f h1:K3zPU40OFjwD5YKADLMLoiL0L7JJpBgEdLqGuCNPfp0=
google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f/go.mod h1:EIQZ5bFCfRQDV4MhRle7+OgjNtZ6P1PiZBgAKuxXu/Y=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

9
main.go Normal file
View File

@ -0,0 +1,9 @@
package main
import "gitea.karlbreuer.com/karl/responder/pkg/audiocap"
// main is the program entry point; it hands control to audiocap.Start.
func main() {
audiocap.Start()
}

335
pkg/audiocap/audiocap.go Normal file
View File

@ -0,0 +1,335 @@
package audiocap
import (
"context"
"fmt"
"io"
"log"
"os/exec"
"os/signal"
"strings"
"sync"
"syscall"
"time"
speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
"google.golang.org/api/option"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Capture and recognition settings used by the audio pump and the
// Speech-to-Text streams.
const (
sampleRate = 16000 // sample rate in Hz declared to the recognizer
chunkBytes = 3200 // 100ms @ 16kHz mono s16le
streamMaxAge = 4 * time.Minute // Google's max age is 5 min
audioDevice = "@DEFAULT_MONITOR@" // device name passed to parec via -d
location = "europe-west4" // location segment of the recognizer resource name
languageCode = "en-US" // language code sent in the recognition config
endpoint = "europe-west4-speech.googleapis.com:443" // endpoint the speech client connects to
)
// responder is the package-wide Responder. Start assigns it and
// handleTranscript passes every final transcript to it.
var responder *Responder
// Close shuts down the Gemini client owned by the Responder. It returns the
// client's Close error, or nil when no client is set.
func (r *Responder) Close() error {
	if r.genaiClient == nil {
		return nil
	}
	return r.genaiClient.Close()
}
// Start runs the whole pipeline (capture, transcribe, respond, speak) and
// blocks until SIGINT/SIGTERM arrives or the audio source ends.
//
// Any setup or runtime failure ends the process through log.Fatalf, but only
// after runSession has returned, so its deferred cleanup (in particular
// unloading the virtual-microphone modules) has already run.
func Start() {
	if err := runSession(); err != nil {
		log.Fatalf("%v", err)
	}
}

// runSession creates every component, runs the recognition loop and tears
// everything down again. It returns errors instead of calling log.Fatalf
// itself, because log.Fatalf exits the process without running deferred calls.
func runSession() error {
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	// Virtual microphone: Setup loads the audio modules, Teardown unloads them.
	mic, err := Setup()
	if err != nil {
		return fmt.Errorf("virtmic: %w", err)
	}
	defer mic.Teardown()
	log.Printf("%s🎤 Virtmic ready: select '%s' as input in your call app%s",
		Yellow, SourceName, Reset)

	// Text-to-Speech client that plays into the virtual microphone's sink.
	ttsClient, err := New(ctx, mic.SinkForPlayback())
	if err != nil {
		return fmt.Errorf("tts: %w", err)
	}
	defer ttsClient.Close()

	// Speech-to-Text client on the regional endpoint.
	client, err := speech.NewClient(ctx, option.WithEndpoint(endpoint))
	if err != nil {
		return fmt.Errorf("speech client: %w", err)
	}
	defer client.Close()

	// The responder is stored in the package variable because
	// handleTranscript reaches it from the stream's receive goroutine.
	responder, err = NewResponder(ctx, ttsClient)
	if err != nil {
		return fmt.Errorf("responder: %w", err)
	}
	defer responder.Close()
	go responder.ResponseDaemon(ctx)

	audio, err := startCapture(ctx)
	if err != nil {
		return fmt.Errorf("capture: %w", err)
	}
	defer audio.Close()

	recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/_", "cheater-492707", location)
	log.Printf("🎙 Listening on %s → %s", audioDevice, recognizer)

	// An error caused by our own cancellation is a normal shutdown.
	if err := run(ctx, client, recognizer, audio); err != nil && ctx.Err() == nil {
		return fmt.Errorf("run: %w", err)
	}
	log.Println("👋 Bye")
	return nil
}
// startCapture launches parec against audioDevice and returns the process's
// stdout, which carries the raw PCM requested on the command line (s16le,
// 16000 Hz, one channel). The process is tied to ctx and its stderr goes to
// the standard logger's writer.
func startCapture(ctx context.Context) (io.ReadCloser, error) {
	args := []string{
		"-d", audioDevice,
		"--format=s16le",
		"--rate=16000",
		"--channels=1",
		"--latency-msec=20",
	}
	parec := exec.CommandContext(ctx, "parec", args...)
	parec.Stderr = log.Writer()

	pcm, err := parec.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err = parec.Start(); err != nil {
		return nil, err
	}
	return pcm, nil
}
// run reads audio into a channel and dispatches it to rotating streams.
//
// A background goroutine reads chunkBytes-sized chunks from audio and queues
// them on a buffered channel. The main loop forwards each chunk to the current
// recognition stream and opens a replacement stream once streamMaxAge has
// elapsed or when a send fails. It returns ctx.Err() once ctx is done, nil
// when the audio goroutine stops and closes the channel, and a wrapped error
// if the first stream cannot be opened.
func run(ctx context.Context, client *speech.Client, recognizer string, audio io.Reader) error {
audioCh := make(chan []byte, 32)
// Audio pump — never blocks on stream send
go func() {
defer close(audioCh)
buf := make([]byte, chunkBytes)
for {
if ctx.Err() != nil {
return
}
n, err := io.ReadFull(audio, buf)
// A short read (io.ErrUnexpectedEOF) still delivered n bytes and is
// forwarded below; every other error, io.EOF included, stops the pump.
if err != nil && err != io.ErrUnexpectedEOF {
log.Printf("❌ audio read: %v", err)
return
}
if n == 0 {
continue
}
// Copy the data out because buf is reused by the next read.
chunk := make([]byte, n)
copy(chunk, buf[:n])
select {
case audioCh <- chunk:
case <-ctx.Done():
return
}
}
}()
var (
mu sync.Mutex
current *activeStream
rotateAt = time.Now().Add(streamMaxAge)
)
// openNew starts a fresh stream, makes it the current one, resets the
// rotation deadline and closes the previous stream in the background.
openNew := func() error {
s, err := newStream(ctx, client, recognizer)
if err != nil {
return err
}
mu.Lock()
old := current
current = s
rotateAt = time.Now().Add(streamMaxAge)
mu.Unlock()
if old != nil {
go old.close()
}
return nil
}
if err := openNew(); err != nil {
return fmt.Errorf("open initial stream: %w", err)
}
for {
select {
case <-ctx.Done():
mu.Lock()
if current != nil {
current.close()
}
mu.Unlock()
return ctx.Err()
case chunk, ok := <-audioCh:
if !ok {
return nil
}
mu.Lock()
needRotate := time.Now().After(rotateAt)
mu.Unlock()
if needRotate {
log.Println("🔄 Rotating stream")
// If opening fails, the previous stream stays current.
if err := openNew(); err != nil {
log.Printf("❌ rotate: %v", err)
}
}
mu.Lock()
s := current
mu.Unlock()
// The chunk that failed to send is not retried on the new stream.
if err := s.send(chunk); err != nil {
log.Printf("⚠️ send: %v — reopening", err)
if err := openNew(); err != nil {
log.Printf("❌ reopen: %v", err)
}
}
}
}
}
// activeStream wraps a v2 streaming session and dedupes transcript output.
type activeStream struct {
stream speechpb.Speech_StreamingRecognizeClient // the streaming recognize call
cancel context.CancelFunc // cancels the context the stream was opened with
mu sync.Mutex // held by send, close and handleTranscript
closed bool // set by close; send returns an error afterwards
lastInterim string // last printed interim, used to compute deltas
}
// newStream opens a StreamingRecognize call on a child context of parent and
// sends the initial configuration request (explicit LINEAR16 decoding at
// sampleRate, one channel, model chirp_2, automatic punctuation, interim
// results). It then starts a goroutine that passes the first alternative of
// every received result to handleTranscript. If opening the call or sending
// the configuration fails, the child context is cancelled and the error is
// returned.
func newStream(parent context.Context, client *speech.Client, recognizer string) (*activeStream, error) {
	streamCtx, stop := context.WithCancel(parent)

	rpc, err := client.StreamingRecognize(streamCtx)
	if err != nil {
		stop()
		return nil, err
	}

	decoding := &speechpb.RecognitionConfig_ExplicitDecodingConfig{
		ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
			Encoding:          speechpb.ExplicitDecodingConfig_LINEAR16,
			SampleRateHertz:   sampleRate,
			AudioChannelCount: 1,
		},
	}
	recognition := &speechpb.RecognitionConfig{
		DecodingConfig: decoding,
		Model:          "chirp_2",
		LanguageCodes:  []string{languageCode},
		Features: &speechpb.RecognitionFeatures{
			EnableAutomaticPunctuation: true,
		},
	}
	setup := &speechpb.StreamingRecognizeRequest{
		Recognizer: recognizer,
		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speechpb.StreamingRecognitionConfig{
				Config: recognition,
				StreamingFeatures: &speechpb.StreamingRecognitionFeatures{
					InterimResults: true,
				},
			},
		},
	}
	if err := rpc.Send(setup); err != nil {
		stop()
		return nil, err
	}

	session := &activeStream{stream: rpc, cancel: stop}

	// Receive loop: runs until the stream ends or fails, then cancels the
	// stream context.
	go func() {
		defer stop()
		for {
			resp, recvErr := rpc.Recv()
			switch {
			case recvErr == nil:
				// fall through to result handling below
			case recvErr == io.EOF:
				return
			default:
				// Cancellation is the normal way a stream is shut down, so
				// only other failures are logged.
				if st, ok := status.FromError(recvErr); !ok || st.Code() != codes.Canceled {
					log.Printf("❌ recv: %v", recvErr)
				}
				return
			}
			for _, result := range resp.Results {
				if alts := result.Alternatives; len(alts) > 0 {
					session.handleTranscript(alts[0].Transcript, result.IsFinal)
				}
			}
		}
	}()
	return session, nil
}
// send forwards one chunk of audio on the stream while holding the mutex.
// After close has been called it returns a "stream closed" error; otherwise
// it returns the result of the underlying Send.
func (s *activeStream) send(chunk []byte) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if !s.closed {
		req := &speechpb.StreamingRecognizeRequest{
			StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{Audio: chunk},
		}
		return s.stream.Send(req)
	}
	return fmt.Errorf("stream closed")
}
// close marks the stream as closed, closes the send direction and schedules
// cancellation of the stream context two seconds later. Calls after the first
// do nothing.
func (s *activeStream) close() {
	alreadyClosed := func() bool {
		s.mu.Lock()
		defer s.mu.Unlock()
		was := s.closed
		s.closed = true
		return was
	}()
	if alreadyClosed {
		return
	}

	_ = s.stream.CloseSend()
	time.AfterFunc(2*time.Second, s.cancel)
}
// handleTranscript prints only the part of a transcript that was not printed
// before.
//
// For an interim result it prints the text following the previous interim, or
// the whole text when the previous interim is not a prefix of it, and
// remembers the text. For a final result it prints the remaining delta,
// forgets the interim and passes the full text to responder.GetResponse.
func (s *activeStream) handleTranscript(text string, final bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	previous := s.lastInterim
	if !final {
		s.lastInterim = text
		rest, grew := strings.CutPrefix(text, previous)
		if !grew {
			// Model revised earlier text — reprint the full line.
			fmt.Printf("… %s\n", text)
			return
		}
		if rest = strings.TrimSpace(rest); rest != "" {
			fmt.Printf("… %s\n", rest)
		}
		return
	}

	s.lastInterim = ""
	if fresh := strings.TrimSpace(strings.TrimPrefix(text, previous)); fresh != "" {
		fmt.Printf("%s✓ %s%s\n", Red, fresh, Reset)
	}
	responder.GetResponse(text)
}

72
pkg/audiocap/cv.go Normal file
View File

@ -0,0 +1,72 @@
package audiocap
// userCV is the plain-text CV that NewResponder inserts into the Gemini
// system prompt. Replace the text with your own CV.
const userCV = `Karl Breuer
Dipl.-Ing. | Freelance Software Developer
+49 160 2057504 | mail@karlbreuer.com
Wegscheiderstr. 5, 06110 Halle, Germany
Profile
Fullstack developer and graduated engineer (Dipl.-Ing.).
For over 10 years I have been working at the intersection of engineering and software, first
in industry at Siemens and KSB, now as a freelancer.
What I do: translate complex problems into clean, maintainable software. Whether as a
technical lead within a team or as a developer who takes a project from architecture to
deployment on my own.
My background in process engineering, thermodynamics, and industrial project
management helps me get up to speed quickly in new domains. But even without a
technical subject domain, I bring what matters:
I deliver results, on time and on target.
Technology Stack
Frontend: React, TypeScript, Vite, JavaScript, Tailwind CSS, HTML, CSS
Backend: Go, Python, Django, PostgreSQL, MariaDB, Redis, REST APIs, WebSocket
DevOps & Infrastructure: Docker, Linux, Git, GitLab CI/CD, VPS
Architecture: Microservices, Real-time Systems, Legacy Migration, AI/LLM Integration,
RAG Systems, AI Agents
Selected Freelance Projects
Calculation Platform | 2025 PWA; real-time collaboration, revision safety.
https://karlbreuer.com/blog/ktool/en | React, Vite, Go, PostgreSQL, WebSocket
AI Interaction Platform | 2024 Extended and customized OpenWebUI with AI agents
and RAG capabilities for enterprise use. Tech: OpenWebUI, FastAPI, React, TypeScript,
Go, Docker, PostgreSQL, SQLite3, Azure OpenAI, MS Entra
Financial Services Web Application | 2024 Architected and developed a high-
performance web application for the financial sector. Tech: React, TypeScript, Go
(Go4lage), Docker, PostgreSQL, Linux
Railway Industry Data Management System | 2023 Present Built a robust CRUD
application for data management in the railway sector. Tech: JavaScript, jQuery, Django,
Docker, MariaDB, Redis, GitLab, Linux
Cybersecurity Awareness Dashboard | 2023 Developed a web dashboard and
backend system for cybersecurity awareness training. Tech: React, TypeScript, Django,
MariaDB, GitLab, Linux
Open Source & Portfolio
Go4lage High-performance web framework I built from scratch in Go. Production-ready
with Docker and PostgreSQL integration. https://go4lage.com
Go4lage Tools VS Code extension for seamless Go and TypeScript development
workflows. https://github.com/Karl1b/go4lagetools
NLP Solver Mathematical solver plugin for OnlyOffice. Go compiled to WebAssembly.
https://github.com/Karl1b/only-office-nlp-solver
AI Email Responder Automated email response system with IMAP synchronization.
https://www.youtube.com/watch?v=sm1j6QjbP5Q
GeminiCV AI-powered resume optimization tool. https://geminicv.karlbreuer.com |
https://www.youtube.com/watch?v=jHNNeVSqJMI
Previous Engineering Roles
Project Manager | KSB SE & Co. KGaA, Halle | Nov 2018 Apr 2022 Led technical
consulting for national and international clients. Managed complex projects and drove
process optimization initiatives.
Mechanical Component Engineer | Siemens AG, Görlitz (via Brunel) | Sep 2017 Jun
2018 Designed and specified turbine components. Provided technical consulting for
engineering teams.
Process Engineer | RVT Process Equipment, Steinwiesen | Aug 2015 May 2017
Planned flue gas treatment systems. Performed hydraulic calculations and process
optimization.
Education
Diplom-Ingenieur (M.Sc. equivalent), Food Technology TU Berlin | 2008 2015
Specialization: Process Engineering
Patent
Heat Exchanger with Phase Change Storage (WO 2020/151850) Energy storage solution
for renewable energy applications, including wind power.
Languages
German (Native) | English (Fluent)
Additional Skills
Project estimation & budgeting, client presentations, technical mentoring, patent
development
Available for remote work and on-site projects in Germany/EU
Halle (Saale), March 18, 2026`

221
pkg/audiocap/gemini.go Normal file
View File

@ -0,0 +1,221 @@
package audiocap
import (
"context"
"errors"
"fmt"
"log"
"os"
"strings"
"sync"
"time"
"github.com/google/generative-ai-go/genai"
"google.golang.org/api/option"
)
// ANSI escape sequences used to style console output.
const (
Reset = "\033[0m" // back to default attributes
Dim = "\033[2m" // faint text
Red = "\033[31m" // red foreground
Green = "\033[32m" // green foreground
Yellow = "\033[33m" // yellow foreground
Cyan = "\033[36m" // cyan foreground
)
// Responder buffers transcribed questions, asks Gemini for an answer and
// speaks the answer through the TTS client.
type Responder struct {
tts *Client // speech output; GetResponse skips speaking when nil
BufferedResponse string // question text collected until the next Gemini call
mu sync.Mutex // guards BufferedResponse and running
running bool // true while a GetResponse goroutine is generating or speaking
genaiClient *genai.Client // closed by Close
model *genai.GenerativeModel // configured in NewResponder
}
// NewResponder builds a Responder that answers through the
// "gemini-2.5-flash" model.
//
// The API key comes from the GEMINI_API_KEY environment variable. The model is
// given temperature 0.43, a system instruction made from sysPrompt and userCV,
// and safety settings with every listed harm category set to HarmBlockNone.
// It returns an error when the key is missing or the client cannot be created.
func NewResponder(ctx context.Context, ttsClient *Client) (*Responder, error) {
	key := os.Getenv("GEMINI_API_KEY")
	if key == "" {
		return nil, errors.New("GEMINI_API_KEY not set")
	}

	gc, err := genai.NewClient(ctx, option.WithAPIKey(key))
	if err != nil {
		return nil, fmt.Errorf("genai client: %w", err)
	}

	m := gc.GenerativeModel("gemini-2.5-flash")
	m.SetTemperature(0.43)
	m.SystemInstruction = &genai.Content{
		Parts: []genai.Part{genai.Text(fmt.Sprintf(sysPrompt, userCV))},
	}

	categories := []genai.HarmCategory{
		genai.HarmCategoryHarassment,
		genai.HarmCategoryHateSpeech,
		genai.HarmCategorySexuallyExplicit,
		genai.HarmCategoryDangerousContent,
	}
	settings := make([]*genai.SafetySetting, 0, len(categories))
	for _, category := range categories {
		settings = append(settings, &genai.SafetySetting{
			Category:  category,
			Threshold: genai.HarmBlockNone,
		})
	}
	m.SafetySettings = settings

	return &Responder{
		tts:         ttsClient,
		genaiClient: gc,
		model:       m,
	}, nil
}
// ResponseDaemon polls the question buffer every two seconds until ctx is
// done.
//
// GetResponse only appends to the buffer while another response is still in
// progress. This loop notices such leftover text and calls GetResponse("") so
// it is answered afterwards.
func (r *Responder) ResponseDaemon(ctx context.Context) {
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop() // release the ticker when the daemon exits

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// A final text may have arrived during speech; a delay of up to
			// two seconds is acceptable.
			if r.Get() != "" {
				r.GetResponse("")
			}
		}
	}
}
// sysPrompt is the system-instruction template for Gemini; NewResponder fills
// its single %s verb with userCV.
const sysPrompt = `You are a user at an interview, give one short suitable and good response that fit into a job interview.
Respond directly with the answer just if you are spoken directly to.
This is YOUR CV:
%s`
// Set appends message to the buffered question text while holding the mutex.
// Despite its name it never replaces what is already buffered.
func (r *Responder) Set(message string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.BufferedResponse += message
}
// GetAndClear returns the buffered question text and empties the buffer in
// one step under the mutex.
func (r *Responder) GetAndClear() string {
	r.mu.Lock()
	defer r.mu.Unlock()

	pending := r.BufferedResponse
	r.BufferedResponse = ""
	return pending
}
// Get returns the buffered question text without modifying it.
func (r *Responder) Get() string {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.BufferedResponse
}
// Clear discards the buffered question text.
func (r *Responder) Clear() {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.BufferedResponse = ""
}
// HistoryMessage is one entry of the conversation history.
type HistoryMessage struct {
Role string // speaker label; GetResponse records "user" and "model"
Message string // text of the entry
}
// HistoryT is a mutex-protected, append-only list of conversation entries.
type HistoryT struct {
History []HistoryMessage // entries in the order they were added
my sync.Mutex // guards History
}
// History is the package-wide conversation history; GetResponse appends to it
// and buildPrompt reads from it.
var History HistoryT
// Set appends a new entry with the given role and message to the history.
// Despite its name it never overwrites existing entries.
func (h *HistoryT) Set(role, message string) {
	entry := HistoryMessage{Role: role, Message: message}

	h.my.Lock()
	defer h.my.Unlock()
	h.History = append(h.History, entry)
}
// GetLastN renders the most recent n history entries, oldest first, as
// "role: message" lines, each terminated by a newline. When the history holds
// fewer than n entries, all of them are returned. For n <= 0 the result is the
// empty string.
func (h *HistoryT) GetLastN(n int) string {
	h.my.Lock()
	defer h.my.Unlock()

	// Without this guard a negative n would push the start index past the end
	// of the slice and panic.
	if n <= 0 {
		return ""
	}

	start := 0
	if len(h.History) > n {
		start = len(h.History) - n
	}

	var sb strings.Builder
	for _, m := range h.History[start:] {
		fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Message)
	}
	return sb.String()
}
// buildPrompt assembles the user prompt for Gemini: the last five history
// entries followed by the current question.
func buildPrompt(question string) string {
	recent := History.GetLastN(5)
	return fmt.Sprintf("Last 5 messages:\n%s\nCURRENT QUESTION: %s", recent, question)
}
// GetResponse buffers question (when non-empty) and returns immediately. The
// rest happens in a new goroutine: unless another response is already in
// progress, it takes everything buffered so far, asks Gemini for an answer,
// records question and answer in History and speaks the answer through the
// TTS client, if one is set.
//
// While a response is in progress, later calls only add to the buffer;
// ResponseDaemon picks that text up afterwards. If nothing is buffered (for
// example an empty final transcript), no Gemini call is made.
func (r *Responder) GetResponse(question string) {
	go func() {
		if question != "" {
			r.Set(question + " ")
		}

		r.mu.Lock()
		if r.running {
			r.mu.Unlock()
			return
		}
		r.running = true
		r.mu.Unlock()

		// Release the running flag on every exit path.
		defer func() {
			r.mu.Lock()
			r.running = false
			r.mu.Unlock()
		}()

		// This goroutine now owns the turn, so it drains the buffer.
		finalQuestion := r.GetAndClear()
		if strings.TrimSpace(finalQuestion) == "" {
			// Nothing was actually asked; do not generate or speak a reply.
			return
		}

		prompt := buildPrompt(finalQuestion)
		res, err := r.callGemini(prompt)
		if err != nil {
			log.Printf("❌ gemini: %v", err)
			return
		}
		fmt.Printf("%s🤖 %s%s\n", Green, res, Reset)
		History.Set("user", finalQuestion)
		History.Set("model", res)

		if r.tts == nil {
			return
		}
		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
		defer cancel()
		fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak Start!", Reset)
		if err := r.tts.Speak(ctx, res); err != nil {
			log.Printf("%s❌ tts: %v%s", Red, err, Reset)
			return
		}
		fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak End!", Reset)
	}()
}
// callGemini sends prompt to the configured model with a 60-second timeout
// and returns the first part of the first candidate as text.
//
// It returns an error when the request fails, when the response has no
// candidate with content, or when the first part is not text.
func (r *Responder) callGemini(prompt string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	resp, err := r.model.GenerateContent(ctx, genai.Text(prompt))
	if err != nil {
		return "", fmt.Errorf("generate: %w", err)
	}
	if resp == nil || len(resp.Candidates) == 0 {
		return "", errors.New("no content")
	}

	// Candidate and Content are pointers; check them before dereferencing so
	// a candidate without content yields an error instead of a panic.
	first := resp.Candidates[0]
	if first == nil || first.Content == nil || len(first.Content.Parts) == 0 {
		return "", errors.New("no content")
	}

	text, ok := first.Content.Parts[0].(genai.Text)
	if !ok {
		return "", errors.New("unexpected content type")
	}
	return string(text), nil
}

67
pkg/audiocap/tts.go Normal file
View File

@ -0,0 +1,67 @@
package audiocap
import (
"context"
"fmt"
"os/exec"
texttospeech "cloud.google.com/go/texttospeech/apiv1"
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
)
// Client synthesizes speech with Google Text-to-Speech and plays it into a
// pulse sink.
type Client struct {
client *texttospeech.Client // Text-to-Speech API client; closed by Close
sink string // pulse sink to play into
}
// New creates a Client that plays synthesized speech into sink. It returns a
// wrapped error when the Text-to-Speech client cannot be created.
func New(ctx context.Context, sink string) (*Client, error) {
	var (
		speaker = &Client{sink: sink}
		err     error
	)
	if speaker.client, err = texttospeech.NewClient(ctx); err != nil {
		return nil, fmt.Errorf("tts client: %w", err)
	}
	return speaker, nil
}
// Close closes the underlying Text-to-Speech client and returns its error.
func (c *Client) Close() error {
return c.client.Close()
}
// Speak synthesizes text and plays it into the configured sink.
//
// The text is synthesized with the en-US-Neural2-D voice as LINEAR16 at
// 24000 Hz and piped to paplay, which is started with ctx. Speak waits for
// paplay to exit before returning.
//
// It returns an error when synthesis fails, when paplay cannot be started,
// when the audio cannot be written to it, or when paplay exits unsuccessfully.
func (c *Client) Speak(ctx context.Context, text string) error {
	req := &texttospeechpb.SynthesizeSpeechRequest{
		Input: &texttospeechpb.SynthesisInput{
			InputSource: &texttospeechpb.SynthesisInput_Text{Text: text},
		},
		Voice: &texttospeechpb.VoiceSelectionParams{
			LanguageCode: "en-US",
			Name:         "en-US-Neural2-D",
		},
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding:   texttospeechpb.AudioEncoding_LINEAR16,
			SampleRateHertz: 24000,
		},
	}
	resp, err := c.client.SynthesizeSpeech(ctx, req)
	if err != nil {
		return fmt.Errorf("synthesize: %w", err)
	}

	// LINEAR16 from Google TTS is raw PCM wrapped in a WAV header.
	// paplay handles WAV directly.
	cmd := exec.CommandContext(ctx, "paplay",
		"--device="+c.sink,
	)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return fmt.Errorf("stdin: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("paplay: %w", err)
	}

	// Always close stdin and wait for the process, even when the write
	// fails; otherwise paplay is left running and is never reaped.
	_, writeErr := stdin.Write(resp.AudioContent)
	closeErr := stdin.Close()
	waitErr := cmd.Wait()

	switch {
	case writeErr != nil:
		return fmt.Errorf("write: %w", writeErr)
	case waitErr != nil:
		return waitErr
	case closeErr != nil:
		return fmt.Errorf("close stdin: %w", closeErr)
	}
	return nil
}

141
pkg/audiocap/virtmic.go Normal file
View File

@ -0,0 +1,141 @@
package audiocap
import (
"fmt"
"os/exec"
"strings"
)
// Names of the audio objects that Setup creates.
const (
SinkName = "cheater_mic" // null sink that synthesized speech is played into
SourceName = "CheaterMic" // source remapped from SinkName's monitor; shown to the user as the microphone to select
RecordingSink = "recording" // null sink that both loopbacks feed into
)
// Handle keeps the IDs returned by pactl for the modules loaded in Setup, so
// that Teardown can unload them. An empty ID means the module was not loaded.
type Handle struct {
// virtmic modules
sinkModuleID string
sourceModuleID string
// recording modules
recordingSinkID string
callLoopbackID string
ttsLoopbackID string
}
// Setup loads the modules for the virtual microphone and the recording mix
// through pactl and returns a Handle holding their IDs.
//
// Steps, in order: a null sink (SinkName), a source remapped from that sink's
// monitor (SourceName), a second null sink (RecordingSink), a loopback from
// the default sink's monitor into RecordingSink, and a loopback from
// SinkName's monitor into RecordingSink. If a step fails, the modules loaded
// so far are unloaded and an error naming the step is returned.
func Setup() (*Handle, error) {
	h := &Handle{}

	// load runs one load-module step, stores the new module ID in dst and, on
	// failure, unloads everything loaded so far.
	load := func(stage string, dst *string, module string, args ...string) error {
		id, err := pactlLoad(module, args...)
		if err != nil {
			h.Teardown()
			return fmt.Errorf("%s: %w", stage, err)
		}
		*dst = id
		return nil
	}

	// 1. virtmic null-sink
	if err := load("null-sink", &h.sinkModuleID, "module-null-sink",
		"sink_name="+SinkName,
		"sink_properties=device.description=CheaterSink",
	); err != nil {
		return nil, err
	}

	// 2. virtmic remap-source
	if err := load("remap-source", &h.sourceModuleID, "module-remap-source",
		"source_name="+SourceName,
		"master="+SinkName+".monitor",
		"source_properties=device.description=CheaterMic",
	); err != nil {
		return nil, err
	}

	// 3. recording null-sink
	if err := load("recording sink", &h.recordingSinkID, "module-null-sink",
		"sink_name="+RecordingSink,
		"sink_properties=device.description=Recording",
	); err != nil {
		return nil, err
	}

	// 4. Call-audio loopback: resolve default sink monitor explicitly
	callSource, err := defaultSinkMonitor()
	if err != nil {
		h.Teardown()
		return nil, fmt.Errorf("resolve default monitor: %w", err)
	}
	if err := load("call loopback", &h.callLoopbackID, "module-loopback",
		"source="+callSource,
		"sink="+RecordingSink,
		"latency_msec=20",
	); err != nil {
		return nil, err
	}

	// 5. TTS loopback: cheater_mic monitor → recording
	if err := load("tts loopback", &h.ttsLoopbackID, "module-loopback",
		"source="+SinkName+".monitor",
		"sink="+RecordingSink,
		"latency_msec=20",
	); err != nil {
		return nil, err
	}

	return h, nil
}
// Teardown unloads the modules in the reverse of the order Setup loaded them
// and skips any that were never loaded, so it is safe after a partial Setup.
// Unload errors are ignored.
func (h *Handle) Teardown() {
	reverseOrder := []string{
		h.ttsLoopbackID,
		h.callLoopbackID,
		h.recordingSinkID,
		h.sourceModuleID,
		h.sinkModuleID,
	}
	for _, id := range reverseOrder {
		if id == "" {
			continue
		}
		_ = pactlUnload(id)
	}
}
// SinkForPlayback returns the name of the sink to play audio into; it is
// always SinkName.
func (h *Handle) SinkForPlayback() string {
return SinkName
}
// defaultSinkMonitor asks pactl for the default sink and returns
// "<default-sink-name>.monitor". It fails when pactl fails or reports an
// empty name.
func defaultSinkMonitor() (string, error) {
	raw, err := exec.Command("pactl", "get-default-sink").Output()
	if err != nil {
		return "", err
	}
	if name := strings.TrimSpace(string(raw)); name != "" {
		return name + ".monitor", nil
	}
	return "", fmt.Errorf("empty default sink")
}
// pactlLoad runs "pactl load-module <module> <args...>" and returns the
// command's trimmed standard output, which Setup stores as the module ID.
func pactlLoad(module string, args ...string) (string, error) {
	argv := make([]string, 0, len(args)+2)
	argv = append(argv, "load-module", module)
	argv = append(argv, args...)

	raw, err := exec.Command("pactl", argv...).Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(raw)), nil
}
// pactlUnload runs "pactl unload-module <id>" and returns the command's error.
func pactlUnload(id string) error {
return exec.Command("pactl", "unload-module", id).Run()
}