diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..742a6a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +responder +loadenv.sh diff --git a/README.md b/README.md index ac1c13b..7ca9374 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,48 @@ # responder +**Responder** automatically sits through AI-powered job interviews for you. + +It is **not** designed to fool humans, and it is not capable of doing so. It handles the audio-only, fully automated "AI interviewer" screenings that some companies now put in front of candidates — the ones where no human is on the other end, where you talk to a bot, and where a model decides whether you proceed. + +If a bot can do your side of the conversation too, maybe the format was never doing what it claimed to. + +🤖↔️🤖 + +## The point + +Automated AI interviews are disrespectful to candidates. They waste your time, they reduce a conversation to a one-sided interrogation by a system that cannot actually listen, and they hide the fact that no one at the company cared enough to show up. Responder is a small demonstration that this format is hollow: if both sides can be automated, the "interview" is just two language models exchanging tokens. + +Use a real human. Or don't be surprised when candidates stop showing up as humans either. + +## How it works + +Responder captures the system audio of your call (what the bot says), transcribes it via Google Speech-to-Text, generates a response with Gemini based on your CV, synthesizes that response with Google Text-to-Speech, and plays it into a virtual microphone that you select as the input device in your call app. + +## Requirements + +- Linux with PipeWire or PulseAudio (uses `parec`, `paplay`, and a virtual sink/source) +- A Google Cloud project with Speech-to-Text v2 and Text-to-Speech enabled +- A Gemini API key + +## Setup + +1. Set up Google credentials for Speech-to-Text, Text-to-Speech, and Gemini. The onboarding is rough, but at least it is all from one vendor. 
+ +```sh + export GEMINI_API_KEY=YOUR_KEY + export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.config/gcloud/YOUR_CONFIG.json +``` + +2. Replace the `userCV` text in `pkg/audiocap/cv.go` with your own CV as plain text, and change the hard-coded Google Cloud project ID in `pkg/audiocap/audiocap.go` to your own. + +3. Build and run: + +```sh + go build && ./responder +``` + +4. Start your browser, join the call with the bot, and select the new virtual microphone (`CheaterMic`) as your audio input. + +## Disclaimer + +This project is a demonstration and a statement. No responsibility is assumed for any use or consequences thereof. You are responsible for complying with the terms of service, laws, and ethical norms that apply to you. \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c0fdb1c --- /dev/null +++ b/go.mod @@ -0,0 +1,46 @@ +module gitea.karlbreuer.com/karl/responder + +go 1.25.0 + +require cloud.google.com/go/speech v1.31.0 + +require ( + cloud.google.com/go v0.123.0 // indirect + cloud.google.com/go/ai v0.8.0 // indirect + cloud.google.com/go/auth v0.20.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect + github.com/googleapis/gax-go/v2 v2.21.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect + go.opentelemetry.io/otel v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/otel/trace v1.43.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/oauth2 v0.36.0 // indirect + golang.org/x/sync 
v0.20.0 // indirect + golang.org/x/time v0.15.0 // indirect + google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect +) + +require ( + cloud.google.com/go/longrunning v0.8.0 // indirect + cloud.google.com/go/texttospeech v1.17.0 + github.com/google/generative-ai-go v0.20.1 + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + google.golang.org/api v0.275.0 + google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 + google.golang.org/protobuf v1.36.11 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..113279b --- /dev/null +++ b/go.sum @@ -0,0 +1,100 @@ +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/ai v0.8.0 h1:rXUEz8Wp2OlrM8r1bfmpF2+VKqc1VJpafE3HgzRnD/w= +cloud.google.com/go/ai v0.8.0/go.mod h1:t3Dfk4cM61sytiggo2UyGsDVW3RF1qGZaUKDrZFyqkE= +cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA= +cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7EhfW8= +cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk= +cloud.google.com/go/speech v1.31.0 h1:+K91seSmxnmk7lKzzUjn7H0NwMfs4BBfLBvJgeHGl6U= 
+cloud.google.com/go/speech v1.31.0/go.mod h1:8U80+TvXb17d7L/5jDLC3BGGKTrVqR7jFX0SvTkXs30= +cloud.google.com/go/texttospeech v1.17.0 h1:zwqHUyaxtzsmcIYMAygBHFuC/5/C31iBhRTvFYe8mvM= +cloud.google.com/go/texttospeech v1.17.0/go.mod h1:qXUktcF2pHd2Gh9GcY2NFEtZPnIeC+kKVPC3wes7LSk= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= +github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 
h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/generative-ai-go v0.20.1 h1:6dEIujpgN2V0PgLhr6c/M1ynRdc7ARtiIDPFzj45uNQ= +github.com/google/generative-ai-go v0.20.1/go.mod h1:TjOnZJmZKzarWbjUJgy+r3Ee7HGBRVLhOIgupnwR4Bg= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8= +github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= +github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl/wMbiI= +github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod 
h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 h1:yI1/OhfEPy7J9eoa6Sj051C7n5dvpj0QX8g4sRchg04= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0/go.mod h1:NoUCKYWK+3ecatC4HjkRktREheMeEtrXoQxrqYFeHSc= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA= +go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= +golang.org/x/sync v0.20.0 
h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= +golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/api v0.275.0 h1:vfY5d9vFVJeWEZT65QDd9hbndr7FyZ2+6mIzGAh71NI= +google.golang.org/api v0.275.0/go.mod h1:Fnag/EWUPIcJXuIkP1pjoTgS5vdxlk3eeemL7Do6bvw= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I= +google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f h1:K3zPU40OFjwD5YKADLMLoiL0L7JJpBgEdLqGuCNPfp0= +google.golang.org/genproto/googleapis/api v0.0.0-20260401001100-f93e5f3e9f0f/go.mod h1:EIQZ5bFCfRQDV4MhRle7+OgjNtZ6P1PiZBgAKuxXu/Y= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod 
// Audio capture and Speech-to-Text streaming parameters.
const (
	// sampleRate is the PCM sample rate in Hz used for capture and declared
	// to the recognizer.
	sampleRate = 16000

	// chunkBytes is the size of one audio chunk handed to the recognizer:
	// 100 ms of mono s16le audio (sampleRate/10 samples, 2 bytes each).
	chunkBytes = sampleRate / 10 * 2

	// streamMaxAge is how long one streaming session is used before it is
	// rotated; it stays below Google's 5 minute maximum stream age.
	streamMaxAge = 4 * time.Minute

	// audioDevice is the capture source passed to parec.
	audioDevice = "@DEFAULT_MONITOR@"

	// location is the Google Cloud region of the recognizer.
	location = "europe-west4"

	// languageCode is the language requested from the recognizer.
	languageCode = "en-US"

	// endpoint is the regional Speech-to-Text endpoint matching location.
	endpoint = location + "-speech.googleapis.com:443"
)
TTS client + ttsClient, err := New(ctx, mic.SinkForPlayback()) + if err != nil { + log.Fatalf("tts: %v", err) + } + defer ttsClient.Close() + + client, err := speech.NewClient(ctx, option.WithEndpoint(endpoint)) + if err != nil { + log.Fatalf("speech client: %v", err) + } + defer client.Close() + + responder, err = NewResponder(ctx, ttsClient) + if err != nil { + log.Fatalf("responder: %v", err) + } + defer responder.Close() + + go responder.ResponseDaemon(ctx) + + audio, err := startCapture(ctx) + if err != nil { + log.Fatalf("capture: %v", err) + } + defer audio.Close() + + recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/_", "cheater-492707", location) + log.Printf("πŸŽ™ Listening on %s β†’ %s", audioDevice, recognizer) + + if err := run(ctx, client, recognizer, audio); err != nil && ctx.Err() == nil { + log.Fatalf("run: %v", err) + } + log.Println("πŸ‘‹ Bye") +} + +// startCapture spawns parec and returns its stdout as raw PCM. +func startCapture(ctx context.Context) (io.ReadCloser, error) { + cmd := exec.CommandContext(ctx, "parec", + "-d", audioDevice, + "--format=s16le", + "--rate=16000", + "--channels=1", + "--latency-msec=20", + ) + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + cmd.Stderr = log.Writer() + if err := cmd.Start(); err != nil { + return nil, err + } + return stdout, nil +} + +// run reads audio into a channel and dispatches it to rotating streams. 
+func run(ctx context.Context, client *speech.Client, recognizer string, audio io.Reader) error { + audioCh := make(chan []byte, 32) + + // Audio pump β€” never blocks on stream send + go func() { + defer close(audioCh) + buf := make([]byte, chunkBytes) + for { + if ctx.Err() != nil { + return + } + n, err := io.ReadFull(audio, buf) + if err != nil && err != io.ErrUnexpectedEOF { + log.Printf("❌ audio read: %v", err) + return + } + if n == 0 { + continue + } + chunk := make([]byte, n) + copy(chunk, buf[:n]) + select { + case audioCh <- chunk: + case <-ctx.Done(): + return + } + } + }() + + var ( + mu sync.Mutex + current *activeStream + rotateAt = time.Now().Add(streamMaxAge) + ) + + openNew := func() error { + s, err := newStream(ctx, client, recognizer) + if err != nil { + return err + } + mu.Lock() + old := current + current = s + rotateAt = time.Now().Add(streamMaxAge) + mu.Unlock() + + if old != nil { + go old.close() + } + return nil + } + + if err := openNew(); err != nil { + return fmt.Errorf("open initial stream: %w", err) + } + + for { + select { + case <-ctx.Done(): + mu.Lock() + if current != nil { + current.close() + } + mu.Unlock() + return ctx.Err() + + case chunk, ok := <-audioCh: + if !ok { + return nil + } + + mu.Lock() + needRotate := time.Now().After(rotateAt) + mu.Unlock() + if needRotate { + log.Println("πŸ”„ Rotating stream") + if err := openNew(); err != nil { + log.Printf("❌ rotate: %v", err) + } + } + + mu.Lock() + s := current + mu.Unlock() + + if err := s.send(chunk); err != nil { + log.Printf("⚠️ send: %v β€” reopening", err) + if err := openNew(); err != nil { + log.Printf("❌ reopen: %v", err) + } + } + } + } +} + +// activeStream wraps a v2 streaming session and dedupes transcript output. 
+type activeStream struct { + stream speechpb.Speech_StreamingRecognizeClient + cancel context.CancelFunc + mu sync.Mutex + closed bool + lastInterim string // last printed interim, used to compute deltas +} + +func newStream(parent context.Context, client *speech.Client, recognizer string) (*activeStream, error) { + ctx, cancel := context.WithCancel(parent) + stream, err := client.StreamingRecognize(ctx) + if err != nil { + cancel() + return nil, err + } + + configReq := &speechpb.StreamingRecognizeRequest{ + Recognizer: recognizer, + StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{ + StreamingConfig: &speechpb.StreamingRecognitionConfig{ + Config: &speechpb.RecognitionConfig{ + DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{ + ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{ + Encoding: speechpb.ExplicitDecodingConfig_LINEAR16, + SampleRateHertz: sampleRate, + AudioChannelCount: 1, + }, + }, + Model: "chirp_2", + LanguageCodes: []string{languageCode}, + Features: &speechpb.RecognitionFeatures{ + EnableAutomaticPunctuation: true, + }, + }, + StreamingFeatures: &speechpb.StreamingRecognitionFeatures{ + InterimResults: true, + }, + }, + }, + } + if err := stream.Send(configReq); err != nil { + cancel() + return nil, err + } + + as := &activeStream{stream: stream, cancel: cancel} + + // Receive loop + go func() { + defer cancel() + + for { + resp, err := stream.Recv() + if err == io.EOF { + return + } + if err != nil { + if st, ok := status.FromError(err); ok && st.Code() == codes.Canceled { + return + } + log.Printf("❌ recv: %v", err) + return + } + for _, result := range resp.Results { + if len(result.Alternatives) == 0 { + continue + } + as.handleTranscript(result.Alternatives[0].Transcript, result.IsFinal) + } + } + }() + + return as, nil +} + +func (s *activeStream) send(chunk []byte) error { + s.mu.Lock() + defer s.mu.Unlock() + if s.closed { + return fmt.Errorf("stream closed") + } + return 
s.stream.Send(&speechpb.StreamingRecognizeRequest{ + StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{ + Audio: chunk, + }, + }) +} + +func (s *activeStream) close() { + s.mu.Lock() + if s.closed { + s.mu.Unlock() + return + } + s.closed = true + s.mu.Unlock() + + _ = s.stream.CloseSend() + time.AfterFunc(2*time.Second, s.cancel) +} + +// handleTranscript prints only the new part of a growing interim transcript. +func (s *activeStream) handleTranscript(text string, final bool) { + s.mu.Lock() + defer s.mu.Unlock() + + if final { + delta := strings.TrimSpace(strings.TrimPrefix(text, s.lastInterim)) + if delta != "" { + fmt.Printf("%sβœ“ %s%s\n", Red, delta, Reset) + } + s.lastInterim = "" + + responder.GetResponse(text) + + return + } + + if after, ok := strings.CutPrefix(text, s.lastInterim); ok { + delta := strings.TrimSpace(after) + if delta != "" { + fmt.Printf("… %s\n", delta) + } + } else { + // Model revised earlier text β€” reprint full line + fmt.Printf("… %s\n", text) + } + s.lastInterim = text +} diff --git a/pkg/audiocap/cv.go b/pkg/audiocap/cv.go new file mode 100644 index 0000000..22ad60d --- /dev/null +++ b/pkg/audiocap/cv.go @@ -0,0 +1,72 @@ +package audiocap + +const userCV = `Karl Breuer +Dipl.-Ing. | Freelance Software Developer ++49 160 2057504 | mail@karlbreuer.com +Wegscheiderstr. 5, 06110 Halle, Germany +Profile +Fullstack developer and graduated engineer (Dipl.-Ing.). +For over 10 years I have been working at the intersection of engineering and software, first +in industry at Siemens and KSB, now as a freelancer. +What I do: translate complex problems into clean, maintainable software. Whether as a +technical lead within a team or as a developer who takes a project from architecture to +deployment on my own. +My background in process engineering, thermodynamics, and industrial project +management helps me get up to speed quickly in new domains. 
But even without a +technical subject domain, I bring what matters: +I deliver results, on time and on target. +Technology Stack +Frontend: React, TypeScript, Vite, JavaScript, Tailwind CSS, HTML, CSS +Backend: Go, Python, Django, PostgreSQL, MariaDB, Redis, REST APIs, WebSocket +DevOps & Infrastructure: Docker, Linux, Git, GitLab CI/CD, VPS +Architecture: Microservices, Real-time Systems, Legacy Migration, AI/LLM Integration, +RAG Systems, AI Agents +Selected Freelance Projects +Calculation Platform | 2025 PWA; real-time collaboration, revision safety. +https://karlbreuer.com/blog/ktool/en | React, Vite, Go, PostgreSQL, WebSocket +AI Interaction Platform | 2024 Extended and customized OpenWebUI with AI agents +and RAG capabilities for enterprise use. Tech: OpenWebUI, FastAPI, React, TypeScript, +Go, Docker, PostgreSQL, SQLite3, Azure OpenAI, MS Entra +Financial Services Web Application | 2024 Architected and developed a high- +performance web application for the financial sector. Tech: React, TypeScript, Go +(Go4lage), Docker, PostgreSQL, Linux +Railway Industry Data Management System | 2023 – Present Built a robust CRUD +application for data management in the railway sector. Tech: JavaScript, jQuery, Django, +Docker, MariaDB, Redis, GitLab, Linux +Cybersecurity Awareness Dashboard | 2023 Developed a web dashboard and +backend system for cybersecurity awareness training. Tech: React, TypeScript, Django, +MariaDB, GitLab, Linux +Open Source & Portfolio +Go4lage High-performance web framework I built from scratch in Go. Production-ready +with Docker and PostgreSQL integration. https://go4lage.com +Go4lage Tools VS Code extension for seamless Go and TypeScript development +workflows. https://github.com/Karl1b/go4lagetools +NLP Solver Mathematical solver plugin for OnlyOffice. Go compiled to WebAssembly. +https://github.com/Karl1b/only-office-nlp-solver +AI Email Responder Automated email response system with IMAP synchronization. 
+https://www.youtube.com/watch?v=sm1j6QjbP5Q +GeminiCV AI-powered resume optimization tool. https://geminicv.karlbreuer.com | +https://www.youtube.com/watch?v=jHNNeVSqJMI +Previous Engineering Roles +Project Manager | KSB SE & Co. KGaA, Halle | Nov 2018 – Apr 2022 Led technical +consulting for national and international clients. Managed complex projects and drove +process optimization initiatives. +Mechanical Component Engineer | Siemens AG, GΓΆrlitz (via Brunel) | Sep 2017 – Jun +2018 Designed and specified turbine components. Provided technical consulting for +engineering teams. +Process Engineer | RVT Process Equipment, Steinwiesen | Aug 2015 – May 2017 +Planned flue gas treatment systems. Performed hydraulic calculations and process +optimization. +Education +Diplom-Ingenieur (M.Sc. equivalent), Food Technology TU Berlin | 2008 – 2015 +Specialization: Process Engineering +Patent +Heat Exchanger with Phase Change Storage (WO 2020/151850) Energy storage solution +for renewable energy applications, including wind power. 
+Languages +German (Native) | English (Fluent) +Additional Skills +Project estimation & budgeting, client presentations, technical mentoring, patent +development +Available for remote work and on-site projects in Germany/EU +Halle (Saale), March 18, 2026` diff --git a/pkg/audiocap/gemini.go b/pkg/audiocap/gemini.go new file mode 100644 index 0000000..a5fe7d1 --- /dev/null +++ b/pkg/audiocap/gemini.go @@ -0,0 +1,221 @@ +package audiocap + +import ( + "context" + "errors" + "fmt" + "log" + "os" + "strings" + "sync" + "time" + + "github.com/google/generative-ai-go/genai" + "google.golang.org/api/option" +) + +const ( + Reset = "\033[0m" + Dim = "\033[2m" + Red = "\033[31m" + Green = "\033[32m" + Yellow = "\033[33m" + Cyan = "\033[36m" +) + +type Responder struct { + tts *Client + BufferedResponse string + mu sync.Mutex + running bool + genaiClient *genai.Client + model *genai.GenerativeModel +} + +func NewResponder(ctx context.Context, ttsClient *Client) (*Responder, error) { + apiKey := os.Getenv("GEMINI_API_KEY") + if apiKey == "" { + return nil, errors.New("GEMINI_API_KEY not set") + } + + client, err := genai.NewClient(ctx, option.WithAPIKey(apiKey)) + if err != nil { + return nil, fmt.Errorf("genai client: %w", err) + } + + model := client.GenerativeModel("gemini-2.5-flash") + model.SetTemperature(0.43) + model.SystemInstruction = &genai.Content{ + Parts: []genai.Part{genai.Text(fmt.Sprintf(sysPrompt, userCV))}, + } + model.SafetySettings = []*genai.SafetySetting{ + {Category: genai.HarmCategoryHarassment, Threshold: genai.HarmBlockNone}, + {Category: genai.HarmCategoryHateSpeech, Threshold: genai.HarmBlockNone}, + {Category: genai.HarmCategorySexuallyExplicit, Threshold: genai.HarmBlockNone}, + {Category: genai.HarmCategoryDangerousContent, Threshold: genai.HarmBlockNone}, + } + + return &Responder{ + tts: ttsClient, + genaiClient: client, + model: model, + }, nil +} + +func (r *Responder) ResponseDaemon(ctx context.Context) { + ticker := time.NewTicker(2 * 
// HistoryMessage is one turn of the conversation.
type HistoryMessage struct {
	Role    string // speaker label, e.g. "user" or "model"
	Message string // text of the turn
}

// HistoryT is a conversation log that is safe for concurrent use through its
// methods. The zero value is an empty, ready-to-use history.
type HistoryT struct {
	History []HistoryMessage
	mu      sync.Mutex // guards History
}

// History is the process-wide conversation log.
var History HistoryT

// Set appends one turn with the given role and message to the history.
func (h *HistoryT) Set(role, message string) {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.History = append(h.History, HistoryMessage{
		Role:    role,
		Message: message,
	})
}

// GetLastN renders the most recent n turns, oldest first, as one
// "role: message" line per turn. If fewer than n turns exist, all of them are
// returned; for n <= 0 the result is the empty string.
func (h *HistoryT) GetLastN(n int) string {
	if n <= 0 {
		// A negative n would otherwise push the start index past the end of
		// the slice and panic.
		return ""
	}

	h.mu.Lock()
	defer h.mu.Unlock()

	start := 0
	if len(h.History) > n {
		start = len(h.History) - n
	}

	var sb strings.Builder
	for _, m := range h.History[start:] {
		fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Message)
	}
	return sb.String()
}
// Wenn sie laufen soll, den Buffer leeren. + + res, err := r.callGemini(prompt) + if err != nil { + log.Printf("❌ gemini: %v", err) + r.mu.Lock() + r.running = false + r.mu.Unlock() + return + } + fmt.Printf("%sπŸ€– %s%s\n", Green, res, Reset) + + History.Set("user", finalQuestion) + History.Set("model", res) + + if r.tts != nil { + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + fmt.Printf("%sπŸ€– %s%s\n", Cyan, "Speak Start!", Reset) + if err := r.tts.Speak(ctx, res); err != nil { + log.Printf("%s❌ tts: %v%s", Red, err, Reset) + } else { + fmt.Printf("%sπŸ€– %s%s\n", Cyan, "Speak End!", Reset) + } + } + r.mu.Lock() + r.running = false + r.mu.Unlock() + + }() +} + +func (r *Responder) callGemini(prompt string) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + resp, err := r.model.GenerateContent(ctx, genai.Text(prompt)) + if err != nil { + return "", fmt.Errorf("generate: %w", err) + } + if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 { + return "", errors.New("no content") + } + text, ok := resp.Candidates[0].Content.Parts[0].(genai.Text) + if !ok { + return "", errors.New("unexpected content type") + } + return string(text), nil +} diff --git a/pkg/audiocap/tts.go b/pkg/audiocap/tts.go new file mode 100644 index 0000000..884ec87 --- /dev/null +++ b/pkg/audiocap/tts.go @@ -0,0 +1,67 @@ +package audiocap + +import ( + "context" + "fmt" + "os/exec" + + texttospeech "cloud.google.com/go/texttospeech/apiv1" + "cloud.google.com/go/texttospeech/apiv1/texttospeechpb" +) + +type Client struct { + client *texttospeech.Client + sink string // pulse sink to play into +} + +func New(ctx context.Context, sink string) (*Client, error) { + c, err := texttospeech.NewClient(ctx) + if err != nil { + return nil, fmt.Errorf("tts client: %w", err) + } + return &Client{client: c, sink: sink}, nil +} + +func (c *Client) Close() error { + return 
c.client.Close() +} + +// Speak synthesizes text and plays it into the configured sink. +func (c *Client) Speak(ctx context.Context, text string) error { + req := &texttospeechpb.SynthesizeSpeechRequest{ + Input: &texttospeechpb.SynthesisInput{ + InputSource: &texttospeechpb.SynthesisInput_Text{Text: text}, + }, + Voice: &texttospeechpb.VoiceSelectionParams{ + LanguageCode: "en-US", + Name: "en-US-Neural2-D", + }, + AudioConfig: &texttospeechpb.AudioConfig{ + AudioEncoding: texttospeechpb.AudioEncoding_LINEAR16, + SampleRateHertz: 24000, + }, + } + + resp, err := c.client.SynthesizeSpeech(ctx, req) + if err != nil { + return fmt.Errorf("synthesize: %w", err) + } + + // LINEAR16 from Google TTS is raw PCM wrapped in a WAV header. + // paplay handles WAV directly. + cmd := exec.CommandContext(ctx, "paplay", + "--device="+c.sink, + ) + stdin, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("stdin: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("paplay: %w", err) + } + if _, err := stdin.Write(resp.AudioContent); err != nil { + return fmt.Errorf("write: %w", err) + } + stdin.Close() + return cmd.Wait() +} diff --git a/pkg/audiocap/virtmic.go b/pkg/audiocap/virtmic.go new file mode 100644 index 0000000..0cee86d --- /dev/null +++ b/pkg/audiocap/virtmic.go @@ -0,0 +1,141 @@ +package audiocap + +import ( + "fmt" + "os/exec" + "strings" +) + +const ( + SinkName = "cheater_mic" + SourceName = "CheaterMic" + RecordingSink = "recording" +) + +type Handle struct { + // virtmic modules + sinkModuleID string + sourceModuleID string + + // recording modules + recordingSinkID string + callLoopbackID string + ttsLoopbackID string +} + +func Setup() (*Handle, error) { + h := &Handle{} + + // 1. virtmic null-sink + sinkID, err := pactlLoad("module-null-sink", + "sink_name="+SinkName, + "sink_properties=device.description=CheaterSink", + ) + if err != nil { + return nil, fmt.Errorf("null-sink: %w", err) + } + h.sinkModuleID = sinkID + + // 2. 
virtmic remap-source + sourceID, err := pactlLoad("module-remap-source", + "source_name="+SourceName, + "master="+SinkName+".monitor", + "source_properties=device.description=CheaterMic", + ) + if err != nil { + h.Teardown() + return nil, fmt.Errorf("remap-source: %w", err) + } + h.sourceModuleID = sourceID + + // 3. recording null-sink + recID, err := pactlLoad("module-null-sink", + "sink_name="+RecordingSink, + "sink_properties=device.description=Recording", + ) + if err != nil { + h.Teardown() + return nil, fmt.Errorf("recording sink: %w", err) + } + h.recordingSinkID = recID + + // 4. Call-audio loopback: resolve default sink monitor explicitly + defaultMonitor, err := defaultSinkMonitor() + if err != nil { + h.Teardown() + return nil, fmt.Errorf("resolve default monitor: %w", err) + } + callID, err := pactlLoad("module-loopback", + "source="+defaultMonitor, + "sink="+RecordingSink, + "latency_msec=20", + ) + if err != nil { + h.Teardown() + return nil, fmt.Errorf("call loopback: %w", err) + } + h.callLoopbackID = callID + + // 5. TTS loopback: cheater_mic monitor β†’ recording + ttsID, err := pactlLoad("module-loopback", + "source="+SinkName+".monitor", + "sink="+RecordingSink, + "latency_msec=20", + ) + if err != nil { + h.Teardown() + return nil, fmt.Errorf("tts loopback: %w", err) + } + h.ttsLoopbackID = ttsID + + return h, nil +} + +// Teardown unloads in reverse order. Safe to call even after partial Setup. 
+func (h *Handle) Teardown() { + if h.ttsLoopbackID != "" { + _ = pactlUnload(h.ttsLoopbackID) + } + if h.callLoopbackID != "" { + _ = pactlUnload(h.callLoopbackID) + } + if h.recordingSinkID != "" { + _ = pactlUnload(h.recordingSinkID) + } + if h.sourceModuleID != "" { + _ = pactlUnload(h.sourceModuleID) + } + if h.sinkModuleID != "" { + _ = pactlUnload(h.sinkModuleID) + } +} + +func (h *Handle) SinkForPlayback() string { + return SinkName +} + +// defaultSinkMonitor returns ".monitor" +func defaultSinkMonitor() (string, error) { + out, err := exec.Command("pactl", "get-default-sink").Output() + if err != nil { + return "", err + } + sink := strings.TrimSpace(string(out)) + if sink == "" { + return "", fmt.Errorf("empty default sink") + } + return sink + ".monitor", nil +} + +func pactlLoad(module string, args ...string) (string, error) { + cmdArgs := append([]string{"load-module", module}, args...) + out, err := exec.Command("pactl", cmdArgs...).Output() + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + +func pactlUnload(id string) error { + return exec.Command("pactl", "unload-module", id).Run() +}