#!/usr/bin/env python3
# shebangと言って、UNIXでこのファイルを直接実行するコマンドを指定します。
# -*- coding: utf-8 -*- 
"""
"""
import asyncio
import websockets
import pyaudio
#import numpy as np
import base64
import json
import queue
import threading
import os
import sys

# GEMINI2 Multimodal API WebSocket URLとAPI_KEY
MODEL ="gemini-2.0-flash-exp"
API_KEY = os.environ.get('GOOGLE_API_KEY')
WS_URL = f"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={API_KEY}"

# 送受信キューを初期化
send_queue = queue.Queue()
receive_queue = queue.Queue()

# 音声を送信する非同期関数
async def send_audio(websocket):


        while True:
            audio_data = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
            if audio_data is None:
                continue

            await websocket.send(

                json.dumps(
                    {
                        "realtime_input": {
                            "media_chunks": [
                                {
                                    "data": base64.b64encode(audio_data).decode(),
                                    "mime_type": "audio/pcm",
                                }
                            ]
                        }
                    }
                )
            )





# マイク音声をキューに入れる
def read_audio(mic_stream, CHUNK):
    while True:
        try:
            audio_data = mic_stream.read(CHUNK, exception_on_overflow=False)
            send_queue.put(audio_data)
        except Exception as e:
            print(f"音声読み取りエラー: {e}",flush = True)
            break


# サーバーイベントを受信して音声をキューに格納する非同期関数
async def receive_audio(websocket):
    print("どうぞ、質問を始めてください。")

    async for msg in websocket:
        #print (msg, flush = True)
        response = json.loads(msg)
        try:
            audio_data = response["serverContent"]["modelTurn"]["parts"][0][ "inlineData" ]["data"]
            #self.audio_queue.put_nowait(base64.b64decode(audio_data))

            receive_queue.put(base64.b64decode(audio_data))
            response["serverContent"]["modelTurn"]["parts"][0][ "inlineData" ]["data"] = "--voice data---"		# printを減らすため・・

        except KeyError:
            pass

        try:
            turn_complete = response["serverContent"]["turnComplete"]
        except KeyError:
            pass
        else:
            if turn_complete:
                # If you interrupt the model, it sends an end_of_turn. 
                # For interruptions to work, we need to empty out the audio queue
                # Because it may have loaded much more audio than has played yet.
                print("\nEnd of turn")
                while not receive_queue.empty():
                    receive_queue.get_nowait()
        # print(response, flush = True)


# 音声を再生する関数
def play_audio(spk_stream):
    while True:
        pcm16_audio = receive_queue.get()
        if pcm16_audio:
            spk_stream.write(pcm16_audio)



# テキストを送信する非同期関数
async def send_text(websocket):

    while True:
        text = await asyncio.to_thread(input, "Text> ")
        if text.lower() == "q":
            break

        msg = {
            "client_content": {
                "turn_complete": True,
                "turns": [{"role": "user", "parts": [{"text": text}]}],
            }
        }
        await websocket.send(json.dumps(msg))



# 
async def send_receive_start():

    # Gemini2 Multimodal API WebSocketに接続

    async with websockets.connect(WS_URL, additional_headers={"Content-Type": "application/json"}) as websocket:
        # print("WebSocketに接続しました。")
        await websocket.send(json.dumps({"setup": {"model": f"models/{MODEL}"}}))
        response =await websocket.recv(decode=False) 
        print("Gemini に接続しました。{response}")

        # PyAudioの設定
        INOUT_CHUNK = 512 # 2400  # Gemini の設定は512
        FORMAT = pyaudio.paInt16
        CHANNELS = 1
        INOUT_RATE = 24000
#
        # PyAudioインスタンス
        audio = pyaudio.PyAudio()

        # マイクストリームの初期化
        mic_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=INOUT_RATE, input=True, frames_per_buffer=INOUT_CHUNK)

        # スピーカストリームを初期化
        spk_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=INOUT_RATE, output=True, frames_per_buffer=INOUT_CHUNK)


        # マイク音声読み取りをスレッドで開始
        threading.Thread(target=read_audio, args=(mic_stream, INOUT_CHUNK), daemon=True).start()

        # スピーカ音声再生をスレッドで開始
        threading.Thread(target=play_audio, args=(spk_stream,), daemon=True).start()

        try:
            # WebSockの音声送信タスクと音声受信タスクを非同期実行
            send_task = asyncio.create_task(send_audio(websocket))
            receive_task = asyncio.create_task(receive_audio(websocket))
            text_task = asyncio.create_task(send_text(websocket))

            # タスク終了待機
            #await asyncio.gather(send_task, receive_task,text_task)
            await asyncio.gather(text_task)

            send_task.cancel()
            receive_task.cancel()
            await asyncio.gather(send_task, receive_task, return_exceptions=True)

        except KeyboardInterrupt:
            print("終了")
        except asyncio.CancelledError:  # CancelledErrorを個別にハンドリング
            pass
            #print("キャンセルされました。")
        finally:
            #if websocket.closed:
            await websocket.close()

            if mic_stream.is_active():
                mic_stream.stop_stream()
            mic_stream.close()
            spk_stream.stop_stream()
            spk_stream.close()
            audio.terminate()


if __name__ == "__main__":
    asyncio.run(send_receive_start())
