頁面比較

...

Sample Code

Audio-Only Mode

...

Python (w/o interruption handling)

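Install pyaudio, websockets, loguru, and protobuf (loguru is imported by the example below; protobuf is the runtime needed by the generated frames_pb2.py).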

Import the source code generated by ProtoBuf (file: frames_pb2.py).

Check Source code or the example below

程式碼區塊

language	py

import asyncio
import pyaudio
import websockets
import frames_pb2
from loguru import logger

# Register 'packet.log' as a loguru sink so log records are written to that file.
logger.add('packet.log')

# Audio parameters shared by the PyAudio stream and the outgoing frames.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
FRAMES_PER_BUFFER = 512

# Create the PyAudio instance, then open one stream that is used for both
# capture (input=True) and playback (output=True).
p = pyaudio.PyAudio()
stream = p.open(
    rate=RATE,
    channels=CHANNELS,
    format=FORMAT,
    frames_per_buffer=FRAMES_PER_BUFFER,
    input=True,
    output=True,
)

# Assistant credentials; replace the placeholders with real values.
assistant_id = "your_assistant_id_here"
assistant_api_key = "your_api_key_here"

# Websocket endpoint; the assistant id travels as a query parameter.
endpoint = f'wss://assistant-audio-prod.dvcbot.net/ws?assistant_id={assistant_id}'

async def send(websocket):
    """Capture audio from the shared stream and send it to the server.

    Each iteration reads FRAMES_PER_BUFFER frames from the module-level
    PyAudio stream, wraps them in a protobuf ``Frame`` together with the
    sample rate and channel count, and sends the serialized bytes.

    Args:
        websocket: Open websocket connection to send the frames on.

    Raises:
        websockets.exceptions.ConnectionClosedError: If the connection is
            closed with a code other than 4008 (4008 ends the loop quietly).
        Exception: Any other error raised while reading or sending is logged
            and re-raised.
    """
    while True:
        try:
            # NOTE: stream.read() is a synchronous call made from inside the
            # coroutine, so no other task runs while it waits for samples.
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            frame = frames_pb2.Frame()
            frame.audio.audio = data
            frame.audio.sample_rate = RATE
            frame.audio.num_channels = CHANNELS

            # Serialize once and reuse the bytes for both logging and sending.
            payload = frame.SerializeToString()
            logger.debug(f"Sent frame={frame}")
            logger.debug(f"Sent bytestring={payload}")

            await websocket.send(payload)
        except websockets.exceptions.ConnectionClosedError as e:
            logger.error(e)
            # Explicit check instead of `assert`: assertions are stripped
            # under `python -O`, which would silently accept any close code.
            if e.code != 4008:
                raise
            break
        except Exception as e:
            # loguru formats with str.format, so the exception needs a `{}`
            # placeholder to actually appear in the log line.
            logger.error("Not a websocket 4008 error: {}", e)
            # Re-raise instead of `assert False`, which would be removed under
            # `python -O` and leave this loop spinning on the same error.
            raise
        # Yield to the event loop so the receiver can run.
        await asyncio.sleep(0.01)

async def receive(websocket):
    """Receive protobuf frames from the server and play any audio they carry.

    Audio frames are written to the module-level PyAudio stream after the
    first 44 bytes are dropped. Text frames equal to ``'__interrupt__'`` are
    only logged; no interruption handling is performed.

    Args:
        websocket: Open websocket connection to iterate messages from.
    """
    async for message in websocket:
        logger.debug(f"Received bytestring={message}")

        # Deserialize the message exactly once and keep ParseFromString's
        # return value, so it can be logged without parsing the message again.
        frame = frames_pb2.Frame()
        parsed_length = frame.ParseFromString(message)
        logger.debug(f"Received frame={frame}")
        logger.debug(f"Received frame length={parsed_length}")

        if frame.HasField('audio'):
            audio_data = frame.audio.audio
            logger.debug(f"Received audio_data={audio_data}")
            logger.debug(f"Type of audio data={type(audio_data)}")
            # Skip the first 44 bytes (treated as the WAV header) and play
            # the remainder.
            stream.write(audio_data[44:])

        elif frame.HasField('text'):
            # Interrupts are only logged in this sample. To clear queued
            # audio on interrupt, the stream could be stopped and restarted
            # here (stream.stop_stream() followed by stream.start_stream()).
            if frame.text.text == '__interrupt__':
                logger.debug(f"Received text={frame.text.text}")
                logger.debug(f"Type of text data={type(frame.text.text)}")

        # Allow the event loop to handle other tasks.
        await asyncio.sleep(0.01)

async def send_receive():
    """Open the websocket connection and run the sender and receiver together."""
    logger.info(f'Connecting to the websocket at URL: {endpoint}')
    # The API key is offered as the second websocket subprotocol.
    connection = websockets.connect(
        endpoint,
        subprotocols=['proto', assistant_api_key],
        ssl=True,
    )
    async with connection as ws:
        # Short pause before the first frame is sent.
        await asyncio.sleep(0.1)
        logger.info("Beginning to send messages...")
        # Schedule both directions as tasks, then wait for both to finish.
        workers = [
            asyncio.create_task(send(ws)),
            asyncio.create_task(receive(ws)),
        ]
        await asyncio.gather(*workers)
# Run the main coroutine, then release the audio device however the run ends
# (normal return, exception, or Ctrl+C).
try:
    asyncio.run(send_receive())
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()

...

HTML code w/ embedded JavaScript.

Check this file

Audio+Image Mode

...

Python (w/o interruption handling)

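Install pyaudio, websockets, aiofiles, loguru, and protobuf (loguru is imported by the example below; protobuf is the runtime needed by the generated frames_pb2.py).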
Import the source code generated by ProtoBuf (frames_pb2.py).
Check Source code or the example below

程式碼區塊

language	py

import asyncio
import pyaudio
import websockets
import frames_pb2
from loguru import logger
import aiofiles
import threading

# Register 'packet.log' as a loguru sink so log records are written to that file.
logger.add('packet.log')

# Audio parameters shared by the PyAudio stream and the outgoing frames.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
FRAMES_PER_BUFFER = 512

# Create the PyAudio instance, then open one stream that is used for both
# capture (input=True) and playback (output=True).
p = pyaudio.PyAudio()
stream = p.open(
    rate=RATE,
    channels=CHANNELS,
    format=FORMAT,
    frames_per_buffer=FRAMES_PER_BUFFER,
    input=True,
    output=True,
)

# Assistant credentials; replace the placeholders with real values.
assistant_id = "your_assistant_id_here"
assistant_api_key = "your_api_key_here"

# Websocket endpoint; `vision=1` and the assistant id travel as query parameters.
endpoint = f'wss://assistant-audio-stag.dvcbot.net/ws?vision=1&assistant_id={assistant_id}'

async def send_audio(websocket):
    """Capture audio from the shared stream and send it to the server.

    Each iteration reads FRAMES_PER_BUFFER frames from the module-level
    PyAudio stream, wraps them in a protobuf ``Frame`` together with the
    sample rate and channel count, and sends the serialized bytes. The loop
    ends (without raising) on the first error, which is logged.

    Args:
        websocket: Open websocket connection to send the frames on.
    """
    while True:
        try:
            # NOTE: stream.read() is a synchronous call made from inside the
            # coroutine, so no other task runs while it waits for samples.
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            frame = frames_pb2.Frame()
            frame.audio.audio = data
            frame.audio.sample_rate = RATE
            frame.audio.num_channels = CHANNELS
            await websocket.send(frame.SerializeToString())
        except websockets.exceptions.ConnectionClosedError as e:
            logger.error(e)
            break
        except Exception as e:
            # loguru formats with str.format, so the exception needs a `{}`
            # placeholder to actually appear in the log line.
            logger.error("Error sending audio data: {}", e)
            break
        # Yield to the event loop so the receiver can run.
        await asyncio.sleep(0.01)

async def send_image(websocket, image_path, *, size=(640, 480), image_format="png"):
    """Read an image file and send it to the server as a protobuf image frame.

    Args:
        websocket: Open websocket connection to send the frame on.
        image_path: Path of the image file to upload.
        size: Two integers stored in the frame's ``size`` field. Defaults to
            (640, 480); the file itself is not inspected, so pass the real
            dimensions when they differ.
        image_format: Value stored in the frame's ``format`` field, e.g.
            "png" or "jpg". Defaults to "png".

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    # Keep the file open only for the read; the network send happens after
    # the handle has been released.
    async with aiofiles.open(image_path, 'rb') as img_file:
        image_data = await img_file.read()

    frame = frames_pb2.Frame()
    frame.image.image = image_data
    frame.image.size.extend(size)
    frame.image.format = image_format

    logger.info("Sending image...")
    await websocket.send(frame.SerializeToString())
    logger.info("Image sent successfully.")

async def receive(websocket):
    """Consume server messages and play back the audio frames among them."""
    async for raw in websocket:
        incoming = frames_pb2.Frame()
        incoming.ParseFromString(raw)

        if incoming.HasField('audio'):
            # Drop the first 44 bytes (the WAV header) before playback.
            playable = incoming.audio.audio[44:]
            stream.write(playable)
            continue

        if incoming.HasField('text') and incoming.text.text == '__interrupt__':
            # Interrupt marker: deliberately a no-op in this sample.
            continue

def image_upload_trigger(websocket, loop=None):
    """Prompt on stdin for image paths and upload each one until 'exit' is entered.

    Meant to run in a worker thread, because ``input()`` blocks.

    Args:
        websocket: Open websocket connection the images are sent on.
        loop: Event loop that owns ``websocket``. When given, each upload is
            scheduled on that loop with ``asyncio.run_coroutine_threadsafe``
            so the connection is only ever driven by its own loop. When
            ``None``, the upload runs in a fresh loop via ``asyncio.run``
            (the previous behavior), which is only appropriate if no other
            loop is using the connection.
    """
    prompt = "Please enter the image path and press Enter to upload (or type 'exit' to quit):\n"
    while True:
        try:
            image_path = input(prompt).strip()
        except EOFError:
            # stdin was closed; there is nothing more to read.
            break
        if image_path.lower() == 'exit':
            break
        if not image_path:
            continue
        try:
            if loop is None:
                asyncio.run(send_image(websocket, image_path))
            else:
                pending = asyncio.run_coroutine_threadsafe(
                    send_image(websocket, image_path), loop
                )
                # Block this worker thread (not the event loop) until the
                # upload finishes, surfacing any error it raised.
                pending.result()
        except Exception as e:
            # A bad path or a failed send should not end the prompt loop.
            logger.error("Image upload failed: {}", e)


async def main():
    """Connect to the server, stream audio both ways and accept image uploads."""
    # The API key is offered as the second websocket subprotocol.
    async with websockets.connect(endpoint, subprotocols=['proto', assistant_api_key], ssl=True) as ws:
        logger.info('WebSocket connection established.')

        # Start audio send/receive in asyncio tasks.
        send_task = asyncio.create_task(send_audio(ws))
        receive_task = asyncio.create_task(receive(ws))

        # The prompt runs in a separate thread because input() blocks. It is
        # handed the running loop so uploads execute on the loop that owns
        # the websocket. The thread is a daemon so a pending prompt cannot
        # keep the process alive once the audio tasks have finished.
        upload_thread = threading.Thread(
            target=image_upload_trigger,
            args=(ws, asyncio.get_running_loop()),
            daemon=True,
        )
        upload_thread.start()

        # No join() here: joining inside the coroutine would block the event
        # loop until the user typed 'exit'.
        await asyncio.gather(send_task, receive_task)

# Run the main function, then release the audio device however the run ends
# (normal return, exception, or Ctrl+C).
try:
    asyncio.run(main())
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()

...

HTML code w/ embedded JavaScript.

Check this file

Appendix: Decode/Encode w/o ProtoBuf

...

已比較的版本

Old Version 4

新版本 5

索引鍵

Sample Code

Audio-Only Mode

Python (w/o interruption handling)

HTML code w/ embedded JavaScript.

Audio+Image Mode

Python (w/o interruption handling)

HTML code w/ embedded JavaScript.

Appendix: Decode/Encode w/o ProtoBuf