已比較的版本

索引鍵

  • 此行已新增。
  • 此行已移除。
  • 格式已變更。

...

Sample Code

Audio-Only Mode

...

Python (w/o interruption handling)

  • install pyaudio, websockets, loguru

  • import the source code generated by ProtoBuf (frames_pb2.py)

    • Check Source code or the example below

      程式碼區塊
      languagepy
      import asyncio
      import pyaudio
      import websockets
      import frames_pb2
      from loguru import logger
      
      # Add 'packet.log' as a sink for every log record.
      logger.add('packet.log')

      # Audio parameters used for both the captured and the played-back audio.
      RATE = 16000
      CHANNELS = 1
      FORMAT = pyaudio.paInt16
      FRAMES_PER_BUFFER = 512

      # One PyAudio instance and one stream opened for input and output at once.
      p = pyaudio.PyAudio()
      stream = p.open(
          rate=RATE,
          channels=CHANNELS,
          format=FORMAT,
          frames_per_buffer=FRAMES_PER_BUFFER,
          input=True,
          output=True,
      )

      # Assistant credentials: replace both placeholders before running.
      assistant_api_key = "your_api_key_here"
      assistant_id = "your_assistant_id_here"

      # Websocket endpoint; the assistant id is passed as a query parameter.
      endpoint = f'wss://assistant-audio-prod.dvcbot.net/ws?assistant_id={assistant_id}'
      
      async def send(websocket):
          """Stream captured audio to the server as serialized ``Frame`` messages.

          Repeatedly reads ``FRAMES_PER_BUFFER`` frames from the module-level
          ``stream``, wraps them in a ``frames_pb2.Frame`` audio message and sends
          the serialized bytes over ``websocket``.

          Args:
              websocket: Open websocket connection to send the frames on.

          Raises:
              websockets.exceptions.ConnectionClosedError: If the connection is
                  closed with any code other than 4008 (4008 ends the loop
                  quietly).
              Exception: Any other error raised while reading or sending; it is
                  logged and re-raised unchanged.
          """
          while True:
              try:
                  # Read audio data from the stream without raising on input overflow.
                  # NOTE(review): stream.read() is a blocking call inside a coroutine;
                  # the sleep below is what lets other tasks run between reads.
                  data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                  frame = frames_pb2.Frame()
                  frame.audio.audio = data
                  frame.audio.sample_rate = RATE
                  frame.audio.num_channels = CHANNELS

                  # Serialize once and reuse the bytes for logging and sending.
                  payload = frame.SerializeToString()
                  logger.debug(f"Sent frame={frame}")
                  logger.debug(f"Sent bytestring={payload}")

                  # Send the audio data to the server.
                  await websocket.send(payload)
              except websockets.exceptions.ConnectionClosedError as e:
                  logger.error(e)
                  # Only close code 4008 is treated as an expected end of the session.
                  # An explicit check is used because `assert` is removed under -O.
                  if e.code != 4008:
                      raise
                  break
              except Exception as e:
                  # loguru formats with str.format, so the message needs a `{}`
                  # placeholder or the exception text is silently dropped.
                  logger.error("Not a websocket 4008 error: {}", e)
                  raise
              await asyncio.sleep(0.01)
      
      async def receive(websocket):
          """Receive ``Frame`` messages from the server and play any audio they carry.

          Each incoming message is parsed as a ``frames_pb2.Frame``. Audio frames
          are written to the module-level ``stream``; a text frame whose text is
          ``'__interrupt__'`` is only logged (this sample does not implement
          interruption handling).

          Args:
              websocket: Open websocket connection to iterate messages from.
          """
          # Number of leading bytes skipped in each audio payload (the WAV header).
          wav_header_size = 44

          async for message in websocket:
              logger.debug(f"Received bytestring={message}")

              # Deserialize the message using Protobuf (parsed once per message).
              frame = frames_pb2.Frame()
              frame.ParseFromString(message)
              logger.debug(f"Received frame={frame}")
              # Log the size of the received payload without re-parsing it.
              logger.debug(f"Received frame length={len(message)}")

              # Check the type of frame.
              if frame.HasField('audio'):
                  # Play the received audio data.
                  audio_data = frame.audio.audio
                  logger.debug(f"Received audio_data={audio_data}")
                  logger.debug(f"Type of audio data={type(audio_data)}")
                  stream.write(audio_data[wav_header_size:])

              elif frame.HasField('text'):
                  if frame.text.text == '__interrupt__':
                      logger.debug(f"Received text={frame.text.text}")
                      logger.debug(f"Type of text data={type(frame.text.text)}")
                      # Interruption handling is intentionally left out here.
                      # Stopping and restarting the audio stream at this point
                      # (stream.stop_stream() / stream.start_stream()) would
                      # clear the playback buffer.

              # Allow the event loop to handle other tasks.
              await asyncio.sleep(0.01)
      
      async def send_receive():
          """Connect to ``endpoint`` and run the sender and receiver until both finish."""
          logger.info(f'Connecting to the websocket at URL: {endpoint}')
          connection = websockets.connect(
              endpoint,
              subprotocols=['proto', assistant_api_key],
              ssl=True,
          )
          async with connection as ws:
              # Short pause before any traffic is sent.
              await asyncio.sleep(0.1)
              logger.info("Beginning to send messages...")
              # Schedule the sender first, then the receiver, and wait for both.
              workers = [asyncio.create_task(job(ws)) for job in (send, receive)]
              await asyncio.gather(*workers)
      
      # Run the main coroutine to start the program.
      try:
          asyncio.run(send_receive())
      finally:
          # Release the audio stream and PyAudio instance however the session
          # ends (normal return, error, or Ctrl-C).
          stream.close()
          p.terminate()

...

HTML code w/ embedded JavaScript.

Audio+Image Mode

...

Python (w/o interruption handling)

  • install pyaudio, websockets, aiofiles, loguru

  • import the generated source code by ProtoBuf (frames_pb2.py)

  • Check Source code or the example below

程式碼區塊
languagepy
import asyncio
import pyaudio
import websockets
import frames_pb2
from loguru import logger
import aiofiles
import threading

# Add 'packet.log' as a sink for every log record.
logger.add('packet.log')

# Audio parameters used for both the captured and the played-back audio.
RATE = 16000
CHANNELS = 1
FORMAT = pyaudio.paInt16
FRAMES_PER_BUFFER = 512

# One PyAudio instance and one stream opened for input and output at once.
p = pyaudio.PyAudio()
stream = p.open(
    rate=RATE,
    channels=CHANNELS,
    format=FORMAT,
    frames_per_buffer=FRAMES_PER_BUFFER,
    input=True,
    output=True,
)

# Assistant credentials: replace both placeholders before running.
assistant_api_key = "your_api_key_here"
assistant_id = "your_assistant_id_here"

# Websocket endpoint; `vision=1` and the assistant id are query parameters.
endpoint = f'wss://assistant-audio-stag.dvcbot.net/ws?vision=1&assistant_id={assistant_id}'

async def send_audio(websocket):
    """Stream captured audio to the server as serialized ``Frame`` messages.

    Repeatedly reads ``FRAMES_PER_BUFFER`` frames from the module-level
    ``stream``, wraps them in a ``frames_pb2.Frame`` audio message and sends
    the serialized bytes over ``websocket``. The loop ends, after logging, on
    the first error of any kind.

    Args:
        websocket: Open websocket connection to send the frames on.
    """
    while True:
        try:
            # Read audio data without raising on input overflow.
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            frame = frames_pb2.Frame()
            frame.audio.audio = data
            frame.audio.sample_rate = RATE
            frame.audio.num_channels = CHANNELS
            await websocket.send(frame.SerializeToString())
        except websockets.exceptions.ConnectionClosedError as e:
            logger.error(e)
            break
        except Exception as e:
            # loguru formats with str.format, so the message needs a `{}`
            # placeholder or the exception text is silently dropped.
            logger.error("Error sending audio data: {}", e)
            break
        # Yield to the event loop so other tasks can run between reads.
        await asyncio.sleep(0.01)

async def send_image(websocket, image_path, size=(640, 480), image_format="png"):
    """Read an image file and send it to the server as one ``Frame`` message.

    Args:
        websocket: Open websocket connection to send the frame on.
        image_path: Path of the image file to upload.
        size: Integers written to ``frame.image.size``. Defaults to
            ``(640, 480)``, presumably (width, height) -- TODO confirm against
            the Frame schema. The value is not checked against the file.
        image_format: String written to ``frame.image.format``, e.g. ``"png"``
            or ``"jpg"``. Defaults to ``"png"``.
    """
    # Keep the file open only while reading it, not during the network send.
    async with aiofiles.open(image_path, 'rb') as img_file:
        image_data = await img_file.read()

    frame = frames_pb2.Frame()
    frame.image.image = image_data
    frame.image.size.extend(size)
    frame.image.format = image_format
    logger.info("Sending image...")
    await websocket.send(frame.SerializeToString())
    logger.info("Image sent successfully.")

async def receive(websocket):
    """Play audio from incoming ``Frame`` messages; other frames are ignored."""
    async for raw_message in websocket:
        parsed = frames_pb2.Frame()
        parsed.ParseFromString(raw_message)

        if not parsed.HasField('audio'):
            if parsed.HasField('text') and parsed.text.text == '__interrupt__':
                # Placeholder: interruption handling would go here.
                pass
            continue

        # Skip the first 44 bytes (the WAV header) and play the rest.
        playable = parsed.audio.audio[44:]
        stream.write(playable)

def image_upload_trigger(websocket, loop=None):
    """Prompt for image paths on stdin and upload each one over ``websocket``.

    Meant to run in a worker thread, since ``input()`` blocks. Returns when
    the user types ``exit``. A failed upload is logged and the prompt repeats.

    Args:
        websocket: Open websocket connection passed on to ``send_image``.
        loop: Event loop that owns ``websocket``. When given, each upload is
            scheduled on that loop with ``asyncio.run_coroutine_threadsafe``
            so the connection is only used from its own loop. When omitted,
            the earlier behaviour (a private loop via ``asyncio.run``) is
            kept for backward compatibility.
    """
    while True:
        image_path = input("Please enter the image path and press Enter to upload (or type 'exit' to quit):\n")
        if image_path.lower() == 'exit':
            break
        if not image_path:
            continue
        try:
            if loop is None:
                asyncio.run(send_image(websocket, image_path))
            else:
                upload = asyncio.run_coroutine_threadsafe(
                    send_image(websocket, image_path), loop
                )
                # Wait for completion so upload errors surface in this thread.
                upload.result()
        except Exception as e:
            # One bad path or a failed send must not end the prompt loop.
            logger.error("Image upload failed for {!r}: {}", image_path, e)

async def main():
    """Connect to ``endpoint`` and run audio streaming plus the image-upload prompt.

    Starts ``send_audio`` and ``receive`` as tasks and
    ``image_upload_trigger`` in a separate thread, then waits for all three.
    """
    async with websockets.connect(endpoint, subprotocols=['proto', assistant_api_key], ssl=True) as ws:
        logger.info('WebSocket connection established.')
        loop = asyncio.get_running_loop()

        # Start audio send/receive in asyncio tasks
        send_task = asyncio.create_task(send_audio(ws))
        receive_task = asyncio.create_task(receive(ws))

        # Start a separate thread for user input; it gets this loop so that
        # uploads run on the loop that owns the websocket.
        upload_thread = threading.Thread(target=image_upload_trigger, args=(ws, loop))
        upload_thread.start()

        await asyncio.gather(send_task, receive_task)
        # Join in an executor: a direct join() would block this loop and
        # deadlock any upload the thread is still waiting on.
        await loop.run_in_executor(None, upload_thread.join)

# Run the main function to start the program.
try:
    asyncio.run(main())
finally:
    # Release the audio stream and PyAudio instance however the session ends
    # (normal return, error, or Ctrl-C).
    stream.close()
    p.terminate()

...

HTML code w/ embedded JavaScript.

Appendix: Decode/Encode w/o ProtoBuf

...