...
Sample Code
Audio-Only Mode
...
Python (w/o interruption handling)
install pyaudio, websockets, loguru
import the source code generated by ProtoBuf (file name: frames_pb2.py)
Check Source code or the example below
"""Audio-only sample client (without interruption handling).

Streams microphone audio to the assistant over a websocket as Protobuf
``Frame`` messages and plays back the audio frames the server returns.
"""

import asyncio
from concurrent.futures import ThreadPoolExecutor

import pyaudio
import websockets
from loguru import logger

import frames_pb2

# Configure the logger to output to the 'packet.log' file.
logger.add('packet.log')

# Configure the audio stream.
FRAMES_PER_BUFFER = 512
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# Received audio payloads start with a WAV header that must be skipped
# before the samples are written to the output stream.
WAV_HEADER_SIZE = 44

# Close code this sample expects when the server ends the session; any other
# code is reported as unexpected.
EXPECTED_CLOSE_CODE = 4008

# Initialize the PyAudio instance.
p = pyaudio.PyAudio()

# Open the audio stream (full duplex: microphone in, speaker out).
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    output=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
)

# PyAudio reads/writes block the calling thread.  They run on this executor
# so the event loop stays responsive; a single worker keeps reads and writes
# on the shared stream serialized, exactly as in a single-threaded flow.
audio_executor = ThreadPoolExecutor(max_workers=1)

# Configure the assistant.
assistant_id = "your_assistant_id_here"
assistant_api_key = "your_api_key_here"
endpoint = f'wss://assistant-audio-prod.dvcbot.net/ws?assistant_id={assistant_id}'  # Websocket endpoint.


def _read_chunk():
    """Read one buffer of microphone audio (blocking)."""
    return stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)


async def send(websocket):
    """Send microphone audio to the server until the connection closes.

    Args:
        websocket: An open websocket connection to the assistant endpoint.

    Raises:
        Exception: Any error other than a closed connection is logged with
            its traceback and re-raised.
    """
    loop = asyncio.get_running_loop()
    while True:
        try:
            # Read audio data from the stream without blocking the event loop.
            data = await loop.run_in_executor(audio_executor, _read_chunk)

            frame = frames_pb2.Frame()
            frame.audio.audio = data
            frame.audio.sample_rate = RATE
            frame.audio.num_channels = CHANNELS
            payload = frame.SerializeToString()  # Serialize once, reuse below.

            logger.debug(f"Sent frame={frame}")
            logger.debug(f"Sent bytestring={payload}")

            # Send the audio data to the server.
            await websocket.send(payload)
        except websockets.exceptions.ConnectionClosed as e:
            logger.error(e)
            if e.code != EXPECTED_CLOSE_CODE:
                logger.warning("Connection closed with unexpected code {}", e.code)
            break
        except Exception:
            # logger.exception records the traceback; the error is re-raised
            # instead of relying on `assert`, which is stripped under -O.
            logger.exception("Not a websocket 4008 error")
            raise


async def receive(websocket):
    """Receive frames from the server and play any audio they carry.

    Args:
        websocket: An open websocket connection to the assistant endpoint.
    """
    loop = asyncio.get_running_loop()
    async for message in websocket:
        logger.debug(f"Received bytestring={message}")

        # Deserialize the message using Protobuf (parsed once; the return
        # value is what the original sample logged as the frame length).
        frame = frames_pb2.Frame()
        parsed_length = frame.ParseFromString(message)
        logger.debug(f"Received frame={frame}")
        logger.debug(f"Received frame length={parsed_length}")

        # Check the type of frame.
        if frame.HasField('audio'):
            # Play the received audio data.
            audio_data = frame.audio.audio
            logger.debug(f"Received audio_data={audio_data}")
            logger.debug(f"Type of audio data={type(audio_data)}")
            # Ignore the WAV header in the audio data; write off-loop because
            # stream.write blocks until the device accepts the samples.
            await loop.run_in_executor(
                audio_executor, stream.write, audio_data[WAV_HEADER_SIZE:]
            )
        elif frame.HasField('text'):
            # Interrupts are only logged here; this sample does not handle
            # them (restarting the stream would clear the playback buffer).
            if frame.text.text == '__interrupt__':
                logger.debug(f"Received text={frame.text.text}")
                logger.debug(f"Type of text data={type(frame.text.text)}")


async def send_receive():
    """Open the websocket connection and run sending and receiving concurrently."""
    logger.info(f'Connecting to the websocket at URL: {endpoint}')
    async with websockets.connect(endpoint, subprotocols=['proto', assistant_api_key], ssl=True) as ws:
        await asyncio.sleep(0.1)
        logger.info("Beginning to send messages...")

        # Create tasks for sending and receiving data.
        send_task = asyncio.create_task(send(ws))
        receive_task = asyncio.create_task(receive(ws))

        # Run both tasks concurrently.
        await asyncio.gather(send_task, receive_task)


if __name__ == "__main__":
    # Run the main coroutine, then always release the audio resources.
    try:
        asyncio.run(send_receive())
    finally:
        audio_executor.shutdown(wait=True)
        stream.stop_stream()
        stream.close()
        p.terminate()
...
HTML code w/ embedded JavaScript.
Check this file
Audio+Image Mode
...
Python (w/o interruption handling)
install pyaudio, websockets, aiofiles, loguru
import the generated source code by ProtoBuf (frames_pb2.py)
Check Source code or the example below
程式碼區塊 | ||
---|---|---|
| ||
"""Audio+image sample client (without interruption handling).

Streams microphone audio to the assistant over a websocket, plays back the
audio it returns, and uploads image files whose paths are typed on stdin.
"""

import asyncio
import threading
from concurrent.futures import ThreadPoolExecutor

import aiofiles
import pyaudio
import websockets
from loguru import logger

import frames_pb2

# Configure the logger to output to the 'packet.log' file.
logger.add('packet.log')

# Configure the audio stream.
FRAMES_PER_BUFFER = 512
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# Received audio payloads start with a WAV header that must be skipped
# before the samples are written to the output stream.
WAV_HEADER_SIZE = 44

# Initialize the PyAudio instance.
p = pyaudio.PyAudio()

# Open the audio stream (full duplex: microphone in, speaker out).
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    output=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
)

# PyAudio reads/writes block the calling thread.  They run on this executor
# so the event loop stays responsive; a single worker keeps reads and writes
# on the shared stream serialized, exactly as in a single-threaded flow.
audio_executor = ThreadPoolExecutor(max_workers=1)

# Configure the assistant.
assistant_id = "your_assistant_id_here"
assistant_api_key = "your_api_key_here"
endpoint = f'wss://assistant-audio-stag.dvcbot.net/ws?vision=1&assistant_id={assistant_id}'


def _read_chunk():
    """Read one buffer of microphone audio (blocking)."""
    return stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)


async def send_audio(websocket):
    """Send microphone audio to the server until the connection closes or an error occurs.

    Args:
        websocket: An open websocket connection to the assistant endpoint.
    """
    loop = asyncio.get_running_loop()
    while True:
        try:
            # Read off-loop so the blocking call does not stall other tasks.
            data = await loop.run_in_executor(audio_executor, _read_chunk)
            frame = frames_pb2.Frame()
            frame.audio.audio = data
            frame.audio.sample_rate = RATE
            frame.audio.num_channels = CHANNELS
            await websocket.send(frame.SerializeToString())
        except websockets.exceptions.ConnectionClosedError as e:
            logger.error(e)
            break
        except Exception:
            # logger.exception records the exception and traceback; passing
            # the exception as a bare positional argument would drop it.
            logger.exception("Error sending audio data")
            break


async def send_image(websocket, image_path, size=(640, 480), image_format="png"):
    """Read an image file and send it to the server as an image frame.

    Args:
        websocket: An open websocket connection to the assistant endpoint.
        image_path: Path of the image file to upload.
        size: Image dimensions reported to the server; defaults to the
            640x480 assumed by the original sample.
        image_format: Image format reported to the server ("png" or "jpg").
    """
    async with aiofiles.open(image_path, 'rb') as img_file:
        image_data = await img_file.read()

    frame = frames_pb2.Frame()
    frame.image.image = image_data
    frame.image.size.extend(list(size))
    frame.image.format = image_format

    logger.info("Sending image...")
    await websocket.send(frame.SerializeToString())
    logger.info("Image sent successfully.")


async def receive(websocket):
    """Receive frames from the server and play any audio they carry.

    Args:
        websocket: An open websocket connection to the assistant endpoint.
    """
    loop = asyncio.get_running_loop()
    async for message in websocket:
        frame = frames_pb2.Frame()
        frame.ParseFromString(message)

        if frame.HasField('audio'):
            # Ignore the WAV header in the audio data; write off-loop because
            # stream.write blocks until the device accepts the samples.
            await loop.run_in_executor(
                audio_executor, stream.write, frame.audio.audio[WAV_HEADER_SIZE:]
            )
        elif frame.HasField('text'):
            if frame.text.text == '__interrupt__':
                # Handle interrupt if necessary.
                pass


def image_upload_trigger(websocket, loop=None):
    """Prompt for image paths on stdin and upload each one until 'exit'.

    Intended to run in a worker thread.  The websocket belongs to the event
    loop that opened it, so uploads are scheduled onto that loop rather than
    run on a new loop inside this thread.

    Args:
        websocket: An open websocket connection to the assistant endpoint.
        loop: The event loop that owns ``websocket``.  When omitted, the
            upload falls back to ``asyncio.run`` for backward compatibility,
            which is not safe while another loop is driving the connection.
    """
    while True:
        try:
            image_path = input("Please enter the image path and press Enter to upload (or type 'exit' to quit):\n")
        except EOFError:
            # stdin was closed; there is nothing more to read.
            break
        if image_path.lower() == 'exit':
            break
        if not image_path:
            continue
        try:
            if loop is None:
                asyncio.run(send_image(websocket, image_path))
            else:
                # Hand the coroutine to the owning loop and wait for it here.
                asyncio.run_coroutine_threadsafe(
                    send_image(websocket, image_path), loop
                ).result()
        except Exception:
            # A bad path or closed connection must not kill the prompt thread.
            logger.exception("Failed to upload image {}", image_path)


async def main():
    """Connect to the assistant and run audio streaming plus the image prompt."""
    loop = asyncio.get_running_loop()
    async with websockets.connect(endpoint, subprotocols=['proto', assistant_api_key], ssl=True) as ws:
        logger.info('WebSocket connection established.')

        # Start audio send/receive in asyncio tasks
        send_task = asyncio.create_task(send_audio(ws))
        receive_task = asyncio.create_task(receive(ws))

        # Start a separate thread for waiting user input and image upload
        upload_thread = threading.Thread(target=image_upload_trigger, args=(ws, loop))
        upload_thread.start()

        await asyncio.gather(send_task, receive_task)
        # Join off-loop: the prompt thread waits on coroutines scheduled on
        # this loop, so blocking the loop here would deadlock a pending upload.
        await loop.run_in_executor(None, upload_thread.join)


if __name__ == "__main__":
    # Run the main function, then always release the audio resources.
    try:
        asyncio.run(main())
    finally:
        audio_executor.shutdown(wait=True)
        stream.stop_stream()
        stream.close()
        p.terminate()
...
HTML code w/ embedded JavaScript.
Check this file
Appendix: Decode/Encode w/o ProtoBuf
...