Skip to main content
import asyncio
import websockets
import os
from dotenv import load_dotenv

# Pull settings from a local .env file (when present) into the environment.
load_dotenv()

# The API key is mandatory: stop right away if it is missing or empty.
api_key = os.getenv('RIME_API_KEY')
if not api_key:
    raise ValueError("RIME_API_KEY environment variable is not set")

# Destination file for the audio collected from the websocket.
FILE_PATH = "arcana_ws.wav"

class RimeClient:
    """Minimal streaming client for the Rime text-to-speech websocket API.

    Text tokens are sent over the websocket as individual messages and
    every message received back is appended to ``audio_data``.
    """

    def __init__(self, speaker: str, api_key: str) -> None:
        """Prepare the connection URL and auth header; performs no I/O.

        Args:
            speaker: Voice name placed in the ``speaker`` query parameter.
            api_key: Key sent as a ``Bearer`` token in the Authorization
                header.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlencode

        # urlencode percent-escapes the speaker so characters such as "&",
        # "=" or spaces cannot corrupt or inject query parameters.
        query = urlencode({
            "speaker": speaker,
            "modelId": "arcana",
            "audioFormat": "wav",
        })
        self.url = f"wss://users.rime.ai/ws?{query}"
        self.auth_headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # Audio received so far, in arrival order.
        self.audio_data = b''

    async def send_tokens(self, websocket, message):
        """Send each item of ``message`` as its own websocket message."""
        for token in message:
            await websocket.send(token)

    async def handle_audio(self, websocket):
        """Receive messages until the connection closes normally.

        Every received payload is appended to ``audio_data``. The loop ends
        when ``recv`` raises ``ConnectionClosedOK``; any other exception
        propagates to the caller.
        """
        # A bytearray grows in place, so receiving many frames no longer
        # re-copies the whole buffer on every append as ``bytes +=`` does.
        buffer = bytearray(self.audio_data)
        try:
            while True:
                try:
                    chunk = await websocket.recv()
                except websockets.exceptions.ConnectionClosedOK:
                    break
                buffer += chunk
        finally:
            # Publish whatever was received, even if the loop exits with an
            # error, so partial audio is not lost.
            self.audio_data = bytes(buffer)

    async def run(self, message):
        """Open the websocket, then send tokens and collect audio concurrently.

        Args:
            message: Iterable of text tokens handed to ``send_tokens``.
        """
        async with websockets.connect(self.url, additional_headers=self.auth_headers) as websocket:
            await asyncio.gather(
                self.send_tokens(websocket, message),
                self.handle_audio(websocket),
            )

    def save_audio(self, file_path):
        """Write the collected audio bytes to ``file_path`` in binary mode."""
        with open(file_path, 'wb') as f:
            f.write(self.audio_data)

# Text is streamed one token per websocket message. The final "<EOS>" token
# asks the server to synthesize what is buffered and close the connection.
message = [
    "This ", "is ", "a ", "test ",
    "of ", "the ", "arcana ", "model ",
    "using ", "websockets ", "and ", "python.",
    "<EOS>",
]

# Stream the tokens and collect the returned audio in one event-loop run.
client = RimeClient(speaker="astra", api_key=api_key)
asyncio.run(client.run(message=message))

# Persist the collected audio to disk.
print(f"Saving audio to {FILE_PATH}")
client.save_audio(file_path=FILE_PATH)

Overview

Rime’s websocket implementation accepts bare text, and responds with audio bytes of the selected format. All synthesis arguments are provided as query parameters when establishing the connection.

Messages

Send

The messages your client will send to the websocket API will be bare (non-serialized) text.
The server converts this text to audio and returns it over the websocket.

Receive

The messages your client will receive will be raw audio bytes in the audio format specified at connection time.
<FF>^@^@^@9LAME3.100^AP^@^@^@^@^@^@^@^@^T<A0>$^D>"^@^@<A0>^@^@<A8><C0><BA><9D>G^N^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@
^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@
^@^@^@^@^@^@^@^@^@^@

Commands

There will be times when you want to interact with the API and manipulate the stored buffer. These are the currently supported commands.

<CLEAR>

This clears the current buffer. Used in the event of interruptions.

<FLUSH>

This forces whatever buffer exists, if any, to be synthesized, and the generated audio to be sent over.

<EOS>

This forces whatever buffer exists, if any, to be synthesized, and for the server to close the connection after sending the generated audio.

Variable Parameters

speaker
string
required
Must be one of the voices listed in our documentation for arcana.
text
string
required
The text you’d like spoken. Character limit per request is 500 via the API and 1,000 in the dashboard UI.
modelId
string
This value must be set to arcana; otherwise the websocket server will default to mistv2 for speech synthesis.
audioFormat
string
One of mp3, mulaw, or pcm
lang
string
default:"eng"
If provided, the language must match the language spoken by the provided speaker. This can be checked in our voices documentation.
repetition_penalty
float
default:"1.5"
The repetition penalty. We do not recommend changing this from the default value. Typical range is 1 to 2. A float that penalizes new tokens based on whether they appear in the prompt and the generated text so far. Values > 1 encourage the model to use new tokens, while values < 1 encourage the model to repeat tokens.
temperature
float
default:"0.5"
The temperature. We do not recommend changing this from the default value. Typical range is 0 to 1. A float that controls the randomness of the sampling. Lower values make the model more deterministic, while higher values make the model more random. Zero means greedy sampling.
top_p
float
default:"1"
The top p. We do not recommend changing this from the default value. Typical range is 0 to 1. A float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to 1 to consider all tokens.
samplingRate
int
default:"24000"
The sampling rate (Hz).
  • On-cloud: Accepted values: 8000, 16000, 22050, 24000, 44100, 48000, 96000. Anything above 24000 is up sampling.
  • On-prem: Any value is accepted.
samplingRate
int
The value, if provided, must be between 4000 and 44100. Default: 22050
segment
string
default:"bySentence"
Controls how text is segmented for synthesis. Available options:
  • “immediate” - Synthesizes text immediately without waiting for complete sentences
  • “never” - Never segments the text, waits for explicit flush or EOS
  • “bySentence” (default) - Waits for complete sentences before synthesis
Note: For backward compatibility, setting immediate=true in query params is equivalent to segment=immediate. If a null value is provided, it will default to “bySentence”.
import asyncio
import websockets
import os
from dotenv import load_dotenv

# Pull settings from a local .env file (when present) into the environment.
load_dotenv()

# The API key is mandatory: stop right away if it is missing or empty.
api_key = os.getenv('RIME_API_KEY')
if not api_key:
    raise ValueError("RIME_API_KEY environment variable is not set")

# Destination file for the audio collected from the websocket.
FILE_PATH = "arcana_ws.wav"

class RimeClient:
    """Minimal streaming client for the Rime text-to-speech websocket API.

    Text tokens are sent over the websocket as individual messages and
    every message received back is appended to ``audio_data``.
    """

    def __init__(self, speaker: str, api_key: str) -> None:
        """Prepare the connection URL and auth header; performs no I/O.

        Args:
            speaker: Voice name placed in the ``speaker`` query parameter.
            api_key: Key sent as a ``Bearer`` token in the Authorization
                header.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlencode

        # urlencode percent-escapes the speaker so characters such as "&",
        # "=" or spaces cannot corrupt or inject query parameters.
        query = urlencode({
            "speaker": speaker,
            "modelId": "arcana",
            "audioFormat": "wav",
        })
        self.url = f"wss://users.rime.ai/ws?{query}"
        self.auth_headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # Audio received so far, in arrival order.
        self.audio_data = b''

    async def send_tokens(self, websocket, message):
        """Send each item of ``message`` as its own websocket message."""
        for token in message:
            await websocket.send(token)

    async def handle_audio(self, websocket):
        """Receive messages until the connection closes normally.

        Every received payload is appended to ``audio_data``. The loop ends
        when ``recv`` raises ``ConnectionClosedOK``; any other exception
        propagates to the caller.
        """
        # A bytearray grows in place, so receiving many frames no longer
        # re-copies the whole buffer on every append as ``bytes +=`` does.
        buffer = bytearray(self.audio_data)
        try:
            while True:
                try:
                    chunk = await websocket.recv()
                except websockets.exceptions.ConnectionClosedOK:
                    break
                buffer += chunk
        finally:
            # Publish whatever was received, even if the loop exits with an
            # error, so partial audio is not lost.
            self.audio_data = bytes(buffer)

    async def run(self, message):
        """Open the websocket, then send tokens and collect audio concurrently.

        Args:
            message: Iterable of text tokens handed to ``send_tokens``.
        """
        async with websockets.connect(self.url, additional_headers=self.auth_headers) as websocket:
            await asyncio.gather(
                self.send_tokens(websocket, message),
                self.handle_audio(websocket),
            )

    def save_audio(self, file_path):
        """Write the collected audio bytes to ``file_path`` in binary mode."""
        with open(file_path, 'wb') as f:
            f.write(self.audio_data)

# Text is streamed one token per websocket message. The final "<EOS>" token
# asks the server to synthesize what is buffered and close the connection.
message = [
    "This ", "is ", "a ", "test ",
    "of ", "the ", "arcana ", "model ",
    "using ", "websockets ", "and ", "python.",
    "<EOS>",
]

# Stream the tokens and collect the returned audio in one event-loop run.
client = RimeClient(speaker="astra", api_key=api_key)
asyncio.run(client.run(message=message))

# Persist the collected audio to disk.
print(f"Saving audio to {FILE_PATH}")
client.save_audio(file_path=FILE_PATH)
I