Websockets

import asyncio
import websockets
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one exists) before reading the key,
# and stop right away when no key is available.
load_dotenv()
api_key = os.getenv('RIME_API_KEY')
if not api_key:
    raise ValueError("RIME_API_KEY environment variable is not set")

# Destination path for the synthesized audio.
FILE_PATH = "arcana_ws.wav"

class RimeClient:
    """Minimal client for the Rime Arcana text-to-speech websocket endpoint.

    Text tokens are sent as individual websocket messages while the frames
    arriving on the same connection are collected in ``audio_data``.
    """

    def __init__(self, speaker, api_key):
        """Build the connection URL and auth header; no I/O happens here.

        Args:
            speaker: Voice name placed in the ``speaker`` query parameter.
            api_key: Key sent as a ``Bearer`` token in the ``Authorization``
                header.
        """
        self.url = f"wss://users.rime.ai/ws?speaker={speaker}&modelId=arcana&audioFormat=wav"
        self.auth_headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # All audio received so far, concatenated in arrival order.
        self.audio_data = b''

    async def send_tokens(self, websocket, message):
        """Send every item of ``message`` as its own websocket message, in order."""
        for token in message:
            await websocket.send(token)

    async def handle_audio(self, websocket):
        """Receive frames until the connection closes and store them.

        Iterating a ``websockets`` connection stops quietly on a normal
        close and raises on an abnormal one, which matches a manual
        ``recv()`` loop that breaks on ``ConnectionClosedOK``.

        Whatever was received before the loop ended, normally or through an
        exception, is appended to ``audio_data``.
        """
        # Collect chunks and join once: growing a bytes object with ``+=``
        # would re-copy everything received so far on every frame.
        chunks = []
        try:
            async for audio in websocket:
                chunks.append(audio)
        finally:
            self.audio_data += b"".join(chunks)

    async def run(self, message):
        """Open the connection, then send tokens and receive audio concurrently.

        Returns once both the sender and the receiver have finished; the
        connection is closed when the ``async with`` block exits.
        """
        async with websockets.connect(self.url, additional_headers=self.auth_headers) as websocket:
            await asyncio.gather(
                self.send_tokens(websocket, message),
                self.handle_audio(websocket),
            )

    def save_audio(self, file_path):
        """Write the collected ``audio_data`` to ``file_path`` in binary mode."""
        with open(file_path, 'wb') as f:
            f.write(self.audio_data)

# Each entry is sent to the server as a separate websocket message, in order.
# The final "<EOS>" entry is a command rather than text to be spoken.
message = [
    "This ", "is ", "a ", "test ",
    "of ", "the ", "arcana ", "model ",
    "using ", "websockets ", "and ", "python.",
    "<EOS>",
]

# Stream the tokens, gather the audio that comes back, then write it to disk.
client = RimeClient(speaker="astra", api_key=api_key)
asyncio.run(client.run(message))

print(f"Saving audio to {FILE_PATH}")
client.save_audio(file_path=FILE_PATH)

GET

wss://users.rime.ai

import asyncio
import websockets
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one exists) before reading the key,
# and stop right away when no key is available.
load_dotenv()
api_key = os.getenv('RIME_API_KEY')
if not api_key:
    raise ValueError("RIME_API_KEY environment variable is not set")

# Destination path for the synthesized audio.
FILE_PATH = "arcana_ws.wav"

class RimeClient:
    """Minimal client for the Rime Arcana text-to-speech websocket endpoint.

    Text tokens are sent as individual websocket messages while the frames
    arriving on the same connection are collected in ``audio_data``.
    """

    def __init__(self, speaker, api_key):
        """Build the connection URL and auth header; no I/O happens here.

        Args:
            speaker: Voice name placed in the ``speaker`` query parameter.
            api_key: Key sent as a ``Bearer`` token in the ``Authorization``
                header.
        """
        self.url = f"wss://users.rime.ai/ws?speaker={speaker}&modelId=arcana&audioFormat=wav"
        self.auth_headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # All audio received so far, concatenated in arrival order.
        self.audio_data = b''

    async def send_tokens(self, websocket, message):
        """Send every item of ``message`` as its own websocket message, in order."""
        for token in message:
            await websocket.send(token)

    async def handle_audio(self, websocket):
        """Receive frames until the connection closes and store them.

        Iterating a ``websockets`` connection stops quietly on a normal
        close and raises on an abnormal one, which matches a manual
        ``recv()`` loop that breaks on ``ConnectionClosedOK``.

        Whatever was received before the loop ended, normally or through an
        exception, is appended to ``audio_data``.
        """
        # Collect chunks and join once: growing a bytes object with ``+=``
        # would re-copy everything received so far on every frame.
        chunks = []
        try:
            async for audio in websocket:
                chunks.append(audio)
        finally:
            self.audio_data += b"".join(chunks)

    async def run(self, message):
        """Open the connection, then send tokens and receive audio concurrently.

        Returns once both the sender and the receiver have finished; the
        connection is closed when the ``async with`` block exits.
        """
        async with websockets.connect(self.url, additional_headers=self.auth_headers) as websocket:
            await asyncio.gather(
                self.send_tokens(websocket, message),
                self.handle_audio(websocket),
            )

    def save_audio(self, file_path):
        """Write the collected ``audio_data`` to ``file_path`` in binary mode."""
        with open(file_path, 'wb') as f:
            f.write(self.audio_data)

# Each entry is sent to the server as a separate websocket message, in order.
# The final "<EOS>" entry is a command rather than text to be spoken.
message = [
    "This ", "is ", "a ", "test ",
    "of ", "the ", "arcana ", "model ",
    "using ", "websockets ", "and ", "python.",
    "<EOS>",
]

# Stream the tokens, gather the audio that comes back, then write it to disk.
client = RimeClient(speaker="astra", api_key=api_key)
asyncio.run(client.run(message))

print(f"Saving audio to {FILE_PATH}")
client.save_audio(file_path=FILE_PATH)

Overview

Rime’s websocket implementation accepts bare text, and responds with audio bytes of the selected format. All synthesis arguments are provided as query parameters when establishing the connection.

Messages

Send

The messages your client will send to the websocket API will be bare (non-serialized) text.

This will be converted to audio via websockets

Receive

The messages your client will receive will be raw audio bytes in the audio format specified at connection time.

<FF>^@^@^@9LAME3.100^AP^@^@^@^@^@^@^@^@^T<A0>$^D>"^@^@<A0>^@^@<A8><C0><BA><9D>G^N^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@
^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@
^@^@^@^@^@^@^@^@^@^@

Commands

There will be times when you want to interact with the API and manipulate the stored buffer. These are the currently supported commands.

`<CLEAR>`

This clears the current buffer. Used in the event of interruptions.

`<FLUSH>`

This forces whatever buffer exists, if any, to be synthesized, and the generated audio to be sent over.

`<EOS>`

This forces whatever buffer exists, if any, to be synthesized, and for the server to close the connection after sending the generated audio.

Variable Parameters

speaker

string

required

Must be one of the voices listed in our documentation for arcana.

text

string

required

The text you’d like spoken. Character limit per request is 500 via the API and 1,000 in the dashboard UI.

modelId

string

This value must be set to arcana; otherwise the websockets server will default to mistv2 for speech synthesis.

audioFormat

string

One of mp3, mulaw, or pcm

lang

string

default:"eng"

If provided, the language must match the language spoken by the provided speaker. This can be checked in our voices documentation.

repetition_penalty

float

default:"1.5"

The repetition penalty. We do not recommend changing this from the default value. Typical range is 1 to 2. Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far. Values > 1 encourage the model to use new tokens, while values < 1 encourage the model to repeat tokens.

temperature

float

default:"0.5"

The temperature. We do not recommend changing this from the default value. Typical range is 0 to 1. Float that controls the randomness of the sampling. Lower values make the model more deterministic, while higher values make the model more random. Zero means greedy sampling.

top_p

float

default:"1"

The top p. We do not recommend changing this from the default value. Typical range is 0 to 1. Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to 1 to consider all tokens.

samplingRate

int

default:"24000"

The sampling rate (Hz).

On-cloud: Accepted values: 8000, 16000, 22050, 24000, 44100, 48000, 96000. Anything above 24000 is up sampling.
On-prem: Any value is accepted.

samplingRate

int

The value, if provided, must be between 4000 and 44100. Default: 22050

segment

string

default:"bySentence"

Controls how text is segmented for synthesis. Available options:

“immediate” - Synthesizes text immediately without waiting for complete sentences
“never” - Never segments the text, waits for explicit flush or EOS
“bySentence” (default) - Waits for complete sentences before synthesis

Note: For backward compatibility, setting immediate=true in query params is equivalent to segment=immediate. If a null value is provided, it will default to “bySentence”.

import asyncio
import websockets
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one exists) before reading the key,
# and stop right away when no key is available.
load_dotenv()
api_key = os.getenv('RIME_API_KEY')
if not api_key:
    raise ValueError("RIME_API_KEY environment variable is not set")

# Destination path for the synthesized audio.
FILE_PATH = "arcana_ws.wav"

class RimeClient:
    """Minimal client for the Rime Arcana text-to-speech websocket endpoint.

    Text tokens are sent as individual websocket messages while the frames
    arriving on the same connection are collected in ``audio_data``.
    """

    def __init__(self, speaker, api_key):
        """Build the connection URL and auth header; no I/O happens here.

        Args:
            speaker: Voice name placed in the ``speaker`` query parameter.
            api_key: Key sent as a ``Bearer`` token in the ``Authorization``
                header.
        """
        self.url = f"wss://users.rime.ai/ws?speaker={speaker}&modelId=arcana&audioFormat=wav"
        self.auth_headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # All audio received so far, concatenated in arrival order.
        self.audio_data = b''

    async def send_tokens(self, websocket, message):
        """Send every item of ``message`` as its own websocket message, in order."""
        for token in message:
            await websocket.send(token)

    async def handle_audio(self, websocket):
        """Receive frames until the connection closes and store them.

        Iterating a ``websockets`` connection stops quietly on a normal
        close and raises on an abnormal one, which matches a manual
        ``recv()`` loop that breaks on ``ConnectionClosedOK``.

        Whatever was received before the loop ended, normally or through an
        exception, is appended to ``audio_data``.
        """
        # Collect chunks and join once: growing a bytes object with ``+=``
        # would re-copy everything received so far on every frame.
        chunks = []
        try:
            async for audio in websocket:
                chunks.append(audio)
        finally:
            self.audio_data += b"".join(chunks)

    async def run(self, message):
        """Open the connection, then send tokens and receive audio concurrently.

        Returns once both the sender and the receiver have finished; the
        connection is closed when the ``async with`` block exits.
        """
        async with websockets.connect(self.url, additional_headers=self.auth_headers) as websocket:
            await asyncio.gather(
                self.send_tokens(websocket, message),
                self.handle_audio(websocket),
            )

    def save_audio(self, file_path):
        """Write the collected ``audio_data`` to ``file_path`` in binary mode."""
        with open(file_path, 'wb') as f:
            f.write(self.audio_data)

# Each entry is sent to the server as a separate websocket message, in order.
# The final "<EOS>" entry is a command rather than text to be spoken.
message = [
    "This ", "is ", "a ", "test ",
    "of ", "the ", "arcana ", "model ",
    "using ", "websockets ", "and ", "python.",
    "<EOS>",
]

# Stream the tokens, gather the audio that comes back, then write it to disk.
client = RimeClient(speaker="astra", api_key=api_key)
asyncio.run(client.run(message))

print(f"Saving audio to {FILE_PATH}")
client.save_audio(file_path=FILE_PATH)

Streaming μ-law Streaming PCM

⌘I

Documentation

Arcana API reference

Mist v2 API reference

API Metadata

Other APIs

Overview

Messages

Send

Receive

Commands

`<CLEAR>`

`<FLUSH>`

`<EOS>`

Variable Parameters

Documentation

Arcana API reference

Mist v2 API reference

API Metadata

Other APIs

​Overview

​Messages

​Send

​Receive

​Commands

​<CLEAR>

​<FLUSH>

​<EOS>

​Variable Parameters

Overview

Messages

Send

Receive

Commands

`<CLEAR>`

`<FLUSH>`

`<EOS>`

Variable Parameters