LLM Streaming
SvelteVirtualChat handles streaming natively. As a message grows token by token, ResizeObserver detects the height change and the viewport stays pinned to bottom. No special handling is needed from your code — just mutate the message content.
How It Works
- Create an empty assistant message with `isStreaming: true`
- Append tokens to `message.content` as they arrive
- The component's ResizeObserver detects the height change
- If following bottom, the viewport snaps to the new bottom (batched per animation frame)
- When done, set `isStreaming = false`
With @humanspeak/svelte-markdown
Pair with @humanspeak/svelte-markdown for rich markdown rendering during streaming:
<script lang="ts">
import SvelteVirtualChat from '@humanspeak/svelte-virtual-chat'
import SvelteMarkdown from '@humanspeak/svelte-markdown'
// Shape of each chat message; isStreaming marks an assistant reply still in flight.
type Message = {
id: string
role: 'user' | 'assistant'
content: string
isStreaming?: boolean
}
// $state makes token-by-token content mutations reactive, so appends re-render.
let messages: Message[] = $state([...])
</script>
<!-- containerClass fixes the scroll height; viewportClass fills it. -->
<SvelteVirtualChat
{messages}
getMessageId={(msg) => msg.id}
containerClass="h-[600px]"
viewportClass="h-full"
>
{#snippet renderMessage(message, index)}
<div class="p-4 border-b">
<!-- Assistant messages render as markdown; the streaming flag is forwarded so partial markdown parses gracefully. -->
{#if message.role === 'assistant'}
<SvelteMarkdown
source={message.content}
streaming={message.isStreaming ?? false}
/>
{:else}
<p>{message.content}</p>
{/if}
</div>
{/snippet}
</SvelteVirtualChat><script lang="ts">
import SvelteVirtualChat from '@humanspeak/svelte-virtual-chat'
import SvelteMarkdown from '@humanspeak/svelte-markdown'
// NOTE(review): this snippet duplicates the one above — likely an extraction artifact.
// Shape of each chat message; isStreaming marks an assistant reply still in flight.
type Message = {
id: string
role: 'user' | 'assistant'
content: string
isStreaming?: boolean
}
// $state makes token-by-token content mutations reactive.
let messages: Message[] = $state([...])
</script>
<SvelteVirtualChat
{messages}
getMessageId={(msg) => msg.id}
containerClass="h-[600px]"
viewportClass="h-full"
>
{#snippet renderMessage(message, index)}
<div class="p-4 border-b">
{#if message.role === 'assistant'}
<SvelteMarkdown
source={message.content}
streaming={message.isStreaming ?? false}
/>
{:else}
<p>{message.content}</p>
{/if}
</div>
{/snippet}
</SvelteVirtualChat>API Route Pattern
Server-side streaming with the Anthropic SDK:
// src/routes/api/chat/+server.ts
import Anthropic from '@anthropic-ai/sdk'
// NOTE(review): ANTHROPIC_API_KEY must come from $env — import not shown here.
const client = new Anthropic({ apiKey: ANTHROPIC_API_KEY })
/**
 * POST /api/chat — proxies the conversation to Anthropic and streams
 * plain-text token deltas back to the browser as they arrive.
 */
export async function POST({ request }) {
const { messages } = await request.json()
const stream = await client.messages.stream({
model: 'claude-sonnet-4-20250514',
max_tokens: 4096,
messages
})
const encoder = new TextEncoder()
const readable = new ReadableStream({
async start(controller) {
try {
for await (const event of stream) {
// Only text deltas carry token content; other event types are skipped.
if (event.type === 'content_block_delta' &&
event.delta.type === 'text_delta') {
controller.enqueue(encoder.encode(event.delta.text))
}
}
controller.close()
} catch (err) {
// Surface upstream failures to the client instead of leaving the
// response stream open forever (the original never errored the controller).
controller.error(err)
}
},
cancel() {
// Client disconnected — stop the upstream Anthropic request.
stream.abort()
}
})
return new Response(readable, {
headers: { 'Content-Type': 'text/plain; charset=utf-8' }
})
}
// src/routes/api/chat/+server.ts
import Anthropic from '@anthropic-ai/sdk'
// NOTE(review): ANTHROPIC_API_KEY must come from $env — import not shown here.
const client = new Anthropic({ apiKey: ANTHROPIC_API_KEY })
// POST /api/chat — streams plain-text token deltas back to the browser.
export async function POST({ request }) {
const { messages } = await request.json()
const stream = await client.messages.stream({
model: 'claude-sonnet-4-20250514',
max_tokens: 4096,
messages
})
const encoder = new TextEncoder()
const readable = new ReadableStream({
async start(controller) {
for await (const event of stream) {
// Only text deltas carry token content; other event types are skipped.
if (event.type === 'content_block_delta' &&
event.delta.type === 'text_delta') {
controller.enqueue(encoder.encode(event.delta.text))
}
}
// NOTE(review): if the for-await loop throws, the controller is never
// errored or closed — the client would hang. See fix in the copy above.
controller.close()
}
})
return new Response(readable, {
headers: { 'Content-Type': 'text/plain; charset=utf-8' }
})
}Client-Side Consumption
/**
 * Streams the assistant reply for `messageId` from /api/chat, appending
 * decoded text chunks to the matching message in the reactive `messages` list.
 *
 * @param messageId id of the (already-created) assistant message to fill
 * @throws Error when the HTTP request fails or the response has no body
 */
async function streamResponse(messageId: string) {
const response = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ messages: [...] })
})
// Fail fast on HTTP errors — otherwise an error page would be streamed
// into the chat as if it were tokens. Also replaces the unchecked
// non-null assertion on response.body.
if (!response.ok || !response.body) {
throw new Error(`chat request failed: ${response.status}`)
}
const reader = response.body.getReader()
const decoder = new TextDecoder()
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
// stream: true keeps partial multi-byte chars buffered until complete
const chunk = decoder.decode(value, { stream: true })
const msg = messages.find((m) => m.id === messageId)
if (msg) msg.content += chunk
}
// Flush any remaining buffered bytes
const final = decoder.decode()
if (final) {
const msg = messages.find((m) => m.id === messageId)
if (msg) msg.content += final
}
} finally {
// Always release the reader lock so the body stream can be cancelled/collected.
reader.releaseLock()
}
}
async function streamResponse(messageId: string) {
const response = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ messages: [...] })
})
// NOTE(review): non-null assertion — assumes the response always has a body;
// an !response.ok / !response.body guard would be safer (see copy above).
const reader = response.body!.getReader()
const decoder = new TextDecoder()
while (true) {
const { done, value } = await reader.read()
if (done) break
// stream: true keeps partial multi-byte chars buffered until complete
const chunk = decoder.decode(value, { stream: true })
// Re-find each iteration so a removed message simply stops receiving tokens.
const msg = messages.find((m) => m.id === messageId)
if (msg) msg.content += chunk
}
// Flush any remaining buffered bytes
const final = decoder.decode()
if (final) {
const msg = messages.find((m) => m.id === messageId)
if (msg) msg.content += final
}
}Key Behavior
- Height changes are batched per `requestAnimationFrame` — not per token
- When following bottom, the viewport re-snaps after each batch
- When scrolled away, height changes do not affect scroll position
- No debouncing or throttling needed from your code