Async Usage
The TypeScript SDK is async by default — all methods return Promises. This page covers Python’s
AsyncLunar client and TypeScript equivalents for concurrent patterns.
Basic Async Usage
- Python
- TypeScript
Copy
Ask AI
from lunar import AsyncLunar
import asyncio


async def main():
    # The client is an async context manager; leaving the block closes
    # its underlying HTTP connections.
    async with AsyncLunar() as client:
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Hello!"}],
        )
        print(response.choices[0].message.content)


asyncio.run(main())
Copy
Ask AI
import { Lunar } from "lunar";

// Every SDK method returns a Promise; top-level await works in ESM.
const client = new Lunar();

const response = await client.chat.completions.create({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Hello!" }],
});

console.log(response.choices[0].message.content);
Why Use Async?
| Scenario | Sync | Async |
|---|---|---|
| Single request | Fine | Overkill |
| Sequential requests | Fine | Similar performance |
| Concurrent requests | Slow | Much faster |
| High throughput | Limited | Excellent |
| Web servers | Blocking | Non-blocking |
Concurrent Requests
Make multiple requests in parallel:
- Python
- TypeScript
Copy
Ask AI
from lunar import AsyncLunar
import asyncio


async def ask_question(client, question: str) -> str:
    """Send one question through the shared client and return the answer text."""
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": question}],
    )
    return response.choices[0].message.content


async def main():
    questions = [
        "What is Python?",
        "What is JavaScript?",
        "What is Rust?",
        "What is Go?",
        "What is TypeScript?",
    ]
    async with AsyncLunar() as client:
        # Fan out: every request is in flight at the same time; gather
        # returns the answers in the same order as the questions.
        tasks = [ask_question(client, q) for q in questions]
        answers = await asyncio.gather(*tasks)
        for q, a in zip(questions, answers):
            print(f"Q: {q}")
            print(f"A: {a[:100]}...")
            print()


asyncio.run(main())
Copy
Ask AI
import { Lunar } from "lunar";

// Send one question through the shared client and return the answer text.
async function askQuestion(client: Lunar, question: string): Promise<string> {
  const response = await client.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [{ role: "user", content: question }],
  });
  return response.choices[0].message.content;
}

const questions = [
  "What is Python?",
  "What is JavaScript?",
  "What is Rust?",
  "What is Go?",
  "What is TypeScript?",
];

const client = new Lunar();

// Run all requests concurrently
const answers = await Promise.all(
  questions.map((q) => askQuestion(client, q))
);

// Answers come back in question order, so index pairing is safe.
for (const [i, question] of questions.entries()) {
  console.log(`Q: ${question}`);
  console.log(`A: ${answers[i].slice(0, 100)}...`);
  console.log();
}
Client Initialization
- Python
- TypeScript
Copy
Ask AI
# With context manager (recommended)
async with AsyncLunar(api_key="your-key") as client:
response = await client.chat.completions.create(...)
# Manual management
client = AsyncLunar(api_key="your-key")
try:
response = await client.chat.completions.create(...)
finally:
await client.close()
Copy
Ask AI
// Direct instantiation — the TypeScript client needs no explicit cleanup.
const client = new Lunar({ apiKey: "your-key" });

const response = await client.chat.completions.create({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Hello" }],
});
All Async Methods
- Python
- TypeScript
Copy
Ask AI
async with AsyncLunar() as client:
# Chat completions
response = await client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Hello"}]
)
# Text completions
response = await client.completions.create(
model="gpt-4o-mini",
prompt="Hello"
)
# List models
models = await client.models.list()
# List providers
providers = await client.providers.list(model="gpt-4o-mini")
# Run evaluations
result = await client.evals.run(...)
Copy
Ask AI
const client = new Lunar();

// Chat completions
const chatResponse = await client.chat.completions.create({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Hello" }],
});

// Text completions
const textResponse = await client.completions.create({
  model: "gpt-4o-mini",
  prompt: "Hello",
});

// List models
const models = await client.models.list();

// List providers
const providers = await client.providers.list("gpt-4o-mini");
Async Streaming
- Python
- TypeScript
Copy
Ask AI
async def stream_response():
    """Stream a completion and print chunks as they arrive."""
    async with AsyncLunar() as client:
        stream = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Write a poem."}],
            stream=True,
        )
        async for chunk in stream:
            # Some chunks carry no text (delta.content is falsy) — skip them.
            content = chunk.choices[0].delta.content
            if content:
                print(content, end="", flush=True)


asyncio.run(stream_response())
Copy
Ask AI
const stream = await client.chat.completions.create({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Write a poem." }],
  stream: true,
});

for await (const chunk of stream) {
  // Some chunks carry no text (falsy delta.content) — skip them.
  const content = chunk.choices[0].delta.content;
  if (content) {
    process.stdout.write(content);
  }
}
Rate Limiting with Concurrency Control
Control concurrency to avoid rate limits:
- Python
- TypeScript
Copy
Ask AI
from lunar import AsyncLunar
import asyncio


async def process_with_limit(questions: list, max_concurrent: int = 5):
    """Answer every question, with at most max_concurrent requests in flight."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def ask_with_limit(client, question):
        # Acquiring the semaphore caps concurrent requests; release is
        # automatic when the block exits.
        async with semaphore:
            response = await client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": question}],
            )
            return response.choices[0].message.content

    async with AsyncLunar() as client:
        tasks = [ask_with_limit(client, q) for q in questions]
        return await asyncio.gather(*tasks)


# Process 100 questions, max 5 at a time
questions = [f"Question {i}" for i in range(100)]
answers = asyncio.run(process_with_limit(questions, max_concurrent=5))
Copy
Ask AI
import { Lunar } from "lunar";

// Answer every question, with at most `maxConcurrent` requests in flight.
//
// Fixes over the previous version:
// 1. Completion was detected with `results.filter(Boolean).length ===
//    questions.length`, which never fires if any answer is an empty
//    string — the returned promise would hang forever.
// 2. A rejected request was never handled: the `running` counter was
//    not decremented and the promise never settled. Promise.all now
//    propagates the first error to the caller instead.
async function processWithLimit(
  questions: string[],
  maxConcurrent: number = 5
): Promise<string[]> {
  const client = new Lunar();
  const results: string[] = new Array(questions.length);
  let nextIndex = 0;

  // Each worker repeatedly claims the next unprocessed question, so the
  // number of in-flight requests never exceeds the pool size.
  async function worker(): Promise<void> {
    while (nextIndex < questions.length) {
      const i = nextIndex++;
      const response = await client.chat.completions.create({
        model: "gpt-4o-mini",
        messages: [{ role: "user", content: questions[i] }],
      });
      results[i] = response.choices[0].message.content;
    }
  }

  const pool = Array.from(
    { length: Math.min(maxConcurrent, questions.length) },
    () => worker()
  );
  await Promise.all(pool);
  return results;
}

// Process 100 questions, max 5 at a time
const questions = Array.from({ length: 100 }, (_, i) => `Question ${i}`);
const answers = await processWithLimit(questions, 5);
Error Handling
- Python
- TypeScript
Copy
Ask AI
from lunar import AsyncLunar, RateLimitError, ServerError
import asyncio


async def safe_request(client, messages):
    """Issue a chat request, retrying on rate limits; None on server errors."""
    try:
        return await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )
    except RateLimitError as e:
        print(f"Rate limited, waiting {e.retry_after}s")
        # Fall back to a 1s pause when the server gave no retry_after.
        await asyncio.sleep(e.retry_after or 1)
        return await safe_request(client, messages)  # Retry
    except ServerError as e:
        print(f"Server error: {e}")
        return None


async def main():
    async with AsyncLunar() as client:
        response = await safe_request(
            client,
            [{"role": "user", "content": "Hello!"}],
        )
        if response:
            print(response.choices[0].message.content)


asyncio.run(main())
Copy
Ask AI
import { Lunar, RateLimitError, ServerError } from "lunar";

// Issue a chat request, retrying on rate limits; null on server errors.
async function safeRequest(
  client: Lunar,
  messages: Array<{ role: string; content: string }>
) {
  try {
    return await client.chat.completions.create({
      model: "gpt-4o-mini",
      messages,
    });
  } catch (e) {
    if (e instanceof RateLimitError) {
      console.log(`Rate limited, waiting ${e.retryAfter}s`);
      // Fall back to a 1s pause when the server gave no retryAfter.
      await new Promise((r) => setTimeout(r, (e.retryAfter ?? 1) * 1000));
      return safeRequest(client, messages); // Retry
    }
    if (e instanceof ServerError) {
      console.log(`Server error: ${e}`);
      return null;
    }
    // Anything else is unexpected — surface it to the caller.
    throw e;
  }
}

const client = new Lunar();

const response = await safeRequest(client, [
  { role: "user", content: "Hello!" },
]);

if (response) {
  console.log(response.choices[0].message.content);
}
Integration with Web Frameworks
- Python (FastAPI)
- TypeScript (Express)
Copy
Ask AI
from contextlib import asynccontextmanager

from fastapi import FastAPI
from lunar import AsyncLunar

# One shared client for the whole process; reusing it pools connections.
client = AsyncLunar()


@asynccontextmanager
async def lifespan(app: FastAPI):
    # `@app.on_event("shutdown")` is deprecated in FastAPI; the lifespan
    # context manager is the documented replacement. Code before `yield`
    # runs at startup, code after it at shutdown.
    yield
    await client.close()


app = FastAPI(lifespan=lifespan)


@app.post("/chat")
async def chat(message: str):
    """Proxy a single chat message through the shared AsyncLunar client."""
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": message}],
    )
    return {"response": response.choices[0].message.content}
Copy
Ask AI
import express from "express";
import { Lunar } from "lunar";

const app = express();

// One shared client for the whole process; reusing it pools connections.
const client = new Lunar();

app.use(express.json());

app.post("/chat", async (req, res) => {
  // Express 4 does not catch rejected promises from async handlers, so
  // without the try/catch an SDK error would leave the request hanging
  // and emit an unhandled rejection.
  try {
    const response = await client.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [{ role: "user", content: req.body.message }],
    });
    res.json({ response: response.choices[0].message.content });
  } catch (err) {
    res.status(500).json({ error: "Upstream request failed" });
  }
});

app.listen(3000);