"""Chainlit chat application backed by an OpenAI-compatible local LLM server.

Streams chat completions token by token, keeps the conversation history in
the user session, persists threads through a PostgreSQL SQLAlchemy data
layer, and gates access with password-based authentication.
"""
import hmac
import os
from typing import Optional

from dotenv import load_dotenv
from openai import AsyncOpenAI

import chainlit as cl
from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
@cl.data_layer
def get_data_layer():
    """Provide the persistence layer Chainlit uses to store threads/messages.

    The connection string can be overridden via the DATABASE_URL environment
    variable (read lazily, i.e. after load_dotenv() has run); the hard-coded
    default preserves the previous behavior.

    Returns:
        SQLAlchemyDataLayer connected to the configured PostgreSQL database.
    """
    # NOTE(review): credentials should not live in source control — move the
    # full DSN into .env and drop the default.
    conninfo = os.getenv(
        "DATABASE_URL",
        "postgresql+asyncpg://postgres:mysecretpassword123@127.0.0.1:5432/postgres",
    )
    return SQLAlchemyDataLayer(conninfo=conninfo)
# Load variables from a local .env file before any configuration is read.
load_dotenv()

# OpenAI-compatible async client pointed at a local LM Studio server.
# Host and key can be overridden via environment variables; the defaults
# preserve the previous hard-coded values.
client = AsyncOpenAI(
    base_url=os.getenv("OPENAI_BASE_URL", "http://192.168.1.37:8088/v1"),
    api_key=os.getenv("OPENAI_API_KEY", "lm-studio"),
)

# Instrument the OpenAI client so Chainlit traces every LLM call.
cl.instrument_openai()

# Default sampling settings sent with every chat completion request.
settings = {
    "model": os.getenv("OPENAI_MODEL", "/modelle/Moonlight-L3-15B-v2-64k"),
    "temperature": 0.6,
    "frequency_penalty": 1.05,
    # ... more settings
}
# Password-based authentication (only one auth callback may be registered!).
@cl.password_auth_callback
def auth_callback(username: str, password: str) -> Optional["cl.User"]:
    """Validate login credentials and return the authenticated user.

    Args:
        username: Login name entered by the user.
        password: Plain-text password entered by the user.

    Returns:
        A cl.User with admin metadata on success, otherwise None.
    """
    # NOTE(review): fixed admin/admin credentials are for development only —
    # check against a database or identity provider in production.
    # compare_digest gives a constant-time comparison (no timing side channel);
    # encode to bytes so non-ASCII input is rejected instead of raising.
    user_ok = hmac.compare_digest(username.encode("utf-8"), b"admin")
    pass_ok = hmac.compare_digest(password.encode("utf-8"), b"admin")
    if user_ok and pass_ok:
        return cl.User(
            identifier="admin", metadata={"role": "admin", "provider": "credentials"}
        )
    return None
# On chat start: initialize the chat history in the user session.
@cl.on_chat_start
async def start_chat():
    """Seed the per-session message history and greet the authenticated user."""
    if not cl.user_session.get("message_history"):
        cl.user_session.set("message_history", [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent. Löse Aufgaben schrittweise."
            }
        ])
    app_user = cl.user_session.get("user")
    # Guard against a missing session user (previously this raised
    # AttributeError if authentication ever yielded no user object).
    name = app_user.identifier if app_user else "Gast"
    await cl.Message(f"Hallo {name}!").send()
@cl.on_chat_resume
async def on_chat_resume(thread):
    """Rebuild the in-memory LLM history when a persisted chat is resumed.

    on_chat_start does not run on resume, so without this the session had no
    "message_history" and handle_message crashed appending to None.

    Args:
        thread: Thread dict restored by the data layer.
    """
    message_history = [
        {
            "role": "system",
            "content": "Du bist ein hilfreicher Assistent. Löse Aufgaben schrittweise."
        }
    ]
    # Map persisted steps back into OpenAI chat format.
    # NOTE(review): assumes steps carry "type" in {user_message,
    # assistant_message} and the text in "output" — confirm against the
    # Chainlit data-layer schema in use.
    for step in thread.get("steps", []):
        if step.get("type") == "user_message":
            message_history.append({"role": "user", "content": step.get("output", "")})
        elif step.get("type") == "assistant_message":
            message_history.append({"role": "assistant", "content": step.get("output", "")})
    cl.user_session.set("message_history", message_history)
# Handle incoming user messages.
@cl.on_message
async def handle_message(message: cl.Message):
    """Append the user message to the history, stream the LLM reply, persist both.

    Args:
        message: Incoming Chainlit message from the user.
    """
    # Fetch the running chat history from the user session; fall back to a
    # fresh system prompt if it is missing (e.g. a resumed session that was
    # never seeded) instead of crashing on None.
    message_history = cl.user_session.get("message_history") or [
        {
            "role": "system",
            "content": "Du bist ein hilfreicher Assistent. Löse Aufgaben schrittweise."
        }
    ]
    message_history.append({"role": "user", "content": message.content})

    # Empty message that is filled token by token while streaming.
    response_msg = cl.Message(content="")

    # Send the entire history to the LLM backend.
    stream = await client.chat.completions.create(
        messages=message_history,
        stream=True,
        **settings,
    )

    # Stream tokens into the reply message as they arrive.
    async for part in stream:
        # Some chunks (e.g. a final usage chunk) carry no choices; the old
        # code raised IndexError on part.choices[0] for those.
        if not part.choices:
            continue
        token = part.choices[0].delta.content or ""
        if token:
            await response_msg.stream_token(token)

    # Record the assistant reply and persist the updated history.
    message_history.append({"role": "assistant", "content": response_msg.content})
    cl.user_session.set("message_history", message_history)

    # Push the final message state to the frontend.
    await response_msg.update()