Chat with Memory
Integrate Remina with LLMs for memory-augmented conversations.
Overview
Build a chatbot that:
- Persists user facts and preferences
- Uses memories to personalize responses
- Extracts new information from conversations
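For a sense of the end result, here is a minimal two-turn sketch using the chat() function defined in the Implementation section below; the exact wording of the replies depends on your model and on which memories have already been stored:

# First turn: the user shares a preference, and the exchange is stored as memories
history = []
reply = chat("user_123", "I'm vegetarian and I love spicy food", history)
history += [
    {"role": "user", "content": "I'm vegetarian and I love spicy food"},
    {"role": "assistant", "content": reply},
]

# A later turn, even in a fresh session, can draw on the stored preference
print(chat("user_123", "What should I cook tonight?"))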
Implementation
from remina import Memory
from openai import OpenAI

memory = Memory()
client = OpenAI()

def chat(user_id: str, message: str, history: list = None) -> str:
    history = history or []

    # Retrieve relevant memories
    memories = memory.search(
        query=message,
        user_id=user_id,
        limit=5
    )

    # Build context
    memory_context = ""
    if memories["results"]:
        memory_context = "User context:\n"
        memory_context += "\n".join([
            f"- {m['memory']}" for m in memories["results"]
        ])

    # System prompt with memories
    system_prompt = f"""You are an AI assistant with memory capabilities.
Use the provided context naturally in responses.
{memory_context}"""

    # Generate response
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history[-10:])
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.7,
    )
    assistant_response = response.choices[0].message.content

    # Store conversation
    memory.add(
        messages=[
            {"role": "user", "content": message},
            {"role": "assistant", "content": assistant_response},
        ],
        user_id=user_id,
    )
    return assistant_response

Gemini Implementation
from remina import Memory
from google import genai

memory = Memory({
    "embedder": {"provider": "gemini"},
    "llm": {"provider": "gemini"},
})
client = genai.Client()

def chat_gemini(user_id: str, message: str) -> str:
    # Retrieve relevant memories
    memories = memory.search(query=message, user_id=user_id, limit=5)

    # Build context
    context = ""
    if memories["results"]:
        context = "User context:\n" + "\n".join([
            f"- {m['memory']}" for m in memories["results"]
        ])

    # Generate response
    prompt = f"""{context}
User: {message}
Assistant:"""
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
    )
    assistant_response = response.text

    # Store conversation
    memory.add(
        messages=[
            {"role": "user", "content": message},
            {"role": "assistant", "content": assistant_response},
        ],
        user_id=user_id,
    )
    return assistant_response

Async Implementation
from remina import AsyncMemory
from openai import AsyncOpenAI
import asyncio

memory = AsyncMemory()
client = AsyncOpenAI()

async def chat_async(user_id: str, message: str) -> str:
    # Retrieve relevant memories
    memories = await memory.search(
        query=message,
        user_id=user_id,
        limit=5
    )
    context = "\n".join([m['memory'] for m in memories["results"]])

    # Generate response
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": f"User context:\n{context}"},
            {"role": "user", "content": message},
        ],
    )
    assistant_response = response.choices[0].message.content

    # Store conversation
    await memory.add(
        messages=[
            {"role": "user", "content": message},
            {"role": "assistant", "content": assistant_response},
        ],
        user_id=user_id,
    )
    return assistant_response

async def main():
    response = await chat_async("user_123", "I enjoy hiking in the mountains")
    print(response)
    await memory.close()

asyncio.run(main())

Production Considerations
Limit Memory Context
# Don't overwhelm the LLM: retrieve only a handful of memories
memories = memory.search(query=message, user_id=user_id, limit=5)

# Filter by relevance threshold and build the prompt context from `relevant`
relevant = [m for m in memories["results"] if m["score"] > 0.5]

Handle Failures Gracefully
try:
    memories = memory.search(query=message, user_id=user_id)
except Exception as e:
    # Fall back to an empty result set so the conversation can continue
    print(f"Memory search failed: {e}")
    memories = {"results": []}
# Only store meaningful exchanges
if len(message) > 10 and len(assistant_response) > 20:
    memory.add(
        messages=[
            {"role": "user", "content": message},
            {"role": "assistant", "content": assistant_response},
        ],
        user_id=user_id,
    )

Metadata for Filtering
from datetime import datetime

# Store with metadata (session_id comes from your own session handling)
memory.add(
    messages=message,
    user_id=user_id,
    metadata={
        "session_id": session_id,
        "channel": "web",
        "timestamp": datetime.now().isoformat(),
    },
)

# Search with filters
memories = memory.search(
    query=message,
    user_id=user_id,
    filters={"channel": "web"},
)
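Assuming filters match against metadata keys stored with each memory (as the channel example above suggests), the same pattern can scope retrieval to a single session; this is a sketch under that assumption, not guaranteed behaviour of every backend:

# Restrict recall to memories captured in the current session
memories = memory.search(
    query=message,
    user_id=user_id,
    filters={"session_id": session_id},
)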