---
description: Guidelines for using PocketFlow, Utility Function, LLM Wrapper
globs:
alwaysApply: false
---

# LLM Wrappers

Check out libraries like [litellm](https://github.com/BerriAI/litellm).
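
For instance, litellm exposes an OpenAI-style `completion` call that routes to many providers through one interface. A minimal sketch (the model name is just an example; the provider key is assumed to be set in the environment):

```python
# Minimal sketch using litellm; assumes `pip install litellm` and that the
# provider API key (e.g. OPENAI_API_KEY) is exported in the environment.
from litellm import completion

def call_llm(prompt):
    response = completion(
        model="gpt-4o",  # any provider/model litellm supports
        messages=[{"role": "user", "content": prompt}]
    )
    # litellm mirrors the OpenAI response shape
    return response.choices[0].message.content
```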

Here, we provide some minimal example implementations:

1. OpenAI

```python
def call_llm(prompt):
    from openai import OpenAI
    client = OpenAI(api_key="YOUR_API_KEY_HERE")
    r = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content

# Example usage
call_llm("How are you?")
```

> Store the API key in an environment variable like OPENAI_API_KEY for security.
{: .best-practice }
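
For example, a variant of the wrapper above that reads the key from the environment rather than hard-coding it (a minimal sketch; it assumes `OPENAI_API_KEY` is exported in your shell):

```python
import os
from openai import OpenAI

def call_llm(prompt):
    # Read the key from the environment instead of embedding it in code
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    r = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content
```

The OpenAI client also picks up `OPENAI_API_KEY` automatically when no `api_key` argument is passed.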

2. Claude (Anthropic)

```python
def call_llm(prompt):
    from anthropic import Anthropic
    client = Anthropic(api_key="YOUR_API_KEY_HERE")
    r = client.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=3000,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return r.content[0].text
```

3. Google (Generative AI Studio / PaLM API)

```python
def call_llm(prompt):
    import google.generativeai as genai
    genai.configure(api_key="YOUR_API_KEY_HERE")
    r = genai.generate_text(
        model="models/text-bison-001",
        prompt=prompt
    )
    return r.result
```

4. Azure (Azure OpenAI)

```python
def call_llm(prompt):
    from openai import AzureOpenAI
    client = AzureOpenAI(
        azure_endpoint="https://.openai.azure.com/",  # your Azure OpenAI resource endpoint
        api_key="YOUR_API_KEY_HERE",
        api_version="2023-05-15"
    )
    r = client.chat.completions.create(
        model="",  # your deployment name
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content
```

5. Ollama (Local LLM)

```python
def call_llm(prompt):
    from ollama import chat
    # Requires a local Ollama server with the model pulled (e.g. `ollama pull llama2`)
    response = chat(
        model="llama2",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.message.content
```

6. DeepSeek

```python
def call_llm(prompt):
    from openai import OpenAI
    # DeepSeek exposes an OpenAI-compatible endpoint
    client = OpenAI(api_key="YOUR_DEEPSEEK_API_KEY", base_url="https://api.deepseek.com")
    r = client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content
```

## Improvements

Feel free to enhance your `call_llm` function as needed. Here are a few examples:

- Handle chat history:

```python
def call_llm(messages):
    from openai import OpenAI
    client = OpenAI(api_key="YOUR_API_KEY_HERE")
    r = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )
    return r.choices[0].message.content
```
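
For instance, the caller keeps the running conversation and passes it back on each turn (hypothetical usage of the wrapper above):

```python
# Hypothetical usage: the caller owns the conversation history
history = [{"role": "user", "content": "What does PocketFlow do?"}]
answer = call_llm(history)

history.append({"role": "assistant", "content": answer})
history.append({"role": "user", "content": "Summarize that in one sentence."})
follow_up = call_llm(history)
```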

- Add in-memory caching:

```python
from functools import lru_cache

@lru_cache(maxsize=1000)
def call_llm(prompt):
    # Your implementation here
    pass
```

> ⚠️ Caching conflicts with Node retries, because a retried call returns the same cached result.
>
> To address this, you could use cached results only if not retried.
{: .warning }

```python
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_call(prompt):
    pass

def call_llm(prompt, use_cache):
    if use_cache:
        return cached_call(prompt)
    # Call the underlying function directly
    return cached_call.__wrapped__(prompt)

class SummarizeNode(Node):
    def exec(self, text):
        # Use the cache only on the first attempt; retries bypass it
        return call_llm(f"Summarize: {text}", self.cur_retry == 0)
```

- Enable logging:

```python
def call_llm(prompt):
    import logging
    logging.info(f"Prompt: {prompt}")
    response = ...  # Your implementation here
    logging.info(f"Response: {response}")
    return response
```
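
Note that `logging.info` is silent under the root logger's default `WARNING` level, so configure logging once at startup, for example:

```python
import logging

# The root logger defaults to WARNING, so INFO messages are dropped
# unless a level is configured once at program startup.
logging.basicConfig(level=logging.INFO)
```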