make embedding simpler

This commit is contained in:
zachary62 2025-03-31 10:00:14 -04:00
parent 21004e274f
commit 968d4c928c
1 changed file with 6 additions and 16 deletions


@@ -11,27 +11,17 @@ def call_llm(prompt):
     return r.choices[0].message.content
 
 def get_simple_embedding(text):
-    """
-    A simple embedding function that converts text to vector.
-    In a real application, you would use a proper embedding model like OpenAI,
-    Hugging Face, or other embedding services. For this example, we'll use a
-    simple approach based on character frequencies for demonstration purposes.
-    """
-    # Create a simple embedding (128-dimensional) based on character frequencies
-    # This is just for demonstration - not a real embedding algorithm!
+    # Create a 128-dim vector to store character frequencies
     embedding = np.zeros(128, dtype=np.float32)
 
-    # Generate a deterministic but distributed embedding based on character frequency
-    for i, char in enumerate(text):
-        # Use modulo to distribute values across the embedding dimensions
-        pos = ord(char) % 128
-        embedding[pos] += 1.0
+    # Count character frequencies in the text
+    for char in text:
+        embedding[ord(char) % 128] += 1.0
 
-    # Normalize the embedding
+    # Normalize the vector to unit length
     norm = np.linalg.norm(embedding)
     if norm > 0:
-        embedding = embedding / norm
+        embedding /= norm
 
     return embedding
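
For context, a minimal usage sketch of the simplified function (not part of the commit): because the returned vectors are normalized to unit length, the cosine similarity of two texts is just the dot product of their embeddings. The numpy import and the example strings below are illustrative assumptions, not taken from the repository.

    import numpy as np

    def get_simple_embedding(text):
        # Create a 128-dim vector to store character frequencies
        embedding = np.zeros(128, dtype=np.float32)

        # Count character frequencies in the text
        for char in text:
            embedding[ord(char) % 128] += 1.0

        # Normalize the vector to unit length
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding /= norm

        return embedding

    # Both embeddings have unit norm, so the dot product equals cosine similarity.
    a = get_simple_embedding("machine learning")
    b = get_simple_embedding("deep learning")
    print(float(np.dot(a, b)))

If an input is the empty string, its embedding stays all-zero (the norm check skips division), so any similarity involving it is 0.0.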