# pocketflow/cookbook/pocketflow-chat-memory/vector_index.py
#
# Original listing metadata: 92 lines, 3.0 KiB, Python

import numpy as np
import faiss
def create_index(dimension=128):
    """Build an empty similarity-search index and its companion item store.

    Args:
        dimension: Number of components in each vector the index will hold.

    Returns:
        tuple: ``(index, items_list)`` where ``index`` is a fresh
        ``faiss.IndexFlatL2`` (flat, exact, brute-force search) and
        ``items_list`` is a new empty list meant to hold the items that
        correspond to the indexed vectors.
    """
    # The index only holds vectors; the items themselves are kept in a
    # separate list that starts out empty.
    return faiss.IndexFlatL2(dimension), []
def add_to_index(index, items_list, embedding, item):
    """Insert one embedding into the index and record its item.

    Args:
        index: Index object; its ``add`` method receives the vector.
        items_list: List of stored items; ``item`` is appended to it.
        embedding: Vector for the item (anything ``np.array`` accepts).
        item: The object to store alongside the vector.

    Returns:
        int: Position of ``item`` within ``items_list``.
    """
    # Coerce the embedding into a single float32 row of shape (1, n)
    # before handing it to the index.
    as_array = np.array(embedding)
    single_row = as_array.reshape(1, -1)
    index.add(single_row.astype(np.float32))

    # The vector is added first; the item's slot is the list length
    # measured just before the append.
    position = len(items_list)
    items_list.append(item)
    return position
def search_index(index, items_list, query_embedding, k=1):
    """Search for the k most similar items to the query vector.

    Args:
        index: The FAISS index (its ``search`` method is called).
        items_list: The list of items corresponding to vectors in the index.
        query_embedding: The query vector (anything ``np.array`` accepts).
        k: Maximum number of results to return (default: 1). Values larger
            than ``len(items_list)`` are clamped; values <= 0 yield no
            results.

    Returns:
        tuple: ``(found_items, distances)`` where:
            - found_items is a list of up to k most similar items
            - distances is a list of the corresponding distances, in the
              same order as ``found_items``
    """
    # Never ask for more neighbours than there are stored items. A zero or
    # negative k (including the empty-store case) means there is nothing to
    # return, and must not be forwarded to the index.
    k = min(k, len(items_list))
    if k <= 0:
        return [], []

    # Shape the query as a single float32 row of shape (1, n) for FAISS.
    query_vector = np.array(query_embedding).reshape(1, -1).astype(np.float32)

    # One query row in, so only row 0 of each result matrix is relevant.
    distance_rows, label_rows = index.search(query_vector, k)

    found_items = []
    distances = []
    for label, distance in zip(label_rows[0], distance_rows[0].tolist()):
        label = int(label)
        # FAISS fills unused result slots with label -1. Indexing the list
        # with -1 would silently return the *last* item, so skip those
        # slots (and their distances) instead.
        if label < 0:
            continue
        found_items.append(items_list[label])
        distances.append(distance)
    return found_items, distances
# Demo: build a tiny index, fill it with random vectors, and query it
if __name__ == "__main__":
    # Three-dimensional vectors keep the printed output short
    index, items = create_index(dimension=3)

    # Populate the index with five random vectors labelled "Item 0".."Item 4"
    for n in range(5):
        embedding = np.random.random(3)
        add_to_index(index, items, embedding, f"Item {n}")
    print(f"Added {len(items)} items to the index")

    # Look up the two nearest stored vectors for a random query
    query = np.random.random(3)
    found_items, distances = search_index(index, items, query, k=2)
    print("Query:", query)
    print("Found items:", found_items)
    print("Distances:", distances)