remove redundant memory layer
This commit is contained in:
parent
da435e0bf7
commit
ee4dc4e467
|
|
@ -1,92 +0,0 @@
|
|||
import numpy as np
|
||||
import faiss
|
||||
|
||||
def create_index(dimension=128):
    """Build an empty FAISS index together with its companion item store.

    The index only holds vectors; the payload for each vector is kept in a
    plain Python list whose positions line up with the order in which
    vectors are added to the index.

    Args:
        dimension: Length of every vector that will be stored in the index.

    Returns:
        tuple: ``(index, items_list)`` where ``index`` is a fresh
        ``faiss.IndexFlatL2`` of the requested dimension and ``items_list``
        is a new empty list for the associated items.
    """
    # IndexFlatL2 is constructed directly from the vector dimension; the
    # item list starts empty and is grown by the caller alongside the index.
    return faiss.IndexFlatL2(dimension), []
|
||||
|
||||
def add_to_index(index, items_list, embedding, item):
    """Store one item and its embedding, keeping index and list in step.

    Args:
        index: Object exposing ``add(matrix)`` (a FAISS index in practice).
        items_list: List holding the items that correspond to the vectors
            already added to ``index``; it is appended to in place.
        embedding: Vector representation of ``item``; anything
            ``numpy.array`` accepts.
        item: The payload to remember for this vector.

    Returns:
        int: Position of ``item`` inside ``items_list`` after insertion.
    """
    # Convert in explicit steps: array -> single-row matrix -> float32.
    as_array = np.array(embedding)
    single_row = as_array.reshape(1, -1)
    row_f32 = single_row.astype(np.float32)

    # The vector goes in first, so a failing add() leaves items_list untouched.
    index.add(row_f32)

    # The new item lands at the current end of the list.
    position = len(items_list)
    items_list.append(item)
    return position
|
||||
|
||||
def search_index(index, items_list, query_embedding, k=1):
    """Search for the k most similar items to the query vector.

    Args:
        index: Object exposing ``search(matrix, k)`` and returning
            ``(distances, labels)`` with one row per query (a FAISS index
            in practice).
        items_list: The list of items corresponding to vectors in the index.
        query_embedding: The query vector; anything ``numpy.array`` accepts.
        k: Maximum number of results to return (default: 1).

    Returns:
        tuple: ``(found_items, distances)`` where ``found_items`` is a list
        of at most ``k`` items and ``distances`` is the list of matching
        distances as Python floats, in the order the index returned them.
        Both lists are empty when ``k`` is not positive or ``items_list``
        is empty.
    """
    # Never ask for more neighbours than there are stored items.
    k = min(k, len(items_list))
    # ``<=`` rather than ``==``: a negative k must not reach index.search().
    if k <= 0:
        return [], []

    # Shape the query as a single-row float32 matrix for the index.
    query_vector = np.array(query_embedding).reshape(1, -1).astype(np.float32)

    D, I = index.search(query_vector, k)

    found_items = []
    distances = []
    item_count = len(items_list)
    for distance, label in zip(D[0], I[0]):
        position = int(label)
        # FAISS pads missing neighbours with label -1; indexing the list with
        # it would silently return the *last* item. Skip any label that does
        # not point at a stored item, along with its distance.
        if 0 <= position < item_count:
            found_items.append(items_list[position])
            distances.append(float(distance))

    return found_items, distances
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Small end-to-end demo: build a 3-dimensional index, fill it with five
    # random vectors, then look up the two nearest neighbours of a random query.
    index, items = create_index(dimension=3)

    for i in range(5):
        # One fresh random 3-vector per item, labelled by its loop counter.
        add_to_index(index, items, np.random.random(3), f"Item {i}")

    print(f"Added {len(items)} items to the index")

    # The query is drawn after all items, so the random-call order is kept.
    query = np.random.random(3)
    found_items, distances = search_index(index, items, query, k=2)

    # Report the query followed by what the search returned.
    for label, value in (
        ("Query:", query),
        ("Found items:", found_items),
        ("Distances:", distances),
    ):
        print(label, value)
|
||||
Loading…
Reference in New Issue