Make table creation fail hard on vector dimension mismatch

This commit is contained in:
emrgnt-cmplxty
2025-03-20 00:49:57 -07:00
parent b83d9422da
commit 8aa1e29aee
+45 -1
View File
@@ -95,7 +95,51 @@ class PostgresChunksHandler(Handler):
self.quantization_type = quantization_type
async def create_tables(self):
# Check for old table name first
# First check if table already exists and validate dimensions
table_exists_query = """
SELECT EXISTS (
SELECT FROM pg_tables
WHERE schemaname = $1
AND tablename = $2
);
"""
table_name = VectorTableName.CHUNKS
table_exists = await self.connection_manager.fetch_query(
table_exists_query, (self.project_name, table_name)
)
if len(table_exists) > 0 and table_exists[0]["exists"]:
# Table exists, check vector dimension
vector_dim_query = """
SELECT a.atttypmod as dimension
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = $1
AND c.relname = $2
AND a.attname = 'vec';
"""
vector_dim_result = await self.connection_manager.fetch_query(
vector_dim_query, (self.project_name, table_name)
)
if vector_dim_result and len(vector_dim_result) > 0:
existing_dimension = vector_dim_result[0]["dimension"]
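# pgvector stores the declared dimension directly in atttypmod (no "- 4" offset as with varchar); a non-positive value means no fixed dimension was declared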
if existing_dimension > 0: # If it has a specific dimension
# Compare with provided dimension
if (
self.dimension > 0
and existing_dimension != self.dimension
):
raise ValueError(
f"Dimension mismatch: Table '{self.project_name}.{table_name}' was created with "
f"dimension {existing_dimension}, but {self.dimension} was provided. "
f"You must use the same dimension for existing tables."
)
# Check for old table name
check_query = """
SELECT EXISTS (
SELECT FROM pg_tables