Nolan/collections migration (#1681)
* 20,539% speed up in collections overview * Migration
This commit is contained in:
@@ -297,8 +297,9 @@ services:
|
||||
- R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
|
||||
- R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
|
||||
- R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
|
||||
- R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
|
||||
- R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default}
|
||||
- R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
|
||||
- R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}
|
||||
|
||||
# OpenAI
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
|
||||
+2
-1
@@ -63,8 +63,9 @@ services:
|
||||
- R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
|
||||
- R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
|
||||
- R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
|
||||
- R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
|
||||
- R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default}
|
||||
- R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
|
||||
- R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}
|
||||
|
||||
# OpenAI
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
@@ -292,7 +291,6 @@ class PostgresCollectionHandler(CollectionsHandler):
|
||||
filter_document_ids: Optional[list[UUID]] = None,
|
||||
filter_collection_ids: Optional[list[UUID]] = None,
|
||||
) -> dict[str, list[CollectionResponse] | int]:
|
||||
t0 = time.time()
|
||||
conditions = []
|
||||
params: list[Any] = []
|
||||
param_index = 1
|
||||
@@ -349,24 +347,15 @@ class PostgresCollectionHandler(CollectionsHandler):
|
||||
params.append(limit)
|
||||
|
||||
try:
|
||||
t2 = time.time()
|
||||
results = await self.connection_manager.fetch_query(query, params)
|
||||
t3 = time.time()
|
||||
print(f"Time in fetch_query: {t3 - t2}")
|
||||
|
||||
if not results:
|
||||
return {"results": [], "total_entries": 0}
|
||||
|
||||
total_entries = results[0]["total_entries"] if results else 0
|
||||
|
||||
t4 = time.time()
|
||||
collections = [CollectionResponse(**row) for row in results]
|
||||
t5 = time.time()
|
||||
|
||||
print(f"Time in creating CollectionResponse objects: {t5 - t4}")
|
||||
|
||||
t1 = time.time()
|
||||
print(f"Total time in get_collections_overview: {t1 - t0}")
|
||||
return {"results": collections, "total_entries": total_entries}
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
@@ -428,6 +417,15 @@ class PostgresCollectionHandler(CollectionsHandler):
|
||||
message="Document is already assigned to the collection",
|
||||
)
|
||||
|
||||
update_collection_query = f"""
|
||||
UPDATE {self._get_table_name('collections')}
|
||||
SET document_count = document_count + 1
|
||||
WHERE id = $1
|
||||
"""
|
||||
await self.connection_manager.execute_query(
|
||||
query=update_collection_query, params=[collection_id]
|
||||
)
|
||||
|
||||
return collection_id
|
||||
|
||||
except R2RException:
|
||||
|
||||
@@ -387,11 +387,22 @@ class PostgresUserHandler(UserHandler):
|
||||
"""
|
||||
result = await self.connection_manager.fetchrow_query(
|
||||
query, [collection_id, id]
|
||||
) # fetchrow instead of execute_query
|
||||
)
|
||||
if not result:
|
||||
raise R2RException(
|
||||
status_code=400, message="User already in collection"
|
||||
)
|
||||
|
||||
update_collection_query = f"""
|
||||
UPDATE {self._get_table_name('collections')}
|
||||
SET user_count = user_count + 1
|
||||
WHERE id = $1
|
||||
"""
|
||||
await self.connection_manager.execute_query(
|
||||
query=update_collection_query,
|
||||
params=[collection_id],
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
async def remove_user_from_collection(
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Add user and document count to collection
|
||||
|
||||
Revision ID: c45a9cf6a8a4
|
||||
Revises:
|
||||
Create Date: 2024-12-10 13:28:07.798167
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import os
|
||||
|
||||
# revision identifiers, used by Alembic.
revision: str = "c45a9cf6a8a4"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# The schema that every statement in this migration targets is read from the
# environment once, when the module is imported. An unset or empty value
# aborts the import with a ValueError instead of running against a schema
# named "None".
project_name = os.getenv("R2R_PROJECT_NAME")
if not project_name:
    raise ValueError(
        "Environment variable `R2R_PROJECT_NAME` must be provided to migrate, it should be set equal to the value of `project_name` in your `r2r.toml`."
    )
|
||||
|
||||
|
||||
def upgrade():
    """Add ``user_count`` and ``document_count`` to ``collections`` and backfill them.

    Both columns are created as NOT NULL integers with a server default of
    ``0``. Every collection row is then updated with the number of users and
    documents whose ``collection_ids`` array contains that collection's id.
    """
    # Add the new columns with default value of 0 (user_count first, then
    # document_count).
    for counter_column in ("user_count", "document_count"):
        op.add_column(
            "collections",
            sa.Column(
                counter_column,
                sa.Integer(),
                nullable=False,
                server_default="0",
            ),
            schema=project_name,
        )

    # Initialize the counts based on existing relationships.
    #
    # Each count is computed by its own correlated subquery. Joining users
    # AND documents onto collections in a single query would materialize a
    # users x documents cross product per collection before the DISTINCT
    # counts collapse it again. COUNT never yields NULL, so collections
    # without members simply receive 0.
    op.execute(
        f"""
        UPDATE {project_name}.collections c
        SET user_count = (
                SELECT COUNT(DISTINCT u.id)
                FROM {project_name}.users u
                WHERE c.id = ANY(u.collection_ids)
            ),
            document_count = (
                SELECT COUNT(DISTINCT d.id)
                FROM {project_name}.documents d
                WHERE c.id = ANY(d.collection_ids)
            )
        """
    )
|
||||
|
||||
|
||||
def downgrade():
    """Drop the ``document_count`` and ``user_count`` columns from ``collections``."""
    # Removed in the reverse order of their creation in upgrade().
    for counter_column in ("document_count", "user_count"):
        op.drop_column("collections", counter_column, schema=project_name)
|
||||
Reference in New Issue
Block a user