diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json index 49372c60f..ce4d6d74d 100644 --- a/js/sdk/package-lock.json +++ b/js/sdk/package-lock.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.15", + "version": "0.3.16", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/js/sdk/package.json b/js/sdk/package.json index 0b6452ce5..2f42195d9 100644 --- a/js/sdk/package.json +++ b/js/sdk/package.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.15", + "version": "0.3.16", "description": "", "main": "dist/index.js", "browser": "dist/index.browser.js", diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index f60730402..3e3a9e56f 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -1921,41 +1921,24 @@ export class r2rClient { /** * Search over documents. * @param query The query to search for. - * @param settings Settings for the document search. + * @param vector_search_settings Settings for the document search. * @returns A promise that resolves to the response from the server. */ @feature("searchDocuments") async searchDocuments( query: string, - settings?: { - searchOverMetadata?: boolean; - metadataKeys?: string[]; - searchOverBody?: boolean; - filters?: Record; - searchFilters?: Record; - offset?: number; - limit?: number; - titleWeight?: number; - metadataWeight?: number; - }, + vector_search_settings?: VectorSearchSettings | Record, ): Promise { this._ensureAuthenticated(); - const json_data: Record = { query, - settings: { - search_over_metadata: settings?.searchOverMetadata ?? true, - metadata_keys: settings?.metadataKeys ?? ["title"], - search_over_body: settings?.searchOverBody ?? false, - filters: settings?.filters ?? {}, - search_filters: settings?.searchFilters ?? {}, - offset: settings?.offset ?? 0, - limit: settings?.limit ?? 10, - title_weight: settings?.titleWeight ?? 0.5, - metadata_weight: settings?.metadataWeight ?? 
0.5, - }, + vector_search_settings, }; + Object.keys(json_data).forEach( + (key) => json_data[key] === undefined && delete json_data[key], + ); + return await this._makeRequest("POST", "search_documents", { data: json_data, }); diff --git a/py/cli/commands/database.py b/py/cli/commands/database.py index d324b2976..311b28809 100644 --- a/py/cli/commands/database.py +++ b/py/cli/commands/database.py @@ -84,6 +84,7 @@ async def upgrade(schema, revision): click.echo( f"Running database upgrade for schema {schema or 'default'}..." ) + print(f"Upgrading revision = {revision}") command = f"upgrade {revision}" if revision else "upgrade" result = await run_alembic_command(command, schema_name=schema) diff --git a/py/cli/commands/retrieval.py b/py/cli/commands/retrieval.py index e9de58069..f0d5604d4 100644 --- a/py/cli/commands/retrieval.py +++ b/py/cli/commands/retrieval.py @@ -10,7 +10,7 @@ from cli.utils.timer import timer @click.option( "--query", prompt="Enter your search query", help="The search query" ) -# VectorSearchSettings +# SearchSettings @click.option( "--use-vector-search", is_flag=True, diff --git a/py/compose.full.yaml b/py/compose.full.yaml index 8c7ea4a19..a03df77a6 100644 --- a/py/compose.full.yaml +++ b/py/compose.full.yaml @@ -333,6 +333,10 @@ services: # Ollama - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434} + # Huggingface + - HUGGINGFACE_API_BASE=${HUGGINGFACE_API_BASE:-http://host.docker.internal:8080} + - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY} + # Unstructured - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general} diff --git a/py/core/__init__.py b/py/core/__init__.py index cb8c270bc..a0e654f97 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -78,8 +78,7 @@ __all__ = [ "KGSearchResult", "KGSearchSettings", "VectorSearchResult", - "VectorSearchSettings", - "DocumentSearchSettings", + "SearchSettings", 
"HybridSearchSettings", # User abstractions "Token", diff --git a/py/core/agent/rag.py b/py/core/agent/rag.py index 0912f238b..8bc56c419 100644 --- a/py/core/agent/rag.py +++ b/py/core/agent/rag.py @@ -8,7 +8,7 @@ from core.base import ( from core.base.abstractions import ( AggregateSearchResult, KGSearchSettings, - VectorSearchSettings, + SearchSettings, ) from core.base.agent import AgentConfig, Tool from core.base.providers import CompletionProvider @@ -57,7 +57,7 @@ class RAGAgentMixin: async def search( self, query: str, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, kg_search_settings: KGSearchSettings, *args, **kwargs, diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index 6e14b3edc..32abbbca1 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -46,8 +46,7 @@ __all__ = [ "KGSearchResult", "KGSearchSettings", "VectorSearchResult", - "VectorSearchSettings", - "DocumentSearchSettings", + "SearchSettings", "HybridSearchSettings", # KG abstractions "KGCreationSettings", diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py index a130cd416..b49220e3c 100644 --- a/py/core/base/abstractions/__init__.py +++ b/py/core/base/abstractions/__init__.py @@ -51,7 +51,6 @@ from shared.abstractions.llm import ( from shared.abstractions.prompt import Prompt from shared.abstractions.search import ( AggregateSearchResult, - DocumentSearchSettings, HybridSearchSettings, KGCommunityResult, KGEntityResult, @@ -61,8 +60,8 @@ from shared.abstractions.search import ( KGSearchResult, KGSearchResultType, KGSearchSettings, + SearchSettings, VectorSearchResult, - VectorSearchSettings, ) from shared.abstractions.user import Token, TokenData, UserStats from shared.abstractions.vector import ( @@ -130,8 +129,7 @@ __all__ = [ "KGGlobalResult", "KGSearchSettings", "VectorSearchResult", - "VectorSearchSettings", - "DocumentSearchSettings", + "SearchSettings", 
"HybridSearchSettings", # KG abstractions "KGCreationSettings", diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index ddea49556..4411e0a1c 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -27,7 +27,6 @@ from core.base import ( ) from core.base.abstractions import ( DocumentInfo, - DocumentSearchSettings, IndexArgsHNSW, IndexArgsIVFFlat, IndexMeasure, @@ -35,11 +34,11 @@ from core.base.abstractions import ( KGCreationSettings, KGEnrichmentSettings, KGEntityDeduplicationSettings, + SearchSettings, UserStats, VectorEntry, VectorQuantizationType, VectorSearchResult, - VectorSearchSettings, VectorTableName, ) from core.base.api.models import ( @@ -256,6 +255,15 @@ class DocumentHandler(Handler): ): pass + @abstractmethod + async def search_documents( + self, + query_text: str, + query_embedding: Optional[list[float]] = None, + search_settings: Optional[SearchSettings] = None, + ) -> list[DocumentInfo]: + pass + class CollectionHandler(Handler): @abstractmethod @@ -511,28 +519,22 @@ class VectorHandler(Handler): @abstractmethod async def semantic_search( - self, query_vector: list[float], search_settings: VectorSearchSettings + self, query_vector: list[float], search_settings: SearchSettings ) -> list[VectorSearchResult]: pass @abstractmethod async def full_text_search( - self, query_text: str, search_settings: VectorSearchSettings + self, query_text: str, search_settings: SearchSettings ) -> list[VectorSearchResult]: pass - @abstractmethod - async def search_documents( - self, query_text: str, settings: DocumentSearchSettings - ) -> list[dict]: - pass - @abstractmethod async def hybrid_search( self, query_text: str, query_vector: list[float], - search_settings: VectorSearchSettings, + search_settings: SearchSettings, *args, **kwargs, ) -> list[VectorSearchResult]: @@ -1404,14 +1406,14 @@ class DatabaseProvider(Provider): return await self.vector_handler.upsert_entries(entries) async def 
semantic_search( - self, query_vector: list[float], search_settings: VectorSearchSettings + self, query_vector: list[float], search_settings: SearchSettings ) -> list[VectorSearchResult]: return await self.vector_handler.semantic_search( query_vector, search_settings ) async def full_text_search( - self, query_text: str, search_settings: VectorSearchSettings + self, query_text: str, search_settings: SearchSettings ) -> list[VectorSearchResult]: return await self.vector_handler.full_text_search( query_text, search_settings @@ -1421,7 +1423,7 @@ class DatabaseProvider(Provider): self, query_text: str, query_vector: list[float], - search_settings: VectorSearchSettings, + search_settings: SearchSettings, *args, **kwargs, ) -> list[VectorSearchResult]: @@ -1430,9 +1432,14 @@ class DatabaseProvider(Provider): ) async def search_documents( - self, query_text: str, settings: DocumentSearchSettings - ) -> list[dict]: - return await self.vector_handler.search_documents(query_text, settings) + self, + query_text: str, + settings: SearchSettings, + query_embedding: Optional[list[float]] = None, + ) -> list[DocumentInfo]: + return await self.document_handler.search_documents( + query_text, query_embedding, settings + ) async def delete( self, filters: dict[str, Any] diff --git a/py/core/base/providers/embedding.py b/py/core/base/providers/embedding.py index 66f686011..57eee761c 100644 --- a/py/core/base/providers/embedding.py +++ b/py/core/base/providers/embedding.py @@ -25,8 +25,7 @@ class EmbeddingConfig(ProviderConfig): base_model: str base_dimension: int rerank_model: Optional[str] = None - rerank_dimension: Optional[int] = None - rerank_transformer_type: Optional[str] = None + rerank_url: Optional[str] = None batch_size: int = 1 prefixes: Optional[dict[str, str]] = None add_title_as_prefix: bool = True @@ -38,6 +37,10 @@ class EmbeddingConfig(ProviderConfig): VectorQuantizationSettings() ) + ## deprecated + rerank_dimension: Optional[int] = None + rerank_transformer_type: 
Optional[str] = None + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider '{self.provider}' is not supported.") @@ -171,6 +174,16 @@ class EmbeddingProvider(Provider): ): pass + @abstractmethod + async def arerank( + self, + query: str, + results: list[VectorSearchResult], + stage: PipeStage = PipeStage.RERANK, + limit: int = 10, + ): + pass + def set_prefixes(self, config_prefixes: dict[str, str], base_model: str): self.prefixes = {} diff --git a/py/core/base/providers/ingestion.py b/py/core/base/providers/ingestion.py index 7826c8c84..7b6df0820 100644 --- a/py/core/base/providers/ingestion.py +++ b/py/core/base/providers/ingestion.py @@ -18,7 +18,6 @@ class IngestionConfig(ProviderConfig): chunk_enrichment_settings: ChunkEnrichmentSettings = ( ChunkEnrichmentSettings() ) - extra_parsers: dict[str, str] = {} audio_transcription_model: str = "openai/whisper-1" @@ -29,6 +28,12 @@ class IngestionConfig(ProviderConfig): vision_pdf_prompt_name: str = "vision_pdf" vision_pdf_model: str = "openai/gpt-4-mini" + skip_document_summary: bool = False + document_summary_system_prompt: str = "default_system" + document_summary_task_prompt: str = "default_summary" + chunks_for_document_summary: int = 128 + document_summary_model: str = "openai/gpt-4o-mini" + @property def supported_providers(self) -> list[str]: return ["r2r", "unstructured_local", "unstructured_api"] diff --git a/py/core/configs/full_local_llm.toml b/py/core/configs/full_local_llm.toml index 548e5ca84..615805494 100644 --- a/py/core/configs/full_local_llm.toml +++ b/py/core/configs/full_local_llm.toml @@ -66,6 +66,8 @@ new_after_n_chars = 512 max_characters = 1_024 combine_under_n_chars = 128 overlap = 20 +chunks_for_document_summary = 16 +document_summary_model = "ollama/llama3.1" [orchestration] provider = "hatchet" diff --git a/py/core/configs/local_llm.toml b/py/core/configs/local_llm.toml index 9a0196f96..2c51e5e79 100644 --- 
a/py/core/configs/local_llm.toml +++ b/py/core/configs/local_llm.toml @@ -67,3 +67,6 @@ vision_pdf_model = "ollama/llama3.2-vision" [ingestion.extra_parsers] pdf = "zerox" + +chunks_for_document_summary = 16 +document_summary_model = "ollama/llama3.1" diff --git a/py/core/configs/r2r_azure.toml b/py/core/configs/r2r_azure.toml index 36cb91202..600920e80 100644 --- a/py/core/configs/r2r_azure.toml +++ b/py/core/configs/r2r_azure.toml @@ -40,5 +40,6 @@ audio_transcription_model="azure/whisper-1" vision_img_model = "azure/gpt-4o-mini" vision_pdf_model = "azure/gpt-4o-mini" +document_summary_model = "azure/gpt-4o-mini" [ingestion.chunk_enrichment_settings] generation_config = { model = "azure/gpt-4o-mini" } diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 157b0937d..6a53076e3 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -11,10 +11,10 @@ from fastapi import ( Depends, File, Form, + HTTPException, Path, Query, UploadFile, - HTTPException, ) from pydantic import Json diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index 605cd29e8..1116cb7df 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -337,7 +337,7 @@ class ManagementRouter(BaseRouter): document_ids: list[str] = Query([]), offset: int = Query(0, ge=0), limit: int = Query( - 100, + 1_000, ge=-1, description="Number of items to return. 
Use -1 to return all items.", ), diff --git a/py/core/main/api/retrieval_router.py b/py/core/main/api/retrieval_router.py index 955fd0149..ae45d62bd 100644 --- a/py/core/main/api/retrieval_router.py +++ b/py/core/main/api/retrieval_router.py @@ -8,12 +8,11 @@ from fastapi import Body, Depends from fastapi.responses import StreamingResponse from core.base import ( - DocumentSearchSettings, GenerationConfig, KGSearchSettings, Message, R2RException, - VectorSearchSettings, + SearchSettings, ) from core.base.api.models import ( WrappedCompletionResponse, @@ -58,7 +57,7 @@ class RetrievalRouter(BaseRouter): def _select_filters( self, auth_user: Any, - search_settings: Union[VectorSearchSettings, KGSearchSettings], + search_settings: Union[SearchSettings, KGSearchSettings], ) -> dict[str, Any]: selected_collections = { str(cid) for cid in set(search_settings.selected_collection_ids) @@ -111,8 +110,8 @@ class RetrievalRouter(BaseRouter): query: str = Body( ..., description=search_descriptions.get("query") ), - settings: DocumentSearchSettings = Body( - default_factory=DocumentSearchSettings, + settings: SearchSettings = Body( + default_factory=SearchSettings, description="Settings for document search", ), auth_user=Depends(self.service.providers.auth.auth_wrapper), @@ -127,8 +126,14 @@ class RetrievalRouter(BaseRouter): Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. 
""" + query_embedding = ( + await self.service.providers.embedding.async_get_embedding( + query + ) + ) results = await self.service.search_documents( query=query, + query_embedding=query_embedding, settings=settings, ) return results @@ -142,8 +147,8 @@ class RetrievalRouter(BaseRouter): query: str = Body( ..., description=search_descriptions.get("query") ), - vector_search_settings: VectorSearchSettings = Body( - default_factory=VectorSearchSettings, + vector_search_settings: SearchSettings = Body( + default_factory=SearchSettings, description=search_descriptions.get("vector_search_settings"), ), kg_search_settings: KGSearchSettings = Body( @@ -187,8 +192,8 @@ class RetrievalRouter(BaseRouter): @self.base_endpoint async def rag_app( query: str = Body(..., description=rag_descriptions.get("query")), - vector_search_settings: VectorSearchSettings = Body( - default_factory=VectorSearchSettings, + vector_search_settings: SearchSettings = Body( + default_factory=SearchSettings, description=rag_descriptions.get("vector_search_settings"), ), kg_search_settings: KGSearchSettings = Body( @@ -261,8 +266,8 @@ class RetrievalRouter(BaseRouter): description=agent_descriptions.get("messages"), deprecated=True, ), - vector_search_settings: VectorSearchSettings = Body( - default_factory=VectorSearchSettings, + vector_search_settings: SearchSettings = Body( + default_factory=SearchSettings, description=agent_descriptions.get("vector_search_settings"), ), kg_search_settings: KGSearchSettings = Body( @@ -358,7 +363,27 @@ class RetrievalRouter(BaseRouter): This endpoint uses the language model to generate completions for the provided messages. The generation process can be customized using the generation_config parameter. 
""" + print("messages = ", messages) + return await self.service.completion( - messages=messages, + messages=[message.to_dict() for message in messages], generation_config=generation_config, ) + + @self.router.post("/embedding") + @self.base_endpoint + async def embedding( + content: str = Body(..., description="The content to embed"), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + response_model=WrappedCompletionResponse, + ): + """ + Generate completions for a list of messages. + + This endpoint uses the language model to generate completions for the provided messages. + The generation process can be customized using the generation_config parameter. + """ + + return await self.service.providers.embedding.async_get_embedding( + text=content + ) diff --git a/py/core/main/app.py b/py/core/main/app.py index 10cf5e7be..5fc6ec16c 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -1,11 +1,11 @@ from typing import Union -from core.base import R2RException from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from fastapi.openapi.utils import get_openapi +from fastapi.responses import JSONResponse +from core.base import R2RException from core.providers import ( HatchetOrchestrationProvider, SimpleOrchestrationProvider, diff --git a/py/core/main/app_entry.py b/py/core/main/app_entry.py index 836b7b409..5328e637a 100644 --- a/py/core/main/app_entry.py +++ b/py/core/main/app_entry.py @@ -5,10 +5,11 @@ from contextlib import asynccontextmanager from typing import Optional from apscheduler.schedulers.asyncio import AsyncIOScheduler -from core.base import R2RException from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse + +from core.base import R2RException from .assembly import R2RBuilder, R2RConfig diff --git 
a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index 8c4cd8f09..e30220db2 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -3,8 +3,8 @@ import logging import uuid from typing import TYPE_CHECKING from uuid import UUID -from fastapi import HTTPException +from fastapi import HTTPException from hatchet_sdk import ConcurrencyLimitStrategy, Context from litellm import AuthenticationError @@ -103,6 +103,14 @@ def hatchet_ingestion_factory( # document_info_dict = context.step_output("parse")["document_info"] # document_info = DocumentInfo(**document_info_dict) + await service.update_document_status( + document_info, status=IngestionStatus.AUGMENTING + ) + await service.augment_document_info( + document_info, + [extraction.to_dict() for extraction in extractions], + ) + await self.ingestion_service.update_document_status( document_info, status=IngestionStatus.EMBEDDING, diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index 2f35b6d77..d91e87fe2 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -2,9 +2,9 @@ import asyncio import logging from uuid import UUID +from fastapi import HTTPException from litellm import AuthenticationError -from fastapi import HTTPException from core.base import DocumentExtraction, R2RException, increment_version from core.utils import ( generate_default_user_collection_id, @@ -44,6 +44,11 @@ def simple_ingestion_factory(service: IngestionService): async for extraction in extractions_generator ] + await service.update_document_status( + document_info, status=IngestionStatus.AUGMENTING + ) + await service.augment_document_info(document_info, extractions) + await service.update_document_status( document_info, 
status=IngestionStatus.EMBEDDING ) diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index 5fe886e00..f1a45cb86 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -5,6 +5,7 @@ import uuid from datetime import datetime from typing import Any, AsyncGenerator, Optional, Sequence, Union from uuid import UUID + from fastapi import HTTPException from core.base import ( @@ -12,6 +13,7 @@ from core.base import ( DocumentExtraction, DocumentInfo, DocumentType, + GenerationConfig, IngestionStatus, R2RException, RawChunk, @@ -221,6 +223,43 @@ class IngestionService(Service): ingestion_config=ingestion_config, ) + async def augment_document_info( + self, + document_info: DocumentInfo, + chunked_documents: list[dict], + ) -> None: + if not self.config.ingestion.skip_document_summary: + document = f"Document Title: {document_info.title}\n" + if document_info.metadata != {}: + document += f"Document Metadata: {json.dumps(document_info.metadata)}\n" + + document += "Document Text:\n" + for chunk in chunked_documents[ + 0 : self.config.ingestion.chunks_for_document_summary + ]: + document += chunk["data"] + + messages = await self.providers.database.prompt_handler.get_message_payload( + system_prompt_name=self.config.ingestion.document_summary_system_prompt, + task_prompt_name=self.config.ingestion.document_summary_task_prompt, + task_inputs={"document": document}, + ) + response = await self.providers.llm.aget_completion( + messages=messages, + generation_config=GenerationConfig(model="openai/gpt-4o-mini"), + ) + + document_info.summary = response.choices[0].message.content # type: ignore + + if not document_info.summary: + raise ValueError("Expected a generated response.") + + embedding = await self.providers.embedding.async_get_embedding( + text=document_info.summary, + ) + document_info.summary_embedding = embedding + return + async def embed_document( self, 
chunked_documents: list[dict], diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index 9d0e56dc5..c80e60ecc 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -3,6 +3,7 @@ import math import time from typing import AsyncGenerator, Optional from uuid import UUID + from fastapi import HTTPException from core.base import KGExtractionStatus, RunManager diff --git a/py/core/main/services/retrieval_service.py b/py/core/main/services/retrieval_service.py index e0770ade2..e1c1ba009 100644 --- a/py/core/main/services/retrieval_service.py +++ b/py/core/main/services/retrieval_service.py @@ -3,18 +3,19 @@ import logging import time from typing import Optional from uuid import UUID + from fastapi import HTTPException from core import R2RStreamingRAGAgent from core.base import ( - DocumentSearchSettings, + DocumentInfo, EmbeddingPurpose, GenerationConfig, KGSearchSettings, Message, R2RException, RunManager, - VectorSearchSettings, + SearchSettings, manage_run, to_async_generator, ) @@ -55,7 +56,7 @@ class RetrievalService(Service): async def search( self, query: str, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: SearchSettings = SearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), *args, **kwargs, @@ -121,12 +122,14 @@ class RetrievalService(Service): async def search_documents( self, query: str, - settings: DocumentSearchSettings, - ) -> list[dict]: + settings: SearchSettings, + query_embedding: Optional[list[float]] = None, + ) -> list[DocumentInfo]: return await self.providers.database.search_documents( query_text=query, settings=settings, + query_embedding=query_embedding, ) @telemetry_event("Completion") @@ -149,7 +152,7 @@ class RetrievalService(Service): self, query: str, rag_generation_config: GenerationConfig, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: 
SearchSettings = SearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), *args, **kwargs, @@ -247,7 +250,7 @@ class RetrievalService(Service): async def agent( self, rag_generation_config: GenerationConfig, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: SearchSettings = SearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), task_prompt_override: Optional[str] = None, include_title_if_available: Optional[bool] = False, @@ -422,7 +425,7 @@ class RetrievalServiceAdapter: @staticmethod def prepare_search_input( query: str, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, kg_search_settings: KGSearchSettings, user: UserResponse, ) -> dict: @@ -437,7 +440,7 @@ class RetrievalServiceAdapter: def parse_search_input(data: dict): return { "query": data["query"], - "vector_search_settings": VectorSearchSettings.from_dict( + "vector_search_settings": SearchSettings.from_dict( data["vector_search_settings"] ), "kg_search_settings": KGSearchSettings.from_dict( @@ -449,7 +452,7 @@ class RetrievalServiceAdapter: @staticmethod def prepare_rag_input( query: str, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, kg_search_settings: KGSearchSettings, rag_generation_config: GenerationConfig, task_prompt_override: Optional[str], @@ -468,7 +471,7 @@ class RetrievalServiceAdapter: def parse_rag_input(data: dict): return { "query": data["query"], - "vector_search_settings": VectorSearchSettings.from_dict( + "vector_search_settings": SearchSettings.from_dict( data["vector_search_settings"] ), "kg_search_settings": KGSearchSettings.from_dict( @@ -484,7 +487,7 @@ class RetrievalServiceAdapter: @staticmethod def prepare_agent_input( message: Message, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, kg_search_settings: KGSearchSettings, rag_generation_config: GenerationConfig, 
task_prompt_override: Optional[str], @@ -509,7 +512,7 @@ class RetrievalServiceAdapter: def parse_agent_input(data: dict): return { "message": Message.from_dict(data["message"]), - "vector_search_settings": VectorSearchSettings.from_dict( + "vector_search_settings": SearchSettings.from_dict( data["vector_search_settings"] ), "kg_search_settings": KGSearchSettings.from_dict( diff --git a/py/core/pipelines/rag_pipeline.py b/py/core/pipelines/rag_pipeline.py index 474ace913..3fc380719 100644 --- a/py/core/pipelines/rag_pipeline.py +++ b/py/core/pipelines/rag_pipeline.py @@ -5,7 +5,7 @@ from typing import Any, Optional from ..base.abstractions import ( GenerationConfig, KGSearchSettings, - VectorSearchSettings, + SearchSettings, ) from ..base.logger.base import RunType from ..base.logger.run_manager import RunManager, manage_run @@ -34,7 +34,7 @@ class RAGPipeline(AsyncPipeline): input: Any, state: Optional[AsyncState], run_manager: Optional[RunManager] = None, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: SearchSettings = SearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), rag_generation_config: GenerationConfig = GenerationConfig(), *args: Any, diff --git a/py/core/pipelines/search_pipeline.py b/py/core/pipelines/search_pipeline.py index 7c120c3d2..ed7ede2da 100644 --- a/py/core/pipelines/search_pipeline.py +++ b/py/core/pipelines/search_pipeline.py @@ -6,7 +6,7 @@ from typing import Any, Optional from ..base.abstractions import ( AggregateSearchResult, KGSearchSettings, - VectorSearchSettings, + SearchSettings, ) from ..base.logger.run_manager import RunManager, manage_run from ..base.pipeline.base_pipeline import AsyncPipeline, dequeue_requests @@ -35,7 +35,7 @@ class SearchPipeline(AsyncPipeline): state: Optional[AsyncState], stream: bool = False, run_manager: Optional[RunManager] = None, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: 
SearchSettings = SearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), *args: Any, **kwargs: Any, diff --git a/py/core/pipes/kg/deduplication.py b/py/core/pipes/kg/deduplication.py index f7ff24f75..441167610 100644 --- a/py/core/pipes/kg/deduplication.py +++ b/py/core/pipes/kg/deduplication.py @@ -2,6 +2,7 @@ import json import logging from typing import Any, Union from uuid import UUID + from fastapi import HTTPException from core.base import AsyncState diff --git a/py/core/pipes/kg/prompt_tuning.py b/py/core/pipes/kg/prompt_tuning.py index f1763f858..7a1274d5b 100644 --- a/py/core/pipes/kg/prompt_tuning.py +++ b/py/core/pipes/kg/prompt_tuning.py @@ -5,6 +5,7 @@ Pipe to tune the prompt for the KG model. import logging from typing import Any from uuid import UUID + from fastapi import HTTPException from core.base import ( diff --git a/py/core/pipes/retrieval/multi_search.py b/py/core/pipes/retrieval/multi_search.py index 6e2dffebd..131f71f9b 100644 --- a/py/core/pipes/retrieval/multi_search.py +++ b/py/core/pipes/retrieval/multi_search.py @@ -4,8 +4,8 @@ from uuid import UUID from core.base.abstractions import ( GenerationConfig, + SearchSettings, VectorSearchResult, - VectorSearchSettings, ) from core.base.pipes.base_pipe import AsyncPipe @@ -51,7 +51,7 @@ class MultiSearchPipe(AsyncPipe): input: Any, state: Any, run_id: UUID, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, query_transform_generation_config: Optional[GenerationConfig] = None, *args: Any, **kwargs: Any, diff --git a/py/core/pipes/retrieval/routing_search_pipe.py b/py/core/pipes/retrieval/routing_search_pipe.py index 72d7b7e2d..ca631809a 100644 --- a/py/core/pipes/retrieval/routing_search_pipe.py +++ b/py/core/pipes/retrieval/routing_search_pipe.py @@ -1,12 +1,7 @@ from typing import Any, AsyncGenerator, Dict from uuid import UUID -from core.base import ( - AsyncPipe, - AsyncState, - VectorSearchResult, - VectorSearchSettings, -) +from 
core.base import AsyncPipe, AsyncState, SearchSettings, VectorSearchResult class RoutingSearchPipe(AsyncPipe): @@ -27,7 +22,7 @@ class RoutingSearchPipe(AsyncPipe): input: AsyncPipe.Input, state: AsyncState, run_id: UUID, - vector_search_settings: VectorSearchSettings, + vector_search_settings: SearchSettings, *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorSearchResult, None]: diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index 645cfe91e..292f91143 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -9,8 +9,8 @@ from core.base import ( DatabaseProvider, EmbeddingProvider, EmbeddingPurpose, + SearchSettings, VectorSearchResult, - VectorSearchSettings, ) from ..abstractions.search_pipe import SearchPipe @@ -44,7 +44,7 @@ class VectorSearchPipe(SearchPipe): async def search( # type: ignore self, message: str, - search_settings: VectorSearchSettings, + search_settings: SearchSettings, *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorSearchResult, None]: @@ -72,7 +72,7 @@ class VectorSearchPipe(SearchPipe): search_settings=search_settings, ) ) - reranked_results = self.embedding_provider.rerank( + reranked_results = await self.embedding_provider.arerank( query=message, results=search_results, limit=search_settings.search_limit, @@ -93,7 +93,7 @@ class VectorSearchPipe(SearchPipe): input: AsyncPipe.Input, state: AsyncState, run_id: UUID, - vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + vector_search_settings: SearchSettings = SearchSettings(), *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorSearchResult, None]: diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 69043dfc6..f060591e5 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -1,10 +1,9 @@ import logging import os from datetime import datetime, timedelta, timezone -from 
async def create_tables(self):
    """Create the document-info table and its GIN indexes when they are missing.

    The table carries the per-document bookkeeping columns plus ``summary``,
    ``summary_embedding`` (a ``vector`` of ``self.dimension``) and a stored,
    generated ``doc_search_vector`` built from the title, the summary and
    ``metadata->>'description'``.

    Any exception raised while building or executing the DDL is logged as a
    warning and swallowed; nothing is re-raised to the caller.
    """
    table_name = self._get_table_name(PostgresDocumentHandler.TABLE_NAME)
    logger.info(f"Creating table, if not exists: {table_name}")
    try:
        ddl = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            document_id UUID PRIMARY KEY,
            collection_ids UUID[],
            user_id UUID,
            type TEXT,
            metadata JSONB,
            title TEXT,
            summary TEXT NULL,
            summary_embedding vector({self.dimension}) NULL,
            version TEXT,
            size_in_bytes INT,
            ingestion_status TEXT DEFAULT 'pending',
            kg_extraction_status TEXT DEFAULT 'pending',
            created_at TIMESTAMPTZ DEFAULT NOW(),
            updated_at TIMESTAMPTZ DEFAULT NOW(),
            ingestion_attempt_number INT DEFAULT 0,
            doc_search_vector tsvector GENERATED ALWAYS AS (
                setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
                setweight(to_tsvector('english', COALESCE(summary, '')), 'B') ||
                setweight(to_tsvector('english', COALESCE((metadata->>'description')::text, '')), 'C')
            ) STORED
        );
        CREATE INDEX IF NOT EXISTS idx_collection_ids_{self.project_name}
        ON {table_name} USING GIN (collection_ids);

        -- Full text search index
        CREATE INDEX IF NOT EXISTS idx_doc_search_{self.project_name}
        ON {table_name}
        USING GIN (doc_search_vector);
        """
        await self.connection_manager.execute_query(ddl)
    except Exception as exc:
        # NOTE(review): swallowing here looks deliberate (older schemas that
        # still need `r2r db upgrade`) - confirm before tightening.
        logger.warning(f"Error {exc} when creating document table.")
-107,8 +134,9 @@ class PostgresDocumentHandler(DocumentHandler): UPDATE {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} SET collection_ids = $1, user_id = $2, type = $3, metadata = $4, title = $5, version = $6, size_in_bytes = $7, ingestion_status = $8, - kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11 - WHERE document_id = $12 + kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11, + summary = $12, summary_embedding = $13 + WHERE document_id = $14 """ await conn.execute( update_query, @@ -123,15 +151,18 @@ class PostgresDocumentHandler(DocumentHandler): db_entry["kg_extraction_status"], db_entry["updated_at"], new_attempt_number, + db_entry["summary"], + db_entry["summary_embedding"], document_info.id, ) else: + insert_query = f""" INSERT INTO {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} (document_id, collection_ids, user_id, type, metadata, title, version, size_in_bytes, ingestion_status, kg_extraction_status, created_at, - updated_at, ingestion_attempt_number) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + updated_at, ingestion_attempt_number, summary, summary_embedding) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) """ await conn.execute( insert_query, @@ -148,6 +179,8 @@ class PostgresDocumentHandler(DocumentHandler): db_entry["created_at"], db_entry["updated_at"], db_entry["ingestion_attempt_number"], + db_entry["summary"], + db_entry["summary_embedding"], ) break # Success, exit the retry loop @@ -164,6 +197,12 @@ class PostgresDocumentHandler(DocumentHandler): else: wait_time = 0.1 * (2**retries) # Exponential backoff await asyncio.sleep(wait_time) + except Exception as e: + if 'column "summary"' in str(e): + raise ValueError( + "Document schema is missing 'summary' and 'summary_embedding' columns. Call `r2r db upgrade` to carry out the necessary migration." 
+ ) + raise async def delete_from_documents_overview( self, document_id: UUID, version: Optional[str] = None @@ -385,10 +424,51 @@ class PostgresDocumentHandler(DocumentHandler): if conditions: base_query += " WHERE " + " AND ".join(conditions) + # query = f""" + # SELECT document_id, collection_ids, user_id, type, metadata, title, version, + # size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, + # summary, summary_embedding, + # COUNT(*) OVER() AS total_entries + # {base_query} + # ORDER BY created_at DESC + # OFFSET ${param_index} + # """ + + # First check if the new columns exist + try: + check_query = f""" + SELECT EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE table_name = '{self._get_table_name(PostgresDocumentHandler.TABLE_NAME)}' + AND column_name = 'summary' + ); + """ + has_new_columns = await self.connection_manager.fetch_query( + check_query + ) + has_new_columns = has_new_columns[0]["exists"] + except Exception as e: + logger.warning(f"Error checking for new columns: {e}") + has_new_columns = False + + # Construct the SELECT part of the query based on column existence + if has_new_columns: + select_fields = """ + SELECT document_id, collection_ids, user_id, type, metadata, title, version, + size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, + summary, summary_embedding, + COUNT(*) OVER() AS total_entries + """ + else: + select_fields = """ + SELECT document_id, collection_ids, user_id, type, metadata, title, version, + size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, + COUNT(*) OVER() AS total_entries + """ + query = f""" - SELECT document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, - COUNT(*) OVER() AS total_entries + {select_fields} {base_query} ORDER BY created_at DESC OFFSET ${param_index} @@ -405,26 +485,52 @@ class 
PostgresDocumentHandler(DocumentHandler): results = await self.connection_manager.fetch_query(query, params) total_entries = results[0]["total_entries"] if results else 0 - documents = [ - DocumentInfo( - id=row["document_id"], - collection_ids=row["collection_ids"], - user_id=row["user_id"], - document_type=DocumentType(row["type"]), - metadata=json.loads(row["metadata"]), - title=row["title"], - version=row["version"], - size_in_bytes=row["size_in_bytes"], - ingestion_status=IngestionStatus(row["ingestion_status"]), - kg_extraction_status=KGExtractionStatus( - row["kg_extraction_status"] - ), - created_at=row["created_at"], - updated_at=row["updated_at"], - ) - for row in results - ] + documents = [] + for row in results: + # Safely handle the embedding + embedding = None + if ( + "summary_embedding" in row + and row["summary_embedding"] is not None + ): + try: + # Parse the vector string returned by Postgres + embedding_str = row["summary_embedding"] + if embedding_str.startswith( + "[" + ) and embedding_str.endswith("]"): + embedding = [ + float(x) + for x in embedding_str[1:-1].split(",") + if x + ] + except Exception as e: + logger.warning( + f"Failed to parse embedding for document {row['document_id']}: {e}" + ) + documents.append( + DocumentInfo( + id=row["document_id"], + collection_ids=row["collection_ids"], + user_id=row["user_id"], + document_type=DocumentType(row["type"]), + metadata=json.loads(row["metadata"]), + title=row["title"], + version=row["version"], + size_in_bytes=row["size_in_bytes"], + ingestion_status=IngestionStatus( + row["ingestion_status"] + ), + kg_extraction_status=KGExtractionStatus( + row["kg_extraction_status"] + ), + created_at=row["created_at"], + updated_at=row["updated_at"], + summary=row["summary"] if "summary" in row else None, + summary_embedding=embedding, + ) + ) return {"results": documents, "total_entries": total_entries} except Exception as e: logger.error(f"Error in get_documents_overview: {str(e)}") @@ -432,3 
async def semantic_document_search(
    self, query_embedding: list[float], search_settings: SearchSettings
) -> list[DocumentInfo]:
    """Search documents by vector distance between the query and summary embeddings.

    Only rows with a non-null ``summary_embedding`` are considered. Rows are
    ordered by ascending pgvector ``<=>`` distance and the reported
    ``search_score`` is ``1.0 - distance``.

    Args:
        query_embedding: Embedding of the query; sent as its ``str()`` form and
            cast to ``vector(self.dimension)`` in SQL.
        search_settings: Supplies ``search_filters``,
            ``selected_collection_ids``, ``search_limit``, ``offset`` and
            ``include_metadatas``.

    Returns:
        One ``DocumentInfo`` per matching row. ``metadata`` always carries
        ``search_score`` and ``search_type == "semantic"``; the stored
        metadata is merged in only when ``include_metadatas`` is set.
    """
    where_clauses = ["summary_embedding IS NOT NULL"]
    params: list[str | int | bytes] = [str(query_embedding)]

    # Handle filters
    if search_settings.search_filters:
        filter_clause = self._build_filters(
            search_settings.search_filters, params
        )
        # A filter made only of empty groups (e.g. {"$and": [{}]}) produces
        # an empty clause; appending it would emit "... AND " and break the SQL.
        if filter_clause:
            where_clauses.append(filter_clause)

    # Handle collection filtering
    if search_settings.selected_collection_ids:
        where_clauses.append("collection_ids && $" + str(len(params) + 1))
        params.append(
            [str(ele) for ele in search_settings.selected_collection_ids]  # type: ignore
        )

    where_clause = " AND ".join(where_clauses)

    # LIMIT/OFFSET placeholders are numbered before the values are appended.
    query = f"""
    WITH document_scores AS (
        SELECT
            document_id,
            collection_ids,
            user_id,
            type,
            metadata,
            title,
            version,
            size_in_bytes,
            ingestion_status,
            kg_extraction_status,
            created_at,
            updated_at,
            summary,
            summary_embedding,
            (summary_embedding <=> $1::vector({self.dimension})) as semantic_distance
        FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)}
        WHERE {where_clause}
        ORDER BY semantic_distance ASC
        LIMIT ${len(params) + 1}
        OFFSET ${len(params) + 2}
    )
    SELECT *,
        1.0 - semantic_distance as semantic_score
    FROM document_scores
    """

    params.extend([search_settings.search_limit, search_settings.offset])

    results = await self.connection_manager.fetch_query(query, params)

    return [
        DocumentInfo(
            id=row["document_id"],
            collection_ids=row["collection_ids"],
            user_id=row["user_id"],
            document_type=DocumentType(row["type"]),
            metadata={
                **(
                    json.loads(row["metadata"])
                    if search_settings.include_metadatas
                    else {}
                ),
                "search_score": float(row["semantic_score"]),
                "search_type": "semantic",
            },
            title=row["title"],
            version=row["version"],
            size_in_bytes=row["size_in_bytes"],
            ingestion_status=IngestionStatus(row["ingestion_status"]),
            kg_extraction_status=KGExtractionStatus(
                row["kg_extraction_status"]
            ),
            created_at=row["created_at"],
            updated_at=row["updated_at"],
            summary=row["summary"],
            # The WHERE clause guarantees a non-null embedding; it is parsed
            # from its "[a,b,...]" text form.
            summary_embedding=[
                float(x)
                for x in row["summary_embedding"][1:-1].split(",")
                if x
            ],
        )
        for row in results
    ]
async def hybrid_document_search(
    self,
    query_text: str,
    query_embedding: list[float],
    search_settings: SearchSettings,
) -> list[DocumentInfo]:
    """Search documents with semantic and full-text search, fused by weighted RRF.

    Both sub-searches are run with an enlarged limit, their rankings are
    combined with reciprocal rank fusion (``1 / (rrf_k + rank)``, weighted by
    ``semantic_weight`` and ``full_text_weight`` from
    ``search_settings.hybrid_search_settings``), and the requested
    ``offset``/``search_limit`` window is taken from the fused ordering.

    Args:
        query_text: Text passed to the full-text search.
        query_embedding: Embedding passed to the semantic search.
        search_settings: Search configuration; not mutated (a deep copy is
            adjusted for the sub-searches).

    Returns:
        ``DocumentInfo`` objects whose ``metadata`` carries ``search_score``,
        ``semantic_rank``, ``full_text_rank`` and ``search_type == "hybrid"``.
    """
    # Over-fetch for better fusion. The sub-searches must start at rank 0:
    # pagination is applied once, on the fused list below. Keeping the
    # caller's offset here would apply it twice and skip results.
    extended_settings = copy.deepcopy(search_settings)
    extended_settings.offset = 0
    extended_settings.search_limit = (
        search_settings.offset + search_settings.search_limit
    ) * 3

    # Get results from both search methods
    semantic_results = await self.semantic_document_search(
        query_embedding, extended_settings
    )
    full_text_results = await self.full_text_document_search(
        query_text, extended_settings
    )

    # Combine results using RRF
    doc_scores: dict[str, dict] = {}

    # Process semantic results
    for rank, result in enumerate(semantic_results, 1):
        doc_id = str(result.id)
        doc_scores[doc_id] = {
            "semantic_rank": rank,
            # Default rank if not found in the full-text results
            "full_text_rank": len(full_text_results) + 1,
            "data": result,
        }

    # Process full-text results
    for rank, result in enumerate(full_text_results, 1):
        doc_id = str(result.id)
        if doc_id in doc_scores:
            doc_scores[doc_id]["full_text_rank"] = rank
        else:
            doc_scores[doc_id] = {
                # Default rank if not found in the semantic results
                "semantic_rank": len(semantic_results) + 1,
                "full_text_rank": rank,
                "data": result,
            }

    # Calculate RRF scores using hybrid search settings
    rrf_k = search_settings.hybrid_search_settings.rrf_k
    semantic_weight = search_settings.hybrid_search_settings.semantic_weight
    full_text_weight = search_settings.hybrid_search_settings.full_text_weight

    for scores in doc_scores.values():
        semantic_score = 1 / (rrf_k + scores["semantic_rank"])
        full_text_score = 1 / (rrf_k + scores["full_text_rank"])

        # Weighted combination, normalised by the total weight
        scores["final_score"] = (
            semantic_score * semantic_weight
            + full_text_score * full_text_weight
        ) / (semantic_weight + full_text_weight)

    # Sort by final score and apply offset/limit exactly once
    start = search_settings.offset
    stop = start + search_settings.search_limit
    sorted_results = sorted(
        doc_scores.values(), key=lambda x: x["final_score"], reverse=True
    )[start:stop]

    return [
        DocumentInfo(
            **{
                **result["data"].__dict__,
                "metadata": {
                    **(
                        result["data"].metadata
                        if search_settings.include_metadatas
                        else {}
                    ),
                    "search_score": result["final_score"],
                    "semantic_rank": result["semantic_rank"],
                    "full_text_rank": result["full_text_rank"],
                    "search_type": "hybrid",
                },
            }
        )
        for result in sorted_results
    ]
+ """ + if search_settings is None: + search_settings = SearchSettings() + + if search_settings.use_hybrid_search: + if query_embedding is None: + raise ValueError( + "query_embedding is required for hybrid search" + ) + return await self.hybrid_document_search( + query_text, query_embedding, search_settings + ) + elif search_settings.use_vector_search: + if query_embedding is None: + raise ValueError( + "query_embedding is required for vector search" + ) + return await self.semantic_document_search( + query_embedding, search_settings + ) + else: + return await self.full_text_document_search( + query_text, search_settings + ) + + # TODO - Remove copy pasta, consolidate + def _build_filters( + self, filters: dict, parameters: list[Union[str, int, bytes]] + ) -> str: + + def parse_condition(key: str, value: Any) -> str: # type: ignore + # nonlocal parameters + if key in self.COLUMN_VARS: + # Handle column-based filters + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op == "$eq": + parameters.append(clause) + return f"{key} = ${len(parameters)}" + elif op == "$ne": + parameters.append(clause) + return f"{key} != ${len(parameters)}" + elif op == "$in": + parameters.append(clause) + return f"{key} = ANY(${len(parameters)})" + elif op == "$nin": + parameters.append(clause) + return f"{key} != ALL(${len(parameters)})" + elif op == "$overlap": + parameters.append(clause) + return f"{key} && ${len(parameters)}" + elif op == "$contains": + parameters.append(clause) + return f"{key} @> ${len(parameters)}" + elif op == "$any": + if key == "collection_ids": + parameters.append(f"%{clause}%") + return f"array_to_string({key}, ',') LIKE ${len(parameters)}" + parameters.append(clause) + return f"${len(parameters)} = ANY({key})" + else: + raise ValueError( + f"Unsupported operator for column {key}: {op}" + ) + else: + # Handle direct equality + parameters.append(value) + return f"{key} = ${len(parameters)}" + else: + # Handle JSON-based filters + 
json_col = "metadata" + if key.startswith("metadata."): + key = key.split("metadata.")[1] + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op not in ( + "$eq", + "$ne", + "$lt", + "$lte", + "$gt", + "$gte", + "$in", + "$contains", + ): + raise ValueError("unknown operator") + + if op == "$eq": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' = ${len(parameters)}::jsonb" + ) + elif op == "$ne": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' != ${len(parameters)}::jsonb" + ) + elif op == "$lt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float < (${len(parameters)}::jsonb)::float" + elif op == "$lte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float <= (${len(parameters)}::jsonb)::float" + elif op == "$gt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float > (${len(parameters)}::jsonb)::float" + elif op == "$gte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float >= (${len(parameters)}::jsonb)::float" + elif op == "$in": + if not isinstance(clause, list): + raise ValueError( + "argument to $in filter must be a list" + ) + parameters.append(json.dumps(clause)) + return f"{json_col}->'{key}' = ANY(SELECT jsonb_array_elements(${len(parameters)}::jsonb))" + elif op == "$contains": + if not isinstance(clause, (int, str, float, list)): + raise ValueError( + "argument to $contains filter must be a scalar or array" + ) + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' @> ${len(parameters)}::jsonb" + ) + + def parse_filter(filter_dict: dict) -> str: + filter_conditions = [] + for key, value in filter_dict.items(): + if key == "$and": + and_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if and_conditions: + filter_conditions.append( + f"({' AND '.join(and_conditions)})" + ) + elif key == "$or": + or_conditions = [ + 
parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if or_conditions: + filter_conditions.append( + f"({' OR '.join(or_conditions)})" + ) + else: + filter_conditions.append(parse_condition(key, value)) + + # Check if there is only a single condition + if len(filter_conditions) == 1: + return filter_conditions[0] + else: + return " AND ".join(filter_conditions) + + where_clause = parse_filter(filters) + + return where_clause diff --git a/py/core/providers/database/file.py b/py/core/providers/database/file.py index dc12f0ffe..df5496303 100644 --- a/py/core/providers/database/file.py +++ b/py/core/providers/database/file.py @@ -2,9 +2,9 @@ import io import logging from typing import BinaryIO, Optional, Union from uuid import UUID -from fastapi import HTTPException import asyncpg +from fastapi import HTTPException from core.base import FileHandler, R2RException diff --git a/py/core/providers/database/kg.py b/py/core/providers/database/kg.py index fac3882d8..b233041dd 100644 --- a/py/core/providers/database/kg.py +++ b/py/core/providers/database/kg.py @@ -3,10 +3,10 @@ import logging import time from typing import Any, AsyncGenerator, Optional, Tuple from uuid import UUID -from fastapi import HTTPException import asyncpg from asyncpg.exceptions import PostgresError, UndefinedTableError +from fastapi import HTTPException from core.base import ( CommunityReport, diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 655473192..f5a524ef1 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -137,7 +137,7 @@ class PostgresDBProvider(DatabaseProvider): PostgresConnectionManager() ) self.document_handler = PostgresDocumentHandler( - self.project_name, self.connection_manager + self.project_name, self.connection_manager, self.dimension ) self.token_handler = PostgresTokenHandler( self.project_name, self.connection_manager diff --git 
a/py/core/providers/database/prompts/default_summary.yaml b/py/core/providers/database/prompts/default_summary.yaml new file mode 100644 index 000000000..2fcd0d899 --- /dev/null +++ b/py/core/providers/database/prompts/default_summary.yaml @@ -0,0 +1,18 @@ +default_summary: + template: > + ## Task: + + Your task is to generate a descriptive summary of the document that follows. Your objective is to return a summary that is roughly 10% of the input document size while retaining as many key points as possible. Your response should begin with `The document contains `. + + ### Document: + + {document} + + + ### Query: + + Reminder: Your task is to generate a descriptive summary of the document that was given. Your objective is to return a summary that is roughly 10% of the input document size while retaining as many key points as possible. Your response should begin with `The document contains `. + + ## Response: + input_types: + document: str diff --git a/py/core/providers/database/prompts/system.yaml b/py/core/providers/database/prompts/default_system.yaml similarity index 100% rename from py/core/providers/database/prompts/system.yaml rename to py/core/providers/database/prompts/default_system.yaml diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index 41654ff04..0c45d761a 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -1,6 +1,7 @@ from datetime import datetime from typing import Optional, Union from uuid import UUID + from fastapi import HTTPException from core.base import CryptoProvider, UserHandler diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 600d5c5e2..4ee807922 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -9,16 +9,15 @@ from uuid import UUID import numpy as np from core.base import ( - DocumentSearchSettings, IndexArgsHNSW, IndexArgsIVFFlat, IndexMeasure, IndexMethod, + 
SearchSettings, VectorEntry, VectorHandler, VectorQuantizationType, VectorSearchResult, - VectorSearchSettings, VectorTableName, ) @@ -144,7 +143,6 @@ class PostgresVectorHandler(VectorHandler): CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (document_id); CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (user_id); CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (collection_ids); - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (to_tsvector('english', text)); """ if self.enable_fts: query += f""" @@ -284,7 +282,7 @@ class PostgresVectorHandler(VectorHandler): await self.connection_manager.execute_many(query, params) async def semantic_search( - self, query_vector: list[float], search_settings: VectorSearchSettings + self, query_vector: list[float], search_settings: SearchSettings ) -> list[VectorSearchResult]: try: imeasure_obj = IndexMeasure(search_settings.index_measure) @@ -429,7 +427,7 @@ class PostgresVectorHandler(VectorHandler): ] async def full_text_search( - self, query_text: str, search_settings: VectorSearchSettings + self, query_text: str, search_settings: SearchSettings ) -> list[VectorSearchResult]: if not self.enable_fts: raise ValueError( @@ -491,7 +489,7 @@ class PostgresVectorHandler(VectorHandler): self, query_text: str, query_vector: list[float], - search_settings: VectorSearchSettings, + search_settings: SearchSettings, *args, **kwargs, ) -> list[VectorSearchResult]: @@ -1196,140 +1194,6 @@ class PostgresVectorHandler(VectorHandler): for r in results ] - async def search_documents( - self, - query_text: str, - settings: DocumentSearchSettings, - ) -> list[dict[str, Any]]: - """ - Search for documents based on their metadata fields and/or body text. 
- Joins with document_info table to get complete document metadata. - - Args: - query_text (str): The search query text - settings (DocumentSearchSettings): Search settings including search preferences and filters - - Returns: - list[dict[str, Any]]: List of documents with their search scores and complete metadata - """ - where_clauses = [] - params: list[Union[str, int, bytes]] = [query_text] - - # Build the dynamic metadata field search expression - metadata_fields_expr = " || ' ' || ".join( - [ - f"COALESCE(v.metadata->>{psql_quote_literal(key)}, '')" - for key in settings.metadata_keys - ] - ) - - query = f""" - WITH - -- Metadata search scores - metadata_scores AS ( - SELECT DISTINCT ON (v.document_id) - v.document_id, - d.metadata as doc_metadata, - CASE WHEN $1 = '' THEN 0.0 - ELSE - ts_rank_cd( - setweight(to_tsvector('english', {metadata_fields_expr}), 'A'), - websearch_to_tsquery('english', $1), - 32 - ) - END as metadata_rank - FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} v - LEFT JOIN {self._get_table_name('document_info')} d ON v.document_id = d.document_id - WHERE v.metadata IS NOT NULL - ), - -- Body search scores - body_scores AS ( - SELECT - document_id, - AVG( - ts_rank_cd( - setweight(to_tsvector('english', COALESCE(text, '')), 'B'), - websearch_to_tsquery('english', $1), - 32 - ) - ) as body_rank - FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - WHERE $1 != '' - {f"AND to_tsvector('english', text) @@ websearch_to_tsquery('english', $1)" if settings.search_over_body else ""} - GROUP BY document_id - ), - -- Combined scores with document metadata - combined_scores AS ( - SELECT - COALESCE(m.document_id, b.document_id) as document_id, - m.doc_metadata as metadata, - COALESCE(m.metadata_rank, 0) as debug_metadata_rank, - COALESCE(b.body_rank, 0) as debug_body_rank, - CASE - WHEN {str(settings.search_over_metadata).lower()} AND {str(settings.search_over_body).lower()} THEN - COALESCE(m.metadata_rank, 0) * 
{settings.metadata_weight} + COALESCE(b.body_rank, 0) * {settings.title_weight} - WHEN {str(settings.search_over_metadata).lower()} THEN - COALESCE(m.metadata_rank, 0) - WHEN {str(settings.search_over_body).lower()} THEN - COALESCE(b.body_rank, 0) - ELSE 0 - END as rank - FROM metadata_scores m - FULL OUTER JOIN body_scores b ON m.document_id = b.document_id - WHERE ( - ($1 = '') OR - ({str(settings.search_over_metadata).lower()} AND m.metadata_rank > 0) OR - ({str(settings.search_over_body).lower()} AND b.body_rank > 0) - ) - """ - - # Add any additional filters - if settings.filters: - filter_clause = self._build_filters(settings.filters, params) - where_clauses.append(filter_clause) - - if where_clauses: - query += f" AND {' AND '.join(where_clauses)}" - - query += """ - ) - SELECT - document_id, - metadata, - rank as score, - debug_metadata_rank, - debug_body_rank - FROM combined_scores - WHERE rank > 0 - ORDER BY rank DESC - OFFSET ${offset_param} LIMIT ${limit_param} - """.format( - offset_param=len(params) + 1, - limit_param=len(params) + 2, - ) - - # Add offset and limit to params - params.extend([settings.offset, settings.limit]) - - # Execute query - results = await self.connection_manager.fetch_query(query, params) - - # Format results with complete document metadata - return [ - { - "document_id": str(r["document_id"]), - "metadata": ( - json.loads(r["metadata"]) - if isinstance(r["metadata"], str) - else r["metadata"] - ), - "score": float(r["score"]), - "debug_metadata_rank": float(r["debug_metadata_rank"]), - "debug_body_rank": float(r["debug_body_rank"]), - } - for r in results - ] - def _get_index_options( self, method: IndexMethod, diff --git a/py/core/providers/embeddings/litellm.py b/py/core/providers/embeddings/litellm.py index c782e6577..8e6e579b2 100644 --- a/py/core/providers/embeddings/litellm.py +++ b/py/core/providers/embeddings/litellm.py @@ -1,7 +1,11 @@ import logging +import os +from copy import copy from typing import Any import 
litellm +import requests +from aiohttp import ClientError, ClientSession from litellm import AuthenticationError, aembedding, embedding from core.base import ( @@ -36,10 +40,21 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider): raise ValueError( "LiteLLMEmbeddingProvider must be initialized with provider `litellm`." ) + + self.rerank_url = None if config.rerank_model: - raise ValueError( - "LiteLLMEmbeddingProvider does not support separate reranking." - ) + + if "huggingface" not in config.rerank_model: + raise ValueError( + "LiteLLMEmbeddingProvider only supports re-ranking via the HuggingFace text-embeddings-inference API" + ) + + url = os.getenv("HUGGINGFACE_API_BASE") or config.rerank_url + if not url: + raise ValueError( + "LiteLLMEmbeddingProvider requires a valid reranking API url to be set via `embedding.rerank_url` in the r2r.toml, or via the environment variable `HUGGINGFACE_API_BASE`." + ) + self.rerank_url = url self.base_model = config.base_model if "amazon" in self.base_model: @@ -182,4 +197,106 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider): stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, ): - return results[:limit] + if self.config.rerank_model is not None: + if not self.rerank_url: + raise ValueError( + "Error, `rerank_url` was expected to be set inside LiteLLMEmbeddingProvider" + ) + + texts = [result.text for result in results] + + payload = { + "query": query, + "texts": texts, + "model-id": self.config.rerank_model.split("huggingface/")[1], + } + + headers = {"Content-Type": "application/json"} + + try: + response = requests.post( + self.rerank_url, json=payload, headers=headers + ) + response.raise_for_status() + reranked_results = response.json() + + # Copy reranked results into new array + scored_results = [] + for rank_info in reranked_results: + original_result = results[rank_info["index"]] + copied_result = copy(original_result) + # Inject the reranking score into the result object + 
async def arerank(
    self,
    query: str,
    results: list[VectorSearchResult],
    stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK,
    limit: int = 10,
) -> list[VectorSearchResult]:
    """
    Asynchronously rerank search results using the configured rerank model.

    When no rerank model is configured, or there is nothing to rerank, the
    input is returned truncated to `limit`. Otherwise the query and result
    texts are POSTed to `self.rerank_url`; the response is read as a list of
    items carrying an `index` into `results` and a `score`. Results are
    returned in the order the service lists them.

    Args:
        query: The search query string
        results: List of VectorSearchResult objects to rerank
        stage: The pipeline stage (unused by this implementation)
        limit: Maximum number of results to return

    Returns:
        Shallow copies of the reranked results with `score` replaced by the
        reranker score, limited to `limit`. If the request or response
        handling fails, the original results truncated to `limit`.

    Raises:
        ValueError: If a rerank model is configured but `rerank_url` is unset.
    """
    if self.config.rerank_model is None:
        return results[:limit]

    if not self.rerank_url:
        raise ValueError(
            "Error, `rerank_url` was expected to be set inside LiteLLMEmbeddingProvider"
        )

    # Nothing to score: skip the network round trip entirely.
    if not results:
        return results[:limit]

    # Strip the `huggingface/` routing prefix. Splitting once and taking the
    # last piece never raises, even when the prefix is absent.
    model_id = self.config.rerank_model.split("huggingface/", 1)[-1]

    payload = {
        "query": query,
        "texts": [result.text for result in results],
        "model-id": model_id,
    }
    headers = {"Content-Type": "application/json"}

    try:
        async with ClientSession() as session:
            async with session.post(
                self.rerank_url, json=payload, headers=headers
            ) as response:
                response.raise_for_status()
                reranked_results = await response.json()

        scored_results = []
        for rank_info in reranked_results:
            # Copy so the caller's result objects keep their original score.
            reranked = copy(results[rank_info["index"]])
            reranked.score = rank_info["score"]
            scored_results.append(reranked)

        return scored_results[:limit]

    except Exception as e:
        # Deliberate best-effort boundary: transport errors and malformed
        # responses both degrade to the un-reranked results.
        logger.error(f"Error during async reranking: {str(e)}")
        return results[:limit]
async def _fetch_all_documents(client, page_size: int = 1_000) -> list:
    """Page through `documents_overview` until a short page marks the end."""
    documents = []
    offset = 0
    while True:
        page = (
            await client.documents_overview(offset=offset, limit=page_size)
        )["results"]
        documents += page
        if len(page) < page_size:
            return documents
        # Advance the window; the page size itself stays fixed.
        offset += page_size


async def async_generate_all_summaries():
    """Generate a summary and a summary embedding for every document.

    Reads `R2R_BASE_URL` and `R2R_BASE_MODEL` from the environment, lists all
    documents through an `R2RAsyncClient`, and for each document not already
    present in `document_summaries.json` builds a prompt from its title,
    metadata and first 10 chunks, requests a completion, and embeds the
    resulting summary. Progress is written to `document_summaries.json` after
    every document so an interrupted run can resume.

    Returns:
        dict mapping document id (str) to
        `{"summary": <text>, "embedding": <vector>}`.

    Raises:
        ValueError: If `R2R_BASE_URL` or `R2R_BASE_MODEL` is not set.
    """
    base_url = os.getenv("R2R_BASE_URL")
    if not base_url:
        raise ValueError(
            "Environment variable `R2R_BASE_URL` must be provided, it must point at the R2R deployment you wish to migrate, e.g. `http://localhost:7272`."
        )

    print(f"Using R2R Base URL: {base_url}")

    base_model = os.getenv("R2R_BASE_MODEL")
    if not base_model:
        raise ValueError(
            "Environment variable `R2R_BASE_MODEL` must be provided, e.g. `openai/gpt-4o-mini`, it will be used for generating document summaries during migration."
        )

    print(f"Using R2R Base Model: {base_model}")

    client = R2RAsyncClient(base_url)
    documents = await _fetch_all_documents(client)

    # Load existing summaries if they exist, so reruns skip finished documents
    document_summaries = {}
    if os.path.exists("document_summaries.json"):
        try:
            with open("document_summaries.json", "r") as f:
                document_summaries = json.load(f)
            print(
                f"Loaded {len(document_summaries)} existing document summaries"
            )
        except json.JSONDecodeError:
            print(
                "Existing document_summaries.json was invalid, starting fresh"
            )
            document_summaries = {}

    # The template is loop-invariant; only `{document}` varies per document.
    summary_prompt = """## Task:

    Your task is to generate a descriptive summary of the document that follows. Your objective is to return a summary that is roughly 10% of the input document size while retaining as many key points as possible. Your response should begin with `The document contains `.

    ### Document:

    {document}


    ### Query:

    Reminder: Your task is to generate a descriptive summary of the document that was given. Your objective is to return a summary that is roughly 10% of the input document size while retaining as many key points as possible. Your response should begin with `The document contains `.

    ## Response:"""

    for document in documents:
        title = document["title"]
        # Convert UUID to string for JSON compatibility
        doc_id = str(document["id"])

        # Skip if document already has a summary
        if doc_id in document_summaries:
            print(
                f"Skipping document {title} ({doc_id}) - summary already exists"
            )
            continue

        print(f"Processing document: {title} ({doc_id})")

        try:
            document_text = f"Document Title:{title}\n"
            if document["metadata"]:
                metadata = json.dumps(document["metadata"])
                document_text += f"Document Metadata:\n{metadata}\n"

            full_chunks = (
                await client.document_chunks(document["id"], limit=10)
            )["results"]

            document_text += "Document Content:\n"
            document_text += "".join(chunk["text"] for chunk in full_chunks)

            messages = [
                {
                    "role": "user",
                    "content": summary_prompt.format(document=document_text),
                }
            ]
            print("Making completion")
            summary = await client.completion(
                messages=messages, generation_config={"model": base_model}
            )
            summary_text = summary["results"]["choices"][0]["message"][
                "content"
            ]
            # Awaited like every other call on the async client; subscripting
            # the un-awaited coroutine would raise for every document.
            embedding_response = await client.embedding(summary_text)
            embedding_vector = embedding_response["results"][0]

            # Store in our results dictionary
            document_summaries[doc_id] = {
                "summary": summary_text,
                "embedding": embedding_vector,
            }

            # Save after each document
            with open("document_summaries.json", "w") as f:
                json.dump(document_summaries, f)

            print(f"Successfully processed document {doc_id}")

        except Exception as e:
            # Best effort: one failing document must not abort the migration
            print(f"Error processing document {doc_id}: {str(e)}")
            continue

    return document_summaries
def upgrade() -> None:
    """Add document-level summary search to `document_info`.

    Does nothing when `check_if_upgrade_needed()` reports that the `summary`
    column already exists. Otherwise it generates document summaries, ensures
    the `vector` extension exists, adds the `summary` and `summary_embedding`
    columns, adds the generated `doc_search_vector` tsvector column with a GIN
    index, and back-fills each summarised document.
    """
    if not check_if_upgrade_needed():
        return

    # Use the mapping returned by the generator directly. It equals what was
    # last written to document_summaries.json, and an empty deployment (no
    # documents, hence no file) can still migrate.
    document_summaries = generate_all_summaries()
    print(f"Loaded {len(document_summaries)} document summaries")

    # Create the vector extension if it doesn't exist
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # Add new columns to document_info
    op.add_column(
        "document_info",
        sa.Column("summary", sa.Text(), nullable=True),
        schema=project_name,
    )

    op.add_column(
        "document_info",
        sa.Column("summary_embedding", Vector, nullable=True),
        schema=project_name,
    )

    # Add generated column for full text search.
    # `project_name` is an identifier and cannot be sent as a bound value,
    # so it is interpolated; it comes from the operator's environment.
    op.execute(
        f"""
        ALTER TABLE {project_name}.document_info
        ADD COLUMN doc_search_vector tsvector
        GENERATED ALWAYS AS (
            setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
            setweight(to_tsvector('english', COALESCE(summary, '')), 'B') ||
            setweight(to_tsvector('english', COALESCE((metadata->>'description')::text, '')), 'C')
        ) STORED;
        """
    )

    # Create index for full text search
    op.execute(
        f"""
        CREATE INDEX idx_doc_search_{project_name}
        ON {project_name}.document_info
        USING GIN (doc_search_vector);
        """
    )

    # Values go through bound parameters: model-written summary text can hold
    # quotes or `:name` sequences that would corrupt an interpolated statement.
    update_stmt = sa.text(
        f"""
        UPDATE {project_name}.document_info
        SET
            summary = :summary,
            summary_embedding = CAST(:embedding AS vector({dimension}))
        WHERE document_id = CAST(:doc_id AS uuid)
        """
    )

    # Update existing documents with summaries and embeddings
    for doc_id, doc_data in document_summaries.items():
        # Convert the embedding array to the PostgreSQL vector text format
        embedding_str = f"[{','.join(str(x) for x in doc_data['embedding'])}]"

        op.execute(
            update_stmt.bindparams(
                summary=doc_data["summary"],
                embedding=embedding_str,
                doc_id=doc_id,
            )
        )
EXTENSION IF NOT EXISTS pg_trgm") - op.execute("CREATE EXTENSION IF NOT EXISTS btree_gin") + if check_if_upgrade_needed(): + # Create required extensions + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm") + op.execute("CREATE EXTENSION IF NOT EXISTS btree_gin") - # KG table migrations - op.execute( - f"ALTER TABLE IF EXISTS {project_name}.entity_raw RENAME TO chunk_entity" - ) - op.execute( - f"ALTER TABLE IF EXISTS {project_name}.triple_raw RENAME TO chunk_triple" - ) - op.execute( - f"ALTER TABLE IF EXISTS {project_name}.entity_embedding RENAME TO document_entity" - ) - op.execute( - f"ALTER TABLE IF EXISTS {project_name}.community RENAME TO community_info" - ) + # KG table migrations + op.execute( + f"ALTER TABLE IF EXISTS {project_name}.entity_raw RENAME TO chunk_entity" + ) + op.execute( + f"ALTER TABLE IF EXISTS {project_name}.triple_raw RENAME TO chunk_triple" + ) + op.execute( + f"ALTER TABLE IF EXISTS {project_name}.entity_embedding RENAME TO document_entity" + ) + op.execute( + f"ALTER TABLE IF EXISTS {project_name}.community RENAME TO community_info" + ) - # Create the new table - op.create_table( - new_vector_table_name, - sa.Column("extraction_id", postgresql.UUID(), nullable=False), - sa.Column("document_id", postgresql.UUID(), nullable=False), - sa.Column("user_id", postgresql.UUID(), nullable=False), - sa.Column( - "collection_ids", - postgresql.ARRAY(postgresql.UUID()), - server_default="{}", - ), - sa.Column("vec", Vector), # This will be handled as a vector type - sa.Column("text", sa.Text(), nullable=True), - sa.Column( - "fts", - postgresql.TSVECTOR, - nullable=False, - server_default=sa.text("to_tsvector('english'::regconfig, '')"), - ), - sa.Column( - "metadata", postgresql.JSONB(), server_default="{}", nullable=False - ), - sa.PrimaryKeyConstraint("extraction_id"), - schema=project_name, - ) + # Create the new table + op.create_table( + new_vector_table_name, + 
sa.Column("extraction_id", postgresql.UUID(), nullable=False), + sa.Column("document_id", postgresql.UUID(), nullable=False), + sa.Column("user_id", postgresql.UUID(), nullable=False), + sa.Column( + "collection_ids", + postgresql.ARRAY(postgresql.UUID()), + server_default="{}", + ), + sa.Column("vec", Vector), # This will be handled as a vector type + sa.Column("text", sa.Text(), nullable=True), + sa.Column( + "fts", + postgresql.TSVECTOR, + nullable=False, + server_default=sa.text( + "to_tsvector('english'::regconfig, '')" + ), + ), + sa.Column( + "metadata", + postgresql.JSONB(), + server_default="{}", + nullable=False, + ), + sa.PrimaryKeyConstraint("extraction_id"), + schema=project_name, + ) - # Create indices - op.create_index( - "idx_vectors_document_id", - new_vector_table_name, - ["document_id"], - schema=project_name, - ) + # Create indices + op.create_index( + "idx_vectors_document_id", + new_vector_table_name, + ["document_id"], + schema=project_name, + ) - op.create_index( - "idx_vectors_user_id", - new_vector_table_name, - ["user_id"], - schema=project_name, - ) + op.create_index( + "idx_vectors_user_id", + new_vector_table_name, + ["user_id"], + schema=project_name, + ) - op.create_index( - "idx_vectors_collection_ids", - new_vector_table_name, - ["collection_ids"], - schema=project_name, - postgresql_using="gin", - ) + op.create_index( + "idx_vectors_collection_ids", + new_vector_table_name, + ["collection_ids"], + schema=project_name, + postgresql_using="gin", + ) - op.create_index( - "idx_vectors_fts", - new_vector_table_name, - ["fts"], - schema=project_name, - postgresql_using="gin", - ) + op.create_index( + "idx_vectors_fts", + new_vector_table_name, + ["fts"], + schema=project_name, + postgresql_using="gin", + ) - # Migrate data from old table (assuming old table name is 'old_vectors') - # Note: You'll need to replace 'old_schema' and 'old_vectors' with your actual names - op.execute( - f""" - INSERT INTO 
{project_name}.{new_vector_table_name} - (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) - SELECT - extraction_id, - document_id, - user_id, - collection_ids, - vec, - text, - metadata - FROM {project_name}.{old_vector_table_name} - """ - ) + # Migrate data from old table (assuming old table name is 'old_vectors') + # Note: You'll need to replace 'old_schema' and 'old_vectors' with your actual names + op.execute( + f""" + INSERT INTO {project_name}.{new_vector_table_name} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + SELECT + extraction_id, + document_id, + user_id, + collection_ids, + vec, + text, + metadata + FROM {project_name}.{old_vector_table_name} + """ + ) - # Verify data migration - op.execute( - f""" - SELECT COUNT(*) old_count FROM {project_name}.{old_vector_table_name}; - SELECT COUNT(*) new_count FROM {project_name}.{new_vector_table_name}; - """ - ) + # Verify data migration + op.execute( + f""" + SELECT COUNT(*) old_count FROM {project_name}.{old_vector_table_name}; + SELECT COUNT(*) new_count FROM {project_name}.{new_vector_table_name}; + """ + ) - # If we get here, migration was successful, so drop the old table - op.execute( - f""" - DROP TABLE IF EXISTS {project_name}.{old_vector_table_name}; - """ - ) + # If we get here, migration was successful, so drop the old table + op.execute( + f""" + DROP TABLE IF EXISTS {project_name}.{old_vector_table_name}; + """ + ) def downgrade() -> None: diff --git a/py/poetry.lock b/py/poetry.lock index 4f0eacee6..6594e7d5b 100644 --- a/py/poetry.lock +++ b/py/poetry.lock @@ -136,13 +136,13 @@ speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] [[package]] name = "aiohttp-retry" -version = "2.9.0" +version = "2.9.1" description = "Simple retry client for aiohttp" optional = true python-versions = ">=3.7" files = [ - {file = "aiohttp_retry-2.9.0-py3-none-any.whl", hash = "sha256:7661af92471e9a96c69d9b8f32021360272073397e6a15bc44c1726b12f46056"}, - 
{file = "aiohttp_retry-2.9.0.tar.gz", hash = "sha256:92c47f1580040208bac95d9a8389a87227ef22758530f2e3f4683395e42c41b5"}, + {file = "aiohttp_retry-2.9.1-py3-none-any.whl", hash = "sha256:66d2759d1921838256a05a3f80ad7e724936f083e35be5abb5e16eed6be6dc54"}, + {file = "aiohttp_retry-2.9.1.tar.gz", hash = "sha256:8eb75e904ed4ee5c2ec242fefe85bf04240f685391c4879d8f541d6028ff01f1"}, ] [package.dependencies] @@ -571,17 +571,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.54" +version = "1.35.57" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" files = [ - {file = "boto3-1.35.54-py3-none-any.whl", hash = "sha256:2d5e160b614db55fbee7981001c54476cb827c441cef65b2fcb2c52a62019909"}, - {file = "boto3-1.35.54.tar.gz", hash = "sha256:7d9c359bbbc858a60b51c86328db813353c8bd1940212cdbd0a7da835291c2e1"}, + {file = "boto3-1.35.57-py3-none-any.whl", hash = "sha256:9edf49640c79a05b0a72f4c2d1e24dfc164344b680535a645f455ac624dc3680"}, + {file = "boto3-1.35.57.tar.gz", hash = "sha256:db58348849a5af061f0f5ec9c3b699da5221ca83354059fdccb798e3ddb6b62a"}, ] [package.dependencies] -botocore = ">=1.35.54,<1.36.0" +botocore = ">=1.35.57,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -590,13 +590,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.54" +version = "1.35.57" description = "Low-level, data-driven core of boto 3." 
optional = true python-versions = ">=3.8" files = [ - {file = "botocore-1.35.54-py3-none-any.whl", hash = "sha256:9cca1811094b6cdc144c2c063a3ec2db6d7c88194b04d4277cd34fc8e3473aff"}, - {file = "botocore-1.35.54.tar.gz", hash = "sha256:131bb59ce59c8a939b31e8e647242d70cf11d32d4529fa4dca01feea1e891a76"}, + {file = "botocore-1.35.57-py3-none-any.whl", hash = "sha256:92ddd02469213766872cb2399269dd20948f90348b42bf08379881d5e946cc34"}, + {file = "botocore-1.35.57.tar.gz", hash = "sha256:d96306558085baf0bcb3b022d7a8c39c93494f031edb376694d2b2dcd0e81327"}, ] [package.dependencies] @@ -2064,13 +2064,13 @@ scipy = ">=1.4.0" [[package]] name = "identify" -version = "2.6.1" +version = "2.6.2" description = "File identification library for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"}, - {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"}, + {file = "identify-2.6.2-py2.py3-none-any.whl", hash = "sha256:c097384259f49e372f4ea00a19719d95ae27dd5ff0fd77ad630aa891306b82f3"}, + {file = "identify-2.6.2.tar.gz", hash = "sha256:fab5c716c24d7a789775228823797296a2994b075fb6080ac83a102772a98cbd"}, ] [package.extras] @@ -2433,13 +2433,13 @@ files = [ [[package]] name = "litellm" -version = "1.51.3" +version = "1.52.3" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.51.3-py3-none-any.whl", hash = "sha256:440d3c7cc5ab8eeb12cee8f4d806bff05b7db834ebc11117d7fa070a1142ced5"}, - {file = "litellm-1.51.3.tar.gz", hash = "sha256:31eff9fcbf7b058bac0fd7432c4ea0487e8555f12446a1f30e5862e33716f44d"}, + {file = "litellm-1.52.3-py3-none-any.whl", hash = 
"sha256:fc8d5d53ba184cd570ae50d9acefa53c521225b62244adedea129794e98828b6"}, + {file = "litellm-1.52.3.tar.gz", hash = "sha256:4718235cbd6dea8db99b08e884a07f7ac7fad4a4b12597e20d8ff622295e1e05"}, ] [package.dependencies] @@ -2448,7 +2448,7 @@ click = "*" importlib-metadata = ">=6.8.0" jinja2 = ">=3.1.2,<4.0.0" jsonschema = ">=4.22.0,<5.0.0" -openai = ">=1.52.0" +openai = ">=1.54.0" pydantic = ">=2.0.0,<3.0.0" python-dotenv = ">=0.2.0" requests = ">=2.31.0,<3.0.0" @@ -3173,13 +3173,13 @@ httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.53.1" +version = "1.54.3" description = "The official Python library for the openai API" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.8" files = [ - {file = "openai-1.53.1-py3-none-any.whl", hash = "sha256:b26bc2d91eda8a9317ebecddfbd388b3698f89fa56d78672dd115a1ccc175722"}, - {file = "openai-1.53.1.tar.gz", hash = "sha256:04b8df362e7e2af75c8a3bcd105a5abb3837ce883e2fa3cb8d922cb8ee3515ac"}, + {file = "openai-1.54.3-py3-none-any.whl", hash = "sha256:f18dbaf09c50d70c4185b892a2a553f80681d1d866323a2da7f7be2f688615d5"}, + {file = "openai-1.54.3.tar.gz", hash = "sha256:7511b74eeb894ac0b0253dc71f087a15d2e4d71d22d0088767205143d880cca6"}, ] [package.dependencies] @@ -3225,13 +3225,13 @@ dev = ["black", "mypy", "pytest"] [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -3518,13 +3518,13 
@@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "postgrest" -version = "0.17.2" +version = "0.18.0" description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "postgrest-0.17.2-py3-none-any.whl", hash = "sha256:f7c4f448e5a5e2d4c1dcf192edae9d1007c4261e9a6fb5116783a0046846ece2"}, - {file = "postgrest-0.17.2.tar.gz", hash = "sha256:445cd4e4a191e279492549df0c4e827d32f9d01d0852599bb8a6efb0f07fcf78"}, + {file = "postgrest-0.18.0-py3-none-any.whl", hash = "sha256:200baad0d23fee986b3a0ffd3e07bfe0cdd40e09760f11e8e13a6c0c2376d5fa"}, + {file = "postgrest-0.18.0.tar.gz", hash = "sha256:29c1a94801a17eb9ad590189993fe5a7a6d8c1bfc11a3c9d0ce7ba146454ebb3"}, ] [package.dependencies] @@ -3558,52 +3558,52 @@ test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)" [[package]] name = "pot" -version = "0.9.4" +version = "0.9.5" description = "Python Optimal Transport Library" optional = true python-versions = ">=3.7" files = [ - {file = "POT-0.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8418ab9c24f549290fdc452caebb58ded05b986a024063fe3354cfd2e704b378"}, - {file = "POT-0.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:096cd3b454f87ff9c8f48d8e221bc26509d8f9355ce99d9fefe83560f82278b5"}, - {file = "POT-0.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e67d420a479ed66f4549c785e157bb3dce2c5489bf81a44ac922a6e9471fe69"}, - {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:107bc7225906a3fa3aafdb441e1d24c55eaf1ee3badd1c93ab6199865f689221"}, - {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfee044f744b1722912654c8b905bc289ce160524db5ca0e853f1aa442ffea55"}, - {file = "POT-0.9.4-cp310-cp310-win32.whl", hash = "sha256:421c3efb8da2f1ce9605f9f2068047ea629b95de87baa15b8786b3e664de9dbd"}, - {file = "POT-0.9.4-cp310-cp310-win_amd64.whl", 
hash = "sha256:6e76194a62f29ddadc975e18cf7f07d22060735bd3fb9a023781e0e126a05822"}, - {file = "POT-0.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:148040b89063790ab784458d5d200ba4a7f36c54fdb62ea0842f8d5d4c5c6ccb"}, - {file = "POT-0.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1109fc77989834a1467be731ff957d90c2b558e772cff6c06fb90f7cbe58b014"}, - {file = "POT-0.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8218cd419e8457b37fe2b8060b5bf9bd07d4671d5f5fa4d5ac98c58b5be8c0"}, - {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea0055f18e26917ff326f39dd5e5fd43bcc9eccaab4b09a4f8d7785c8921250"}, - {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f03b4af3f56234787d2a34e0637dac9c1e1de4cb3f7386ca4f91331f0c4af187"}, - {file = "POT-0.9.4-cp311-cp311-win32.whl", hash = "sha256:a69f6d554df1de47dd312fc065d9171bdbedf48c90c8409889485945ffaaeacf"}, - {file = "POT-0.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:8791c8f09a852901e03e2dc1c6aec4f374b58b3ee905a90349713587aa16e26a"}, - {file = "POT-0.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1a7a55b3fd528e6808f99de0165dcacf185eb86ae3aff4d358b850479b76a8ba"}, - {file = "POT-0.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a9bbd2507605be6eae4f0f0d6f6f0ff91ce3eeb5b7c8eeb350e4eb76bcc6940a"}, - {file = "POT-0.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5594ab0e78329307ce4cd293f2de409513876695d60fb4c1013b5dd46069f256"}, - {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ca658105d129b752c8d20751ff2cb965d1bdcaecec319ae489b135c58d9da9"}, - {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6732f1acd94959b8fa13a4fa250ad49c1e6894ece488a81f4427dbf13df4776"}, - {file = "POT-0.9.4-cp312-cp312-win32.whl", hash = "sha256:bf7f5253fee6ba7df5dd854b739540f701153cabab80dd25332dfac93d84bec1"}, - {file = 
"POT-0.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:597ff64b06a157871feb84e6e82b3f5dfbfff57161c14660ab2ddbcc93c940e6"}, - {file = "POT-0.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:385b41606943fbc73f1ab96fd994117d79c4ad782c91bbb7ba74c0359e9de887"}, - {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3f697e084243b9fe0a975331e471fd09610343c6aa28172232958e39100ede6"}, - {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b967fb9cafc6ad30a6d51b21d6307b384491a106d6dc75f37bee52a3f63575c3"}, - {file = "POT-0.9.4-cp37-cp37m-win32.whl", hash = "sha256:35926c2f4d2ee49309dce340f7f6646fe451ca1e0d11b2d017a851d482bf4468"}, - {file = "POT-0.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:f29fa1fcd591d1940e2279dc1422ff46c0c273f6be4ecbcaa819d91dd196573e"}, - {file = "POT-0.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:63f8b50f448e32a4ae274dd1e68e22b1a2bc291c53c5c6ec5afadfb930b6a809"}, - {file = "POT-0.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cabd13a728d2db40b3989ad57810769dfba8972b686ae7f4881dbd315252e5d9"}, - {file = "POT-0.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5984157d5a819bd6c55db3b0d8fe631ff398c243e80a9e9e933cbd1ee7c7588c"}, - {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b8da4e3268eeee40dff96364f0a9f0326979d565d970ec74a1688b8ad338022"}, - {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ede957083299e4904627621f4d2c8a6b56af108fef9b486330f65519a395f10a"}, - {file = "POT-0.9.4-cp38-cp38-win32.whl", hash = "sha256:79716079d7970c6c0bf909f986c65d7103135e36b364ba5fa5caed97d7aa6464"}, - {file = "POT-0.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:3246493745bcf2b353312183b9ab547466cae711936f991a6754b62f55ff1dec"}, - {file = "POT-0.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:223c4ca199b679e4c2b8a79fb49d31f2c7fab2975c2c37d1e68a0a7fbe2bc55d"}, - 
{file = "POT-0.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c30d77994d02984ad32665f5975e272e8c02e8d5288c4edfbec08617c5c38f91"}, - {file = "POT-0.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b0fe5be45112c12cc0f6ab61fb85ed9161ca540b24a4037e5d53ab86f390a49"}, - {file = "POT-0.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab68bdfeae54719d202e923f18ec29869c09b105e42f94568221fc92996f0f4d"}, - {file = "POT-0.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2847015e3bb65171eb70eca786f8cebe806960af40625ebc17c858b6314a9e0b"}, - {file = "POT-0.9.4-cp39-cp39-win32.whl", hash = "sha256:2e35d68c6e676108d6eeb7e6b119c4c19dca364992bca57f3f513660bfb1810c"}, - {file = "POT-0.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:e7d029f024ed58f8d10b3e4d517df551bb9758ac12d0503be793f44258f2dffc"}, - {file = "pot-0.9.4.tar.gz", hash = "sha256:4cf8b46bf4992c37529161c32dd5e3334e0c089399142f08ed6d455b57015edd"}, + {file = "POT-0.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:34d766c38e65a69c087b01a854fe89fbd152c3e8af93da2227b6c40aed6d37b9"}, + {file = "POT-0.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5407377256de11b6fdc94bbba9b50ea5a2301570905fc9014541cc8473806d9"}, + {file = "POT-0.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f37039cd356198c1fb994e7d935b9bf75d44f2a40319d298bf8cc149eb360d5"}, + {file = "POT-0.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00a18427c9abdd107a2285ea0a814c6b22e95a1af8f88a37c56f23cd216f7a6b"}, + {file = "POT-0.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0dc608cea1107289a58dec33cddc1b0a3fea77ff36d66e2c8ac7aeea543969a"}, + {file = "POT-0.9.5-cp310-cp310-win32.whl", hash = "sha256:8312bee055389db47adab063749c8d77b5981534177ca6cd9b91e4fb68f69d00"}, + {file = "POT-0.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:043706d69202ac87e140121ba32ed1b038f2b3fc4a5549586187239a583cd50d"}, + {file = 
"POT-0.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b5f000da00e408ff781672a4895bfa8daacec055bd534c9e66ead479f3c6d83c"}, + {file = "POT-0.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9eddd9ff29bdb17d4db8ba00ba18d42656c694a128591502bf59afc1369e1bb3"}, + {file = "POT-0.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7eb9b88c73387a9966775a6f6d077d9d071814783701d2656dc05b5032a9662d"}, + {file = "POT-0.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f44446056f5fc9d132ed8e431732c33cbe754fb1e6d73636f1b6ae811be7df"}, + {file = "POT-0.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7f5d27bc9063e01b03d906bb77e7b3428065fdd72ed64233b249584ead2e2bf"}, + {file = "POT-0.9.5-cp311-cp311-win32.whl", hash = "sha256:cd79a8b4d35b706f2124f73ebff3bb1ce3450e01cc8f610eda3b6ce13616b829"}, + {file = "POT-0.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:6680aadb69df2f75a413fe9c58bd1c5cb744d017a7c8ba8841654fd0dc75433b"}, + {file = "POT-0.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7d57f96b333c9816a2af7817753108739b38155e52648c5967681dbd89d92ed2"}, + {file = "POT-0.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:afad647c78f999439f8c5cbcf74b03c5c0afefb08727cd7d68994130fabfc761"}, + {file = "POT-0.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bca891c28592d6e0e8f04b35989de7005f0fb9b3923f00537f1b269c5084aa7b"}, + {file = "POT-0.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:088c930a5fcd1e8e36fb6af710df47ce6e9331b6b5a28eb09c673df4186dcb10"}, + {file = "POT-0.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfb18268fac1e982e21821a03f802802a0d579c4690988b764115dd886dc38f5"}, + {file = "POT-0.9.5-cp312-cp312-win32.whl", hash = "sha256:931fa46ff8e01d47309207243988c783a2d8364452bc080b130c5d319349ad3f"}, + {file = "POT-0.9.5-cp312-cp312-win_amd64.whl", hash = 
"sha256:be786612b391c2e4d3b5db4e7d51cdb2360284e3a6949990051c2eb102f60d3c"}, + {file = "POT-0.9.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:844820020240bad66ca07255289df9ed1e46c5f71ba2401852833c0dd114c660"}, + {file = "POT-0.9.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a76a5bed3af51db1a10c59ba376f500a743f8e20c2a6d4851c4535dbbed17714"}, + {file = "POT-0.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a03da3283cb04a1fa3258f0096ad9cfa3311192d5a6bee3a2ca0e15304f8652"}, + {file = "POT-0.9.5-cp37-cp37m-win32.whl", hash = "sha256:dc50b8005b4dfa3478f0bf841c22d8b3500a8a04e5673da146d71f7039607e3a"}, + {file = "POT-0.9.5-cp37-cp37m-win_amd64.whl", hash = "sha256:a9cab787bcb3ce6d23ef297c115baad34ed578a98b4a02afba8cb4e30e39d171"}, + {file = "POT-0.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:926ba491b5b1f43fb0f3bc6e9d92b6cc634c12e2fa778eba88d9350e82fc2c88"}, + {file = "POT-0.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b77b630a303868ee14015a4306d7e852b174d4a734815c67e27cd45fd59cc07"}, + {file = "POT-0.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:db0dd974328cbdd7b20477fb5757326dda22d77cb639f4759296fcd206db380f"}, + {file = "POT-0.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb29c375d02bb5aadad527133e9c20dd73930d8e2294434dc5306fb740a49d9e"}, + {file = "POT-0.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:293e0993d66b09db69c2282edbf859e1de57a3f15b99bd909609ce120380b398"}, + {file = "POT-0.9.5-cp38-cp38-win32.whl", hash = "sha256:5996d538885b834e36a3838bc73adeb747bd54ab0a2b3178addbb35b3edafa45"}, + {file = "POT-0.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:0131aab58d57bf5876d826461d0968d1a655b611cc8c0297c38ab8a235e0d627"}, + {file = "POT-0.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:95c29ee3e647b272bfcb35c3c4cb7409326a0a6d3bf3ed8460495e9ac3f3a76d"}, + {file = 
"POT-0.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b1bca1b3465eadab9d5e1c075122963da3e921102555d1c6b7ff3c1f437d3e18"}, + {file = "POT-0.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e64f5d8890e21eb1e7decac694c34820496238e7d9c95309411e58cb0b04d384"}, + {file = "POT-0.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fa190662670868126a2372499aec513bd4ac50b4565fe2014525c7cef11e2bf"}, + {file = "POT-0.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b775daf69cb4043897050961f9b654c30261543e531d53248a99e5599db0c8"}, + {file = "POT-0.9.5-cp39-cp39-win32.whl", hash = "sha256:ceea4cffebce88211cd63bfddc878e2f29a6b6347125cbac40fa214308315878"}, + {file = "POT-0.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:2f6af660505772833d4ccc189d9de264b429d9ec8e0cb564f33d2181e6f1bbce"}, + {file = "pot-0.9.5.tar.gz", hash = "sha256:9644ee7ff51c3cffa3c2632b9dd9dff4f3520266f9fb771450935ffb646d6042"}, ] [package.dependencies] @@ -4457,105 +4457,105 @@ rpds-py = ">=0.7.0" [[package]] name = "regex" -version = "2024.9.11" +version = "2024.11.6" description = "Alternative regular expression module, to replace re." 
optional = false python-versions = ">=3.8" files = [ - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"}, - {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"}, - {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"}, - {file = 
"regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"}, - {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"}, - {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"}, - {file = 
"regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"}, - {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"}, - {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"}, - 
{file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"}, - {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"}, - {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"}, - {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"}, - {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"}, - {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"}, - {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"}, - {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = 
"regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = 
"sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = 
"regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = 
"regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + 
{file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, ] [[package]] @@ -4595,114 +4595,101 @@ requests = ">=2.0.1,<3.0.0" [[package]] name = "rpds-py" -version = "0.20.1" +version = "0.21.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"}, - {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14511a539afee6f9ab492b543060c7491c99924314977a55c98bfa2ee29ce78c"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3ccb8ac2d3c71cda472b75af42818981bdacf48d2e21c36331b50b4f16930163"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c142b88039b92e7e0cb2552e8967077e3179b22359e945574f5e2764c3953dcf"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f19169781dddae7478a32301b499b2858bc52fc45a112955e798ee307e294977"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c56de6518e14b9bf6edde23c4c39dac5b48dcf04160ea7bce8fca8397cdf86"}, - {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:925d176a549f4832c6f69fa6026071294ab5910e82a0fe6c6228fce17b0706bd"}, - {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:78f0b6877bfce7a3d1ff150391354a410c55d3cdce386f862926a4958ad5ab7e"}, - {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:3dd645e2b0dcb0fd05bf58e2e54c13875847687d0b71941ad2e757e5d89d4356"}, - {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4f676e21db2f8c72ff0936f895271e7a700aa1f8d31b40e4e43442ba94973899"}, - {file = "rpds_py-0.20.1-cp310-none-win32.whl", hash = "sha256:648386ddd1e19b4a6abab69139b002bc49ebf065b596119f8f37c38e9ecee8ff"}, - {file = "rpds_py-0.20.1-cp310-none-win_amd64.whl", hash = "sha256:d9ecb51120de61e4604650666d1f2b68444d46ae18fd492245a08f53ad2b7711"}, - {file = "rpds_py-0.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:762703bdd2b30983c1d9e62b4c88664df4a8a4d5ec0e9253b0231171f18f6d75"}, - {file = "rpds_py-0.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0b581f47257a9fce535c4567782a8976002d6b8afa2c39ff616edf87cbeff712"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:842c19a6ce894493563c3bd00d81d5100e8e57d70209e84d5491940fdb8b9e3a"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42cbde7789f5c0bcd6816cb29808e36c01b960fb5d29f11e052215aa85497c93"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c8e9340ce5a52f95fa7d3b552b35c7e8f3874d74a03a8a69279fd5fca5dc751"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ba6f89cac95c0900d932c9efb7f0fb6ca47f6687feec41abcb1bd5e2bd45535"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a916087371afd9648e1962e67403c53f9c49ca47b9680adbeef79da3a7811b0"}, - {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:200a23239781f46149e6a415f1e870c5ef1e712939fe8fa63035cd053ac2638e"}, - {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:58b1d5dd591973d426cbb2da5e27ba0339209832b2f3315928c9790e13f159e8"}, - {file = 
"rpds_py-0.20.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6b73c67850ca7cae0f6c56f71e356d7e9fa25958d3e18a64927c2d930859b8e4"}, - {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d8761c3c891cc51e90bc9926d6d2f59b27beaf86c74622c8979380a29cc23ac3"}, - {file = "rpds_py-0.20.1-cp311-none-win32.whl", hash = "sha256:cd945871335a639275eee904caef90041568ce3b42f402c6959b460d25ae8732"}, - {file = "rpds_py-0.20.1-cp311-none-win_amd64.whl", hash = "sha256:7e21b7031e17c6b0e445f42ccc77f79a97e2687023c5746bfb7a9e45e0921b84"}, - {file = "rpds_py-0.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:36785be22066966a27348444b40389f8444671630063edfb1a2eb04318721e17"}, - {file = "rpds_py-0.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:142c0a5124d9bd0e2976089484af5c74f47bd3298f2ed651ef54ea728d2ea42c"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbddc10776ca7ebf2a299c41a4dde8ea0d8e3547bfd731cb87af2e8f5bf8962d"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15a842bb369e00295392e7ce192de9dcbf136954614124a667f9f9f17d6a216f"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be5ef2f1fc586a7372bfc355986226484e06d1dc4f9402539872c8bb99e34b01"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbcf360c9e3399b056a238523146ea77eeb2a596ce263b8814c900263e46031a"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd27a66740ffd621d20b9a2f2b5ee4129a56e27bfb9458a3bcc2e45794c96cb"}, - {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0b937b2a1988f184a3e9e577adaa8aede21ec0b38320d6009e02bd026db04fa"}, - {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:6889469bfdc1eddf489729b471303739bf04555bb151fe8875931f8564309afc"}, - {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19b73643c802f4eaf13d97f7855d0fb527fbc92ab7013c4ad0e13a6ae0ed23bd"}, - {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3c6afcf2338e7f374e8edc765c79fbcb4061d02b15dd5f8f314a4af2bdc7feb5"}, - {file = "rpds_py-0.20.1-cp312-none-win32.whl", hash = "sha256:dc73505153798c6f74854aba69cc75953888cf9866465196889c7cdd351e720c"}, - {file = "rpds_py-0.20.1-cp312-none-win_amd64.whl", hash = "sha256:8bbe951244a838a51289ee53a6bae3a07f26d4e179b96fc7ddd3301caf0518eb"}, - {file = "rpds_py-0.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6ca91093a4a8da4afae7fe6a222c3b53ee4eef433ebfee4d54978a103435159e"}, - {file = "rpds_py-0.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b9c2fe36d1f758b28121bef29ed1dee9b7a2453e997528e7d1ac99b94892527c"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f009c69bc8c53db5dfab72ac760895dc1f2bc1b62ab7408b253c8d1ec52459fc"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6740a3e8d43a32629bb9b009017ea5b9e713b7210ba48ac8d4cb6d99d86c8ee8"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32b922e13d4c0080d03e7b62991ad7f5007d9cd74e239c4b16bc85ae8b70252d"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe00a9057d100e69b4ae4a094203a708d65b0f345ed546fdef86498bf5390982"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49fe9b04b6fa685bd39237d45fad89ba19e9163a1ccaa16611a812e682913496"}, - {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa7ac11e294304e615b43f8c441fee5d40094275ed7311f3420d805fde9b07b4"}, - {file = 
"rpds_py-0.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aa97af1558a9bef4025f8f5d8c60d712e0a3b13a2fe875511defc6ee77a1ab7"}, - {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:483b29f6f7ffa6af845107d4efe2e3fa8fb2693de8657bc1849f674296ff6a5a"}, - {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:37fe0f12aebb6a0e3e17bb4cd356b1286d2d18d2e93b2d39fe647138458b4bcb"}, - {file = "rpds_py-0.20.1-cp313-none-win32.whl", hash = "sha256:a624cc00ef2158e04188df5e3016385b9353638139a06fb77057b3498f794782"}, - {file = "rpds_py-0.20.1-cp313-none-win_amd64.whl", hash = "sha256:b71b8666eeea69d6363248822078c075bac6ed135faa9216aa85f295ff009b1e"}, - {file = "rpds_py-0.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:5b48e790e0355865197ad0aca8cde3d8ede347831e1959e158369eb3493d2191"}, - {file = "rpds_py-0.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3e310838a5801795207c66c73ea903deda321e6146d6f282e85fa7e3e4854804"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249280b870e6a42c0d972339e9cc22ee98730a99cd7f2f727549af80dd5a963"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e79059d67bea28b53d255c1437b25391653263f0e69cd7dec170d778fdbca95e"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b431c777c9653e569986ecf69ff4a5dba281cded16043d348bf9ba505486f36"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da584ff96ec95e97925174eb8237e32f626e7a1a97888cdd27ee2f1f24dd0ad8"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a0629ec053fc013808a85178524e3cb63a61dbc35b22499870194a63578fb9"}, - {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fbf15aff64a163db29a91ed0868af181d6f68ec1a3a7d5afcfe4501252840bad"}, - {file 
= "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:07924c1b938798797d60c6308fa8ad3b3f0201802f82e4a2c41bb3fafb44cc28"}, - {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4a5a844f68776a7715ecb30843b453f07ac89bad393431efbf7accca3ef599c1"}, - {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:518d2ca43c358929bf08f9079b617f1c2ca6e8848f83c1225c88caeac46e6cbc"}, - {file = "rpds_py-0.20.1-cp38-none-win32.whl", hash = "sha256:3aea7eed3e55119635a74bbeb80b35e776bafccb70d97e8ff838816c124539f1"}, - {file = "rpds_py-0.20.1-cp38-none-win_amd64.whl", hash = "sha256:7dca7081e9a0c3b6490a145593f6fe3173a94197f2cb9891183ef75e9d64c425"}, - {file = "rpds_py-0.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b41b6321805c472f66990c2849e152aff7bc359eb92f781e3f606609eac877ad"}, - {file = "rpds_py-0.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a90c373ea2975519b58dece25853dbcb9779b05cc46b4819cb1917e3b3215b6"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16d4477bcb9fbbd7b5b0e4a5d9b493e42026c0bf1f06f723a9353f5153e75d30"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84b8382a90539910b53a6307f7c35697bc7e6ffb25d9c1d4e998a13e842a5e83"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4888e117dd41b9d34194d9e31631af70d3d526efc363085e3089ab1a62c32ed1"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5265505b3d61a0f56618c9b941dc54dc334dc6e660f1592d112cd103d914a6db"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e75ba609dba23f2c95b776efb9dd3f0b78a76a151e96f96cc5b6b1b0004de66f"}, - {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1791ff70bc975b098fe6ecf04356a10e9e2bd7dc21fa7351c1742fdeb9b4966f"}, - {file = 
"rpds_py-0.20.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d126b52e4a473d40232ec2052a8b232270ed1f8c9571aaf33f73a14cc298c24f"}, - {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c14937af98c4cc362a1d4374806204dd51b1e12dded1ae30645c298e5a5c4cb1"}, - {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3d089d0b88996df627693639d123c8158cff41c0651f646cd8fd292c7da90eaf"}, - {file = "rpds_py-0.20.1-cp39-none-win32.whl", hash = "sha256:653647b8838cf83b2e7e6a0364f49af96deec64d2a6578324db58380cff82aca"}, - {file = "rpds_py-0.20.1-cp39-none-win_amd64.whl", hash = "sha256:fa41a64ac5b08b292906e248549ab48b69c5428f3987b09689ab2441f267d04d"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a07ced2b22f0cf0b55a6a510078174c31b6d8544f3bc00c2bcee52b3d613f74"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:68cb0a499f2c4a088fd2f521453e22ed3527154136a855c62e148b7883b99f9a"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa3060d885657abc549b2a0f8e1b79699290e5d83845141717c6c90c2df38311"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95f3b65d2392e1c5cec27cff08fdc0080270d5a1a4b2ea1d51d5f4a2620ff08d"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cc3712a4b0b76a1d45a9302dd2f53ff339614b1c29603a911318f2357b04dd2"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d4eea0761e37485c9b81400437adb11c40e13ef513375bbd6973e34100aeb06"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f5179583d7a6cdb981151dd349786cbc318bab54963a192692d945dd3f6435d"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:2fbb0ffc754490aff6dabbf28064be47f0f9ca0b9755976f945214965b3ace7e"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:a94e52537a0e0a85429eda9e49f272ada715506d3b2431f64b8a3e34eb5f3e75"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:92b68b79c0da2a980b1c4197e56ac3dd0c8a149b4603747c4378914a68706979"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:93da1d3db08a827eda74356f9f58884adb254e59b6664f64cc04cdff2cc19b0d"}, - {file = "rpds_py-0.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:754bbed1a4ca48479e9d4182a561d001bbf81543876cdded6f695ec3d465846b"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ca449520e7484534a2a44faf629362cae62b660601432d04c482283c47eaebab"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9c4cb04a16b0f199a8c9bf807269b2f63b7b5b11425e4a6bd44bd6961d28282c"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb63804105143c7e24cee7db89e37cb3f3941f8e80c4379a0b355c52a52b6780"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:55cd1fa4ecfa6d9f14fbd97ac24803e6f73e897c738f771a9fe038f2f11ff07c"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f8f741b6292c86059ed175d80eefa80997125b7c478fb8769fd9ac8943a16c0"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fc212779bf8411667234b3cdd34d53de6c2b8b8b958e1e12cb473a5f367c338"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ad56edabcdb428c2e33bbf24f255fe2b43253b7d13a2cdbf05de955217313e6"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:0a3a1e9ee9728b2c1734f65d6a1d376c6f2f6fdcc13bb007a08cc4b1ff576dc5"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e13de156137b7095442b288e72f33503a469aa1980ed856b43c353ac86390519"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:07f59760ef99f31422c49038964b31c4dfcfeb5d2384ebfc71058a7c9adae2d2"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:59240685e7da61fb78f65a9f07f8108e36a83317c53f7b276b4175dc44151684"}, - {file = "rpds_py-0.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:83cba698cfb3c2c5a7c3c6bac12fe6c6a51aae69513726be6411076185a8b24a"}, - {file = "rpds_py-0.20.1.tar.gz", hash = "sha256:e1791c4aabd117653530dccd24108fa03cc6baf21f58b950d0a73c3b3b29a350"}, + {file = "rpds_py-0.21.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a017f813f24b9df929674d0332a374d40d7f0162b326562daae8066b502d0590"}, + {file = "rpds_py-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:20cc1ed0bcc86d8e1a7e968cce15be45178fd16e2ff656a243145e0b439bd250"}, + {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad116dda078d0bc4886cb7840e19811562acdc7a8e296ea6ec37e70326c1b41c"}, + {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:808f1ac7cf3b44f81c9475475ceb221f982ef548e44e024ad5f9e7060649540e"}, + {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de552f4a1916e520f2703ec474d2b4d3f86d41f353e7680b597512ffe7eac5d0"}, + {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efec946f331349dfc4ae9d0e034c263ddde19414fe5128580f512619abed05f1"}, + {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b80b4690bbff51a034bfde9c9f6bf9357f0a8c61f548942b80f7b66356508bf5"}, + {file = 
"rpds_py-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:085ed25baac88953d4283e5b5bd094b155075bb40d07c29c4f073e10623f9f2e"}, + {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:daa8efac2a1273eed2354397a51216ae1e198ecbce9036fba4e7610b308b6153"}, + {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:95a5bad1ac8a5c77b4e658671642e4af3707f095d2b78a1fdd08af0dfb647624"}, + {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3e53861b29a13d5b70116ea4230b5f0f3547b2c222c5daa090eb7c9c82d7f664"}, + {file = "rpds_py-0.21.0-cp310-none-win32.whl", hash = "sha256:ea3a6ac4d74820c98fcc9da4a57847ad2cc36475a8bd9683f32ab6d47a2bd682"}, + {file = "rpds_py-0.21.0-cp310-none-win_amd64.whl", hash = "sha256:b8f107395f2f1d151181880b69a2869c69e87ec079c49c0016ab96860b6acbe5"}, + {file = "rpds_py-0.21.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5555db3e618a77034954b9dc547eae94166391a98eb867905ec8fcbce1308d95"}, + {file = "rpds_py-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:97ef67d9bbc3e15584c2f3c74bcf064af36336c10d2e21a2131e123ce0f924c9"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ab2c2a26d2f69cdf833174f4d9d86118edc781ad9a8fa13970b527bf8236027"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4e8921a259f54bfbc755c5bbd60c82bb2339ae0324163f32868f63f0ebb873d9"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a7ff941004d74d55a47f916afc38494bd1cfd4b53c482b77c03147c91ac0ac3"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5145282a7cd2ac16ea0dc46b82167754d5e103a05614b724457cffe614f25bd8"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:de609a6f1b682f70bb7163da745ee815d8f230d97276db049ab447767466a09d"}, + {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40c91c6e34cf016fa8e6b59d75e3dbe354830777fcfd74c58b279dceb7975b75"}, + {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d2132377f9deef0c4db89e65e8bb28644ff75a18df5293e132a8d67748397b9f"}, + {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0a9e0759e7be10109645a9fddaaad0619d58c9bf30a3f248a2ea57a7c417173a"}, + {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e20da3957bdf7824afdd4b6eeb29510e83e026473e04952dca565170cd1ecc8"}, + {file = "rpds_py-0.21.0-cp311-none-win32.whl", hash = "sha256:f71009b0d5e94c0e86533c0b27ed7cacc1239cb51c178fd239c3cfefefb0400a"}, + {file = "rpds_py-0.21.0-cp311-none-win_amd64.whl", hash = "sha256:e168afe6bf6ab7ab46c8c375606298784ecbe3ba31c0980b7dcbb9631dcba97e"}, + {file = "rpds_py-0.21.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:30b912c965b2aa76ba5168fd610087bad7fcde47f0a8367ee8f1876086ee6d1d"}, + {file = "rpds_py-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca9989d5d9b1b300bc18e1801c67b9f6d2c66b8fd9621b36072ed1df2c977f72"}, + {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f54e7106f0001244a5f4cf810ba8d3f9c542e2730821b16e969d6887b664266"}, + {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fed5dfefdf384d6fe975cc026886aece4f292feaf69d0eeb716cfd3c5a4dd8be"}, + {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590ef88db231c9c1eece44dcfefd7515d8bf0d986d64d0caf06a81998a9e8cab"}, + {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f983e4c2f603c95dde63df633eec42955508eefd8d0f0e6d236d31a044c882d7"}, + {file = 
"rpds_py-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b229ce052ddf1a01c67d68166c19cb004fb3612424921b81c46e7ea7ccf7c3bf"}, + {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ebf64e281a06c904a7636781d2e973d1f0926a5b8b480ac658dc0f556e7779f4"}, + {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:998a8080c4495e4f72132f3d66ff91f5997d799e86cec6ee05342f8f3cda7dca"}, + {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:98486337f7b4f3c324ab402e83453e25bb844f44418c066623db88e4c56b7c7b"}, + {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a78d8b634c9df7f8d175451cfeac3810a702ccb85f98ec95797fa98b942cea11"}, + {file = "rpds_py-0.21.0-cp312-none-win32.whl", hash = "sha256:a58ce66847711c4aa2ecfcfaff04cb0327f907fead8945ffc47d9407f41ff952"}, + {file = "rpds_py-0.21.0-cp312-none-win_amd64.whl", hash = "sha256:e860f065cc4ea6f256d6f411aba4b1251255366e48e972f8a347cf88077b24fd"}, + {file = "rpds_py-0.21.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ee4eafd77cc98d355a0d02f263efc0d3ae3ce4a7c24740010a8b4012bbb24937"}, + {file = "rpds_py-0.21.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:688c93b77e468d72579351a84b95f976bd7b3e84aa6686be6497045ba84be560"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38dbf31c57032667dd5a2f0568ccde66e868e8f78d5a0d27dcc56d70f3fcd3b"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d6129137f43f7fa02d41542ffff4871d4aefa724a5fe38e2c31a4e0fd343fb0"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520ed8b99b0bf86a176271f6fe23024323862ac674b1ce5b02a72bfeff3fff44"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:aaeb25ccfb9b9014a10eaf70904ebf3f79faaa8e60e99e19eef9f478651b9b74"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af04ac89c738e0f0f1b913918024c3eab6e3ace989518ea838807177d38a2e94"}, + {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b9b76e2afd585803c53c5b29e992ecd183f68285b62fe2668383a18e74abe7a3"}, + {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5afb5efde74c54724e1a01118c6e5c15e54e642c42a1ba588ab1f03544ac8c7a"}, + {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:52c041802a6efa625ea18027a0723676a778869481d16803481ef6cc02ea8cb3"}, + {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee1e4fc267b437bb89990b2f2abf6c25765b89b72dd4a11e21934df449e0c976"}, + {file = "rpds_py-0.21.0-cp313-none-win32.whl", hash = "sha256:0c025820b78817db6a76413fff6866790786c38f95ea3f3d3c93dbb73b632202"}, + {file = "rpds_py-0.21.0-cp313-none-win_amd64.whl", hash = "sha256:320c808df533695326610a1b6a0a6e98f033e49de55d7dc36a13c8a30cfa756e"}, + {file = "rpds_py-0.21.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2c51d99c30091f72a3c5d126fad26236c3f75716b8b5e5cf8effb18889ced928"}, + {file = "rpds_py-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cbd7504a10b0955ea287114f003b7ad62330c9e65ba012c6223dba646f6ffd05"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6dcc4949be728ede49e6244eabd04064336012b37f5c2200e8ec8eb2988b209c"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f414da5c51bf350e4b7960644617c130140423882305f7574b6cf65a3081cecb"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9afe42102b40007f588666bc7de82451e10c6788f6f70984629db193849dced1"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:3b929c2bb6e29ab31f12a1117c39f7e6d6450419ab7464a4ea9b0b417174f044"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8404b3717da03cbf773a1d275d01fec84ea007754ed380f63dfc24fb76ce4592"}, + {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e12bb09678f38b7597b8346983d2323a6482dcd59e423d9448108c1be37cac9d"}, + {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:58a0e345be4b18e6b8501d3b0aa540dad90caeed814c515e5206bb2ec26736fd"}, + {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c3761f62fcfccf0864cc4665b6e7c3f0c626f0380b41b8bd1ce322103fa3ef87"}, + {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c2b2f71c6ad6c2e4fc9ed9401080badd1469fa9889657ec3abea42a3d6b2e1ed"}, + {file = "rpds_py-0.21.0-cp39-none-win32.whl", hash = "sha256:b21747f79f360e790525e6f6438c7569ddbfb1b3197b9e65043f25c3c9b489d8"}, + {file = "rpds_py-0.21.0-cp39-none-win_amd64.whl", hash = "sha256:0626238a43152918f9e72ede9a3b6ccc9e299adc8ade0d67c5e142d564c9a83d"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6b4ef7725386dc0762857097f6b7266a6cdd62bfd209664da6712cb26acef035"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6bc0e697d4d79ab1aacbf20ee5f0df80359ecf55db33ff41481cf3e24f206919"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da52d62a96e61c1c444f3998c434e8b263c384f6d68aca8274d2e08d1906325c"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:98e4fe5db40db87ce1c65031463a760ec7906ab230ad2249b4572c2fc3ef1f9f"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30bdc973f10d28e0337f71d202ff29345320f8bc49a31c90e6c257e1ccef4333"}, + {file = 
"rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:faa5e8496c530f9c71f2b4e1c49758b06e5f4055e17144906245c99fa6d45356"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32eb88c30b6a4f0605508023b7141d043a79b14acb3b969aa0b4f99b25bc7d4a"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a89a8ce9e4e75aeb7fa5d8ad0f3fecdee813802592f4f46a15754dcb2fd6b061"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:241e6c125568493f553c3d0fdbb38c74babf54b45cef86439d4cd97ff8feb34d"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:3b766a9f57663396e4f34f5140b3595b233a7b146e94777b97a8413a1da1be18"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:af4a644bf890f56e41e74be7d34e9511e4954894d544ec6b8efe1e21a1a8da6c"}, + {file = "rpds_py-0.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3e30a69a706e8ea20444b98a49f386c17b26f860aa9245329bab0851ed100677"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:031819f906bb146561af051c7cef4ba2003d28cff07efacef59da973ff7969ba"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b876f2bc27ab5954e2fd88890c071bd0ed18b9c50f6ec3de3c50a5ece612f7a6"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc5695c321e518d9f03b7ea6abb5ea3af4567766f9852ad1560f501b17588c7b"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4de1da871b5c0fd5537b26a6fc6814c3cc05cabe0c941db6e9044ffbb12f04a"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:878f6fea96621fda5303a2867887686d7a198d9e0f8a40be100a63f5d60c88c9"}, + {file = 
"rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8eeec67590e94189f434c6d11c426892e396ae59e4801d17a93ac96b8c02a6c"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ff2eba7f6c0cb523d7e9cff0903f2fe1feff8f0b2ceb6bd71c0e20a4dcee271"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a429b99337062877d7875e4ff1a51fe788424d522bd64a8c0a20ef3021fdb6ed"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d167e4dbbdac48bd58893c7e446684ad5d425b407f9336e04ab52e8b9194e2ed"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4eb2de8a147ffe0626bfdc275fc6563aa7bf4b6db59cf0d44f0ccd6ca625a24e"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e78868e98f34f34a88e23ee9ccaeeec460e4eaf6db16d51d7a9b883e5e785a5e"}, + {file = "rpds_py-0.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4991ca61656e3160cdaca4851151fd3f4a92e9eba5c7a530ab030d6aee96ec89"}, + {file = "rpds_py-0.21.0.tar.gz", hash = "sha256:ed6378c9d66d0de903763e7706383d60c33829581f0adff47b6535f1802fa6db"}, ] [[package]] @@ -5073,19 +5060,18 @@ docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "n [[package]] name = "storage3" -version = "0.8.2" +version = "0.9.0" description = "Supabase Storage client for Python." 
optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "storage3-0.8.2-py3-none-any.whl", hash = "sha256:f2e995b18c77a2a9265d1a33047d43e4d6abb11eb3ca5067959f68281c305de3"}, - {file = "storage3-0.8.2.tar.gz", hash = "sha256:db05d3fe8fb73bd30c814c4c4749664f37a5dfc78b629e8c058ef558c2b89f5a"}, + {file = "storage3-0.9.0-py3-none-any.whl", hash = "sha256:8b2fb91f0c61583a2f4eac74a8bae67e00d41ff38095c8a6cd3f2ce5e0ab76e7"}, + {file = "storage3-0.9.0.tar.gz", hash = "sha256:e16697f60894c94e1d9df0d2e4af783c1b3f7dd08c9013d61978825c624188c4"}, ] [package.dependencies] httpx = {version = ">=0.26,<0.28", extras = ["http2"]} python-dateutil = ">=2.8.2,<3.0.0" -typing-extensions = ">=4.2.0,<5.0.0" [[package]] name = "strenum" @@ -5105,32 +5091,32 @@ test = ["pylint", "pytest", "pytest-black", "pytest-cov", "pytest-pylint"] [[package]] name = "supabase" -version = "2.9.1" +version = "2.10.0" description = "Supabase client for Python." optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "supabase-2.9.1-py3-none-any.whl", hash = "sha256:a96f857a465712cb551679c1df66ba772c834f861756ce4aa2aa4cb703f6aeb7"}, - {file = "supabase-2.9.1.tar.gz", hash = "sha256:51fce39c9eb50573126dabb342541ec5e1f13e7476938768f4b0ccfdb8c522cd"}, + {file = "supabase-2.10.0-py3-none-any.whl", hash = "sha256:183fb23c04528593f8f81c24ceb8178f3a56bff40fec7ed873b6c55ebc2e420a"}, + {file = "supabase-2.10.0.tar.gz", hash = "sha256:9ac095f8947bf60780e67c0edcbab53e2db3f6f3f022329397b093500bf2607c"}, ] [package.dependencies] -gotrue = ">=2.9.0,<3.0.0" +gotrue = ">=2.10.0,<3.0.0" httpx = ">=0.26,<0.28" -postgrest = ">=0.17.0,<0.18.0" +postgrest = ">=0.18,<0.19" realtime = ">=2.0.0,<3.0.0" -storage3 = ">=0.8.0,<0.9.0" -supafunc = ">=0.6.0,<0.7.0" +storage3 = ">=0.9.0,<0.10.0" +supafunc = ">=0.7.0,<0.8.0" [[package]] name = "supafunc" -version = "0.6.2" +version = "0.7.0" description = "Library for Supabase Functions" optional = true python-versions = "<4.0,>=3.9" files = [ - {file = 
"supafunc-0.6.2-py3-none-any.whl", hash = "sha256:101b30616b0a1ce8cf938eca1df362fa4cf1deacb0271f53ebbd674190fb0da5"}, - {file = "supafunc-0.6.2.tar.gz", hash = "sha256:c7dfa20db7182f7fe4ae436e94e05c06cd7ed98d697fed75d68c7b9792822adc"}, + {file = "supafunc-0.7.0-py3-none-any.whl", hash = "sha256:4160260dc02bdd906be1e2ffd7cb3ae8b74ae437c892bb475352b6a99d9ff8eb"}, + {file = "supafunc-0.7.0.tar.gz", hash = "sha256:5b1c415fba1395740b2b4eedd1d786384bd58b98f6333a11ba7889820a48b6a7"}, ] [package.dependencies] @@ -5363,13 +5349,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.6" +version = "4.67.0" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, - {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, + {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, + {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, ] [package.dependencies] @@ -5377,6 +5363,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -5906,13 +5893,13 @@ propcache = ">=0.2.0" [[package]] name = "zipp" -version = "3.20.2" +version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, - {file = "zipp-3.20.2.tar.gz", hash = 
"sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, + {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, ] [package.extras] @@ -5930,4 +5917,4 @@ ingestion-bundle = ["aiofiles", "aioshutil", "beautifulsoup4", "bs4", "markdown" [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "bef1a83eb406b0b81da58ca2f1a9dda0ed18be6361ea123a3b773bf27d8ea62c" +content-hash = "86935f6ca1f9822ea3fc3b1d28ae2e6414177851660799f6f10c30cfd1a0f169" diff --git a/py/pyproject.toml b/py/pyproject.toml index 74326f946..adecc21ce 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -81,6 +81,7 @@ python-pptx = { version = "^1.0.1", optional = true } python-docx = { version = "^1.1.0", optional = true } aiosmtplib = "^3.0.2" types-aiofiles = "^24.1.0.20240626" +aiohttp = "^3.10.10" [tool.poetry.extras] core = [ diff --git a/py/r2r.toml b/py/r2r.toml index e9f0023fa..5966c94b5 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -83,9 +83,10 @@ base_dimension = 512 # base_dimension = 3072 # quantization_settings = { quantization_type = "INT1" } +# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model + batch_size = 128 add_title_as_prefix = false -rerank_model = "None" concurrent_request_limit = 256 quantization_settings = { quantization_type = "FP32" } @@ -99,6 +100,13 @@ chunk_size = 1_024 chunk_overlap = 512 excluded_parsers = ["mp4"] +# Ingestion-time document summary parameters +# skip_document_summary = False +# document_summary_system_prompt = 'default_system' +# document_summary_task_prompt = 'default_summary' +# chunks_for_document_summary = 128 +# document_summary_model = "openai/gpt-4o-mini" + [ingestion.chunk_enrichment_settings] enable_chunk_enrichment = false # disabled by default strategies = ["semantic", 
"neighborhood"] diff --git a/py/sdk/mixins/ingestion.py b/py/sdk/mixins/ingestion.py index 758821e9c..980036e03 100644 --- a/py/sdk/mixins/ingestion.py +++ b/py/sdk/mixins/ingestion.py @@ -38,57 +38,60 @@ class IngestionMixins: "Number of metadatas must match number of document IDs." ) - all_file_paths: list[str] = [] - for path in file_paths: - if os.path.isdir(path): - for root, _, files in os.walk(path): - all_file_paths.extend( - os.path.join(root, file) for file in files - ) - else: - all_file_paths.append(path) - with ExitStack() as stack: - files_tuples = [ - ( - "files", + all_file_paths: list[str] = [] + for path in file_paths: + if os.path.isdir(path): + for root, _, files in os.walk(path): + all_file_paths.extend( + os.path.join(root, file) for file in files + ) + else: + all_file_paths.append(path) + + with ExitStack() as stack: + files_tuples = [ ( - os.path.basename(file), - stack.enter_context(open(file, "rb")), - "application/octet-stream", - ), - ) - for file in all_file_paths - ] + "files", + ( + os.path.basename(file), + stack.enter_context(open(file, "rb")), + "application/octet-stream", + ), + ) + for file in all_file_paths + ] - data = {} - if document_ids: - data["document_ids"] = json.dumps( - [str(doc_id) for doc_id in document_ids] - ) - if metadatas: - data["metadatas"] = json.dumps(metadatas) + data = {} + if document_ids: + data["document_ids"] = json.dumps( + [str(doc_id) for doc_id in document_ids] + ) + if metadatas: + data["metadatas"] = json.dumps(metadatas) - if ingestion_config: - data["ingestion_config"] = json.dumps(ingestion_config) + if ingestion_config: + data["ingestion_config"] = json.dumps(ingestion_config) - if run_with_orchestration is not None: - data["run_with_orchestration"] = str(run_with_orchestration) + if run_with_orchestration is not None: + data["run_with_orchestration"] = str( + run_with_orchestration + ) - if collection_ids: - data["collection_ids"] = json.dumps( - [ + if collection_ids: + 
data["collection_ids"] = json.dumps( [ - str(collection_id) - for collection_id in doc_collection_ids + [ + str(collection_id) + for collection_id in doc_collection_ids + ] + for doc_collection_ids in collection_ids ] - for doc_collection_ids in collection_ids - ] - ) + ) - return await self._make_request( # type: ignore - "POST", "ingest_files", data=data, files=files_tuples - ) + return await self._make_request( # type: ignore + "POST", "ingest_files", data=data, files=files_tuples + ) async def update_files( self, diff --git a/py/sdk/mixins/retrieval.py b/py/sdk/mixins/retrieval.py index ce537abce..0b08479f8 100644 --- a/py/sdk/mixins/retrieval.py +++ b/py/sdk/mixins/retrieval.py @@ -2,13 +2,12 @@ import logging from typing import AsyncGenerator, Optional, Union from ..models import ( - DocumentSearchSettings, GenerationConfig, KGSearchSettings, Message, RAGResponse, SearchResponse, - VectorSearchSettings, + SearchSettings, ) logger = logging.getLogger() @@ -18,14 +17,14 @@ class RetrievalMixins: async def search_documents( self, query: str, - settings: Optional[Union[dict, DocumentSearchSettings]] = None, + settings: Optional[Union[dict, SearchSettings]] = None, ) -> SearchResponse: """ Conduct a vector and/or KG search. Args: query (str): The query to search for. - vector_search_settings (Optional[Union[dict, VectorSearchSettings]]): Vector search settings. + vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. Returns: @@ -43,9 +42,7 @@ class RetrievalMixins: async def search( self, query: str, - vector_search_settings: Optional[ - Union[dict, VectorSearchSettings] - ] = None, + vector_search_settings: Optional[Union[dict, SearchSettings]] = None, kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, ) -> SearchResponse: """ @@ -53,7 +50,7 @@ class RetrievalMixins: Args: query (str): The query to search for. 
- vector_search_settings (Optional[Union[dict, VectorSearchSettings]]): Vector search settings. + vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. Returns: @@ -97,9 +94,7 @@ class RetrievalMixins: self, query: str, rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, - vector_search_settings: Optional[ - Union[dict, VectorSearchSettings] - ] = None, + vector_search_settings: Optional[Union[dict, SearchSettings]] = None, kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, task_prompt_override: Optional[str] = None, include_title_if_available: Optional[bool] = False, @@ -110,7 +105,7 @@ class RetrievalMixins: Args: query (str): The query to search for. rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. - vector_search_settings (Optional[Union[dict, VectorSearchSettings]]): Vector search settings. + vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. task_prompt_override (Optional[str]): Task prompt override. include_title_if_available (Optional[bool]): Include the title if available. @@ -149,9 +144,7 @@ class RetrievalMixins: self, message: Optional[Union[dict, Message]] = None, rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, - vector_search_settings: Optional[ - Union[dict, VectorSearchSettings] - ] = None, + vector_search_settings: Optional[Union[dict, SearchSettings]] = None, kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, task_prompt_override: Optional[str] = None, include_title_if_available: Optional[bool] = False, @@ -166,7 +159,7 @@ class RetrievalMixins: Args: messages (List[Union[dict, Message]]): The messages to send to the agent. 
rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. - vector_search_settings (Optional[Union[dict, VectorSearchSettings]]): Vector search settings. + vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. task_prompt_override (Optional[str]): Task prompt override. include_title_if_available (Optional[bool]): Include the title if available. @@ -221,3 +214,18 @@ class RetrievalMixins: return self._make_streaming_request("POST", "agent", json=data) # type: ignore else: return await self._make_request("POST", "agent", json=data) # type: ignore + + async def embedding( + self, + content: str, + ) -> list[float]: + """ + Generate embeddings for the provided content. + + Args: + content (str): The text content to embed. + + Returns: + list[float]: The generated embedding vector. + """ + return await self._make_request("POST", "embedding", json=content) # type: ignore diff --git a/py/sdk/models.py b/py/sdk/models.py index b3b3940d8..cee80c339 100644 --- a/py/sdk/models.py +++ b/py/sdk/models.py @@ -1,5 +1,4 @@ from shared.abstractions import ( - DocumentSearchSettings, GenerationConfig, HybridSearchSettings, KGCommunityResult, @@ -18,9 +17,9 @@ from shared.abstractions import ( MessageType, R2RException, R2RSerializable, + SearchSettings, Token, VectorSearchResult, - VectorSearchSettings, ) from shared.api.models import ( KGCreationResponse, @@ -51,8 +50,7 @@ __all__ = [ "R2RSerializable", "Token", "VectorSearchResult", - "VectorSearchSettings", - "DocumentSearchSettings", + "SearchSettings", "KGEntityDeduplicationSettings", "KGEntityDeduplicationResponse", "KGCreationResponse", diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py index 6e073bc04..d17daef19 100644 --- a/py/shared/abstractions/__init__.py +++ b/py/shared/abstractions/__init__.py @@ -38,7 +38,6 @@ from .llm import ( from 
.prompt import Prompt from .search import ( AggregateSearchResult, - DocumentSearchSettings, HybridSearchSettings, KGCommunityResult, KGEntityResult, @@ -48,8 +47,8 @@ from .search import ( KGSearchResult, KGSearchResultType, KGSearchSettings, + SearchSettings, VectorSearchResult, - VectorSearchSettings, ) from .user import Token, TokenData, UserStats from .vector import ( @@ -115,8 +114,7 @@ __all__ = [ "KGGlobalResult", "KGSearchSettings", "VectorSearchResult", - "VectorSearchSettings", - "DocumentSearchSettings", + "SearchSettings", "HybridSearchSettings", # KG abstractions "KGCreationSettings", diff --git a/py/shared/abstractions/document.py b/py/shared/abstractions/document.py index cc6d82d08..28dd089cf 100644 --- a/py/shared/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -116,6 +116,7 @@ class IngestionStatus(str, Enum): EXTRACTING = "extracting" CHUNKING = "chunking" EMBEDDING = "embedding" + AUGMENTING = "augmenting" STORING = "storing" ENRICHING = "enriching" ENRICHED = "enriched" @@ -193,11 +194,18 @@ class DocumentInfo(R2RSerializable): created_at: Optional[datetime] = None updated_at: Optional[datetime] = None ingestion_attempt_number: Optional[int] = None + summary: Optional[str] = None + summary_embedding: Optional[list[float]] = None # Add optional embedding def convert_to_db_entry(self): """Prepare the document info for database entry, extracting certain fields from metadata.""" now = datetime.now() + # Format the embedding properly for Postgres vector type + embedding = None + if self.summary_embedding is not None: + embedding = f"[{','.join(str(x) for x in self.summary_embedding)}]" + return { "document_id": self.id, "collection_ids": self.collection_ids, @@ -212,6 +220,8 @@ class DocumentInfo(R2RSerializable): "created_at": self.created_at or now, "updated_at": self.updated_at or now, "ingestion_attempt_number": self.ingestion_attempt_number or 0, + "summary": self.summary, + "summary_embedding": embedding, } diff --git 
a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 4c7cc9a3e..c8170f15d 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -181,51 +181,7 @@ class HybridSearchSettings(R2RSerializable): ) -class DocumentSearchSettings(R2RSerializable): - search_over_metadata: bool = Field( - default=True, - description="Whether to search over the document metadata in the search procedure", - ) - - metadata_keys: list[str] = Field( - default=["title"], - description="Metadata keys to search over", - ) - - search_over_body: bool = Field( - default=False, - description="Whether to search over the document bodies in the search procedure", - ) - filters: dict[str, Any] = Field( - default_factory=dict, - description="Filters to apply to the search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.", - deprecated=True, - ) - search_filters: dict[str, Any] = Field( - default_factory=dict, - description="Alias for filters", - deprecated=True, - ) - offset: int = Field( - default=0, - ge=0, - description="Offset to paginate search results", - ) - limit: int = Field( - default=10, - description="Maximum number of results to return", - ge=1, - le=1_000, - ) - title_weight: float = Field( - default=0.5, description="Relative weight to apply to title search" - ) - metadata_weight: float = Field( - default=0.5, description="Relative weight to apply to body search" - ) - - -class VectorSearchSettings(R2RSerializable): +class SearchSettings(R2RSerializable): use_vector_search: bool = Field( default=True, description="Whether to use vector search" ) diff --git a/py/shared/api/models/management/responses.py b/py/shared/api/models/management/responses.py index cec9efd73..924c5276e 100644 --- a/py/shared/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -100,6 +100,7 @@ class DocumentOverviewResponse(BaseModel): version: str collection_ids: list[UUID] 
metadata: dict[str, Any] + summary: Optional[str] = None class DocumentChunkResponse(BaseModel): diff --git a/py/shared/api/models/retrieval/responses.py b/py/shared/api/models/retrieval/responses.py index b9c8e185e..64b4c50bd 100644 --- a/py/shared/api/models/retrieval/responses.py +++ b/py/shared/api/models/retrieval/responses.py @@ -158,9 +158,11 @@ class DocumentSearchResult(BaseModel): ) +from ..management.responses import DocumentOverviewResponse + WrappedCompletionResponse = ResultsWrapper[LLMChatCompletion] # Create wrapped versions of the responses WrappedSearchResponse = ResultsWrapper[SearchResponse] -WrappedDocumentSearchResponse = ResultsWrapper[list[DocumentSearchResult]] +WrappedDocumentSearchResponse = ResultsWrapper[list[DocumentOverviewResponse]] WrappedRAGResponse = ResultsWrapper[RAGResponse] WrappedRAGAgentResponse = ResultsWrapper[RAGAgentResponse] diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index 64d7089b1..ab63bfe59 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -80,7 +80,9 @@ def compare_document_fields(documents, expected_doc): def test_document_overview_sample_file_cli(): print("Testing: Document overview contains 'aristotle.txt'") output = run_command("poetry run r2r documents-overview") - output = output.replace("'", '"') + output = output.replace("'", '"').replace( + "None", "null" + ) # Replace Python None with JSON null output_lines = output.strip().split("\n")[1:] documents = [json.loads(ele) for ele in output_lines]