Files
R2R/py/core/base/providers/database.py
T
emrgnt-cmplxty a640fb20b2 Release 3.2.11 (#1365)
• Added Knowledge Graph (KG) tests (#1351)
  - CLI tests
  - SDK tests
  - Workflow ordering changes

• Added collection integration tests (#1352)
  - Fixed SDK test port
  - Fixed delete collection return check

• Fixed document info serialization (#1353)

• Added GraphRAG documentation (#1362)

• Implemented concurrent index creation (#1363)
  - Allow -1 for paginated entries

• Updated webdev-template for current Next.js and r2r-js SDK (#1218)

• Extended integration tests (#1361)
  - Cleaned up and simplified CLI
  - Added new workflows and actions
  - Implemented matrix test flow
  - Docker setup and configuration

• Rebased to r2r vars (#1364)
  - Updated Windows workflow
  - Revived full workflow and tests

• Fixed typos (#1366)

• Added ingestion concurrency limit (#1367)

• Fixed Ollama Tool Calling (#1372)

• Cleaned up Docker Compose (#1368)

• Fixed bug in deletion, improved validation error handling (#1374)

• Added vector index creation endpoint (#1373)

• Modified KG Endpoints and updated API spec (#1369)

• Implemented new Docker setup (#1371)
  - Updated actions and scripts

• Cleaned up ingest chunks, added to JS SDK (#1375)

• Various updates and fixes (#1376, #1378, #1379)
  - Increased entity limit
  - Changed Aristotle back to v2
  - Added test_ingest_sample_file_2_sdk

• Added GraphRAG documentation and refined code (#1382)
  - Added Python SDK documentation

• Added R2R_ prefix to env vars (#1383)
  - Cleaned up docker compose
  - Bumped Python and JS package versions
  - Updated form-data

---------

Co-authored-by: Shreyas Pimpalgaonkar <shreyas.gp.7@gmail.com>
Co-authored-by: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com>
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
Co-authored-by: FutureProofTechOps <operations@theobald.nz>
Co-authored-by: Simeon <simeon@theobald.nz>
Co-authored-by: --global=Shreyas Pimpalgaonkar <--global=shreyas.gp.7@gmail.com>
2024-10-11 15:51:49 -07:00

106 lines
3.2 KiB
Python

import logging
from abc import ABC, abstractmethod
from typing import Any, Optional
from pydantic import BaseModel
from .base import Provider, ProviderConfig
logger = logging.getLogger(__name__)
class PostgresConfigurationSettings(BaseModel):
    """
    Tunable PostgreSQL server parameters.

    The defaults are the ones defined by the PGVector docker image. These
    settings help manage connections to the database; to tune them for a
    specific deployment, see https://pgtune.leopard.in.ua/

    NOTE(review): the numeric values carry no unit suffix; presumably each
    one is interpreted in that parameter's PostgreSQL base unit -- confirm
    against the PostgreSQL documentation before changing a default.
    """

    # Every field is optional, so a caller may explicitly pass ``None``.
    max_connections: Optional[int] = 256
    shared_buffers: Optional[int] = 16384
    effective_cache_size: Optional[int] = 524288
    maintenance_work_mem: Optional[int] = 65536
    checkpoint_completion_target: Optional[float] = 0.9
    wal_buffers: Optional[int] = 512
    default_statistics_target: Optional[int] = 100
    random_page_cost: Optional[float] = 4
    effective_io_concurrency: Optional[int] = 1
    work_mem: Optional[int] = 4096
    huge_pages: Optional[str] = "try"

    # WAL size bounds.
    min_wal_size: Optional[int] = 80
    max_wal_size: Optional[int] = 1024

    # Worker-process and parallelism limits.
    max_worker_processes: Optional[int] = 8
    max_parallel_workers_per_gather: Optional[int] = 2
    max_parallel_workers: Optional[int] = 8
    max_parallel_maintenance_workers: Optional[int] = 2
class DatabaseConfig(ProviderConfig):
    """Base configuration for a database provider.

    Carries the connection details (each optional, defaulting to ``None``),
    optional Postgres server settings, and the name and description used for
    the default collection. ``validate_config`` accepts only the providers
    listed by ``supported_providers``.
    """

    provider: str = "postgres"

    # Connection details; ``None`` means "not supplied here".
    user: Optional[str] = None
    password: Optional[str] = None
    host: Optional[str] = None
    port: Optional[int] = None
    db_name: Optional[str] = None
    project_name: Optional[str] = None

    postgres_configuration_settings: Optional[
        PostgresConfigurationSettings
    ] = None

    default_collection_name: str = "Default"
    default_collection_description: str = "Your default collection."

    def __post_init__(self):
        """Validate the config, then mirror extra fields onto the instance.

        NOTE(review): ``__post_init__`` is the dataclass hook name; whether it
        is ever invoked depends on how ``ProviderConfig`` is implemented, which
        is not visible from this file -- confirm.
        """
        self.validate_config()
        # Capture additional fields (``extra_fields`` is not declared in this
        # class; presumably it comes from ``ProviderConfig``).
        for field_name, field_value in self.extra_fields.items():
            setattr(self, field_name, field_value)

    def validate_config(self) -> None:
        """Check that ``provider`` is one of ``supported_providers``.

        Raises:
            ValueError: If the configured provider is not supported.
        """
        if self.provider in self.supported_providers:
            return
        raise ValueError(f"Provider '{self.provider}' is not supported.")

    @property
    def supported_providers(self) -> list[str]:
        """Provider names accepted by this configuration."""
        return ["postgres"]
class VectorDBProvider(Provider, ABC):
    """Abstract interface for a provider backed by a vector database."""

    @abstractmethod
    def _initialize_vector_db(self, dimension: int) -> None:
        """Set up the vector database; concrete subclasses must override.

        Args:
            dimension: Integer dimension handed to the implementation
                (presumably the embedding vector size -- confirm with callers).
        """
class RelationalDBProvider(Provider, ABC):
    """Abstract interface for a provider backed by a relational database."""

    @abstractmethod
    async def _initialize_relational_db(self) -> None:
        """Set up the relational database; concrete subclasses must override.

        This is a coroutine and has to be awaited by the caller.
        """
class DatabaseProvider(Provider):
    """Base class for a database provider with vector and relational parts.

    Subclasses implement the three abstract hooks below. The ``vector`` and
    ``relational`` attributes start as ``None``; this class does not assign
    them anywhere else.

    NOTE(review): ``@abstractmethod`` is only enforced at instantiation time
    when the class uses ``ABCMeta``; this class does not list ``ABC`` itself,
    so enforcement depends on ``Provider`` -- confirm.
    """

    def __init__(self, config: DatabaseConfig):
        """Validate ``config`` and initialise the base provider.

        Args:
            config: The database configuration for this provider.

        Raises:
            ValueError: If ``config`` is not a ``DatabaseConfig`` instance.
        """
        if not isinstance(config, DatabaseConfig):
            raise ValueError(
                "DatabaseProvider must be initialized with a `DatabaseConfig`."
            )
        # Log only the non-secret connection details. Interpolating the whole
        # config object would write `config.password` to the logs in plain
        # text. %-style arguments also defer formatting until the record is
        # actually emitted.
        logger.info(
            "Initializing DatabaseProvider with config "
            "(provider=%s, user=%s, host=%s, port=%s, db_name=%s, "
            "project_name=%s).",
            config.provider,
            config.user,
            config.host,
            config.port,
            config.db_name,
            config.project_name,
        )
        super().__init__(config)

        # remove later to re-introduce typing...
        self.vector: Any = None
        self.relational: Any = None

    @abstractmethod
    def _initialize_vector_db(self) -> VectorDBProvider:
        """Create and return the vector database provider."""

    @abstractmethod
    async def _initialize_relational_db(self) -> RelationalDBProvider:
        """Create and return the relational database provider (coroutine)."""

    @abstractmethod
    def _get_table_name(self, base_name: str) -> str:
        """Return the table name to use for ``base_name``."""