# e7db62e6bd
# * Add scheduler and vacuum * Lint * Refactor test workflow, add mock scheduler to tests * 0 3 * * * * Missing quote in toml * Add maintenance service mock
# 129 lines
# 4.0 KiB
# TOML
# 129 lines
# 4.0 KiB
# TOML
[app]
# App settings are globally available, e.g. via `r2r_config.agent.app`
# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
default_max_documents_per_user = 10_000
default_max_chunks_per_user = 10_000_000
default_max_collections_per_user = 5_000

# Set the default max upload size to 200 GiB (200 * 1024^3 bytes) for local testing
default_max_upload_size = 214_748_364_800

# LLM used for internal operations, like deriving conversation names
fast_llm = "openai/gpt-4o-mini"

# LLM used for user-facing output, like RAG replies
quality_llm = "openai/gpt-4o"

# LLM used for ingesting visual inputs
vlm = "openai/gpt-4o"

# LLM used for transcription
audio_lm = "openai/whisper-1"

# Reasoning model, used for `research` agent
reasoning_llm = "openai/o3-mini"
# Planning model, used for `research` agent
planning_llm = "anthropic/claude-3-7-sonnet-20250219"

[agent]
# Names of the static and dynamic prompts used by the `rag` agent
rag_agent_static_prompt = "static_rag_agent"
rag_agent_dynamic_prompt = "dynamic_rag_agent"
# The following tools are available to the `rag` agent
rag_tools = ["search_file_descriptions", "search_file_knowledge", "get_file_content"] # can add "web_search" | "web_scrape"
# The following tools are available to the `research` agent
research_tools = ["rag", "reasoning", "critique", "python_executor"]

[auth]
provider = "r2r"
# NOTE(review): 60_000 minutes is roughly 41.7 days, which is longer than the
# 7-day refresh token lifetime below; confirm this is intended.
access_token_lifetime_in_minutes = 60_000
refresh_token_lifetime_in_days = 7
require_authentication = false
require_email_verification = false
# Placeholder admin credentials; replace them before exposing a deployment.
default_admin_email = "admin@example.com"
default_admin_password = "change_me_immediately"

[completion]
provider = "r2r"
concurrent_request_limit = 64

# Default generation parameters for the completion provider
[completion.generation_config]
temperature = 0.1
top_p = 1
max_tokens_to_sample = 4_096
stream = false
# Empty by default: no extra generation kwargs are set
add_generation_kwargs = {}

[crypto]
provider = "bcrypt"

[database]
default_collection_name = "Default"
default_collection_description = "Your default collection."
collection_summary_prompt = "collection_summary"

[database.graph_creation_settings]
graph_entity_description_prompt = "graph_entity_description"
graph_extraction_prompt = "graph_extraction"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
automatic_deduplication = true # enable automatic deduplication of entities

[database.graph_enrichment_settings]
graph_communities_prompt = "graph_communities"

[database.maintenance]
# Five-field cron expression: minute hour day-of-month month day-of-week
# NOTE(review): the time zone the schedule is evaluated in is not specified here; confirm.
vacuum_schedule = "0 3 * * *" # Run at 3:00 AM daily

[embedding]
provider = "litellm"
# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
# For advanced applications, use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
base_model = "openai/text-embedding-3-small"
base_dimension = 512
# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
batch_size = 128
add_title_as_prefix = false
concurrent_request_limit = 256
initial_backoff = 1.0
quantization_settings = { quantization_type = "FP32" }

[completion_embedding]
# Generally this should be the same as the embedding config, but advanced users
# may want to run with a different provider to reduce latency
provider = "litellm"
base_model = "openai/text-embedding-3-small"
base_dimension = 512
batch_size = 128
add_title_as_prefix = false
concurrent_request_limit = 256

[ingestion]
provider = "r2r"
chunking_strategy = "recursive"
chunk_size = 1_024
chunk_overlap = 512
excluded_parsers = ["mp4"]
automatic_extraction = true # enable automatic extraction of entities and relations

[ingestion.chunk_enrichment_settings]
chunk_enrichment_prompt = "chunk_enrichment"
enable_chunk_enrichment = false # disabled by default
n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment

# Parser overrides, keyed by file type
[ingestion.extra_parsers]
pdf = "zerox"

[logging]
provider = "r2r"
log_table = "logs"
log_info_table = "log_info"

[orchestration]
provider = "simple"

[email]
provider = "console_mock" # `smtp` | `sendgrid` supported

[scheduler]
provider = "apscheduler"