From 1b3628a18f6a1cbf7b922ed0bd5487334f783b66 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:36:07 -0700 Subject: [PATCH] Feature/cleanup docs rebased (#397) * make document info table * make document info table * up * remove qdrant presence * fix conf * checkin work * add user stats * basic demo func work * up * working demo for local db * checkin * fix file update endpt * fix get end pts * merge w/ nolan changes * rest demo_vecs * final tweaks * cleanup * harmonize app * attempts at cleaning up documentation * fix * fix app endpt * fix demo * cleanup merge conflicts --- README.md | 14 +- docs/pages/_meta.json | 6 +- docs/pages/getting-started/quick-install.mdx | 2 +- docs/pages/getting-started/r2r-demo.mdx | 18 +-- docs/pages/index.mdx | 22 +-- docs/pages/tutorials/local_rag.mdx | 135 +++++++++---------- docs/public/swagger.json | 2 +- poetry.lock | 128 +++++++++++++++++- pyproject.toml | 4 +- r2r/core/__init__.py | 3 +- r2r/core/providers/vector_db_provider.py | 2 +- r2r/examples/clients/run_qna_client.py | 94 ------------- r2r/examples/configs/local_ollama.json | 42 +----- r2r/examples/demo.py | 10 +- r2r/main/r2r_app.py | 25 +--- r2r/main/r2r_client.py | 22 +++ r2r/vector_dbs/pgvector/pgvector_db.py | 13 -- 17 files changed, 266 insertions(+), 276 deletions(-) delete mode 100644 r2r/examples/clients/run_qna_client.py diff --git a/README.md b/README.md index 2b7f690cc..83f7696a2 100644 --- a/README.md +++ b/README.md @@ -13,18 +13,18 @@ Build, deploy, observe, and optimize your RAG system. # About -R2R, short for RAG to Riches, provides the fastest and most efficient way to deliver high-quality Retrieval-Augmented Generation (RAG) to end users. The framework is built around customizable pipelines and ships with a feature-rich REST API. 
+R2R, short for RAG to Riches, provides the fastest and most featureful way for developers to deliver high-quality Retrieval-Augmented Generation (RAG) to end users. The framework ships with a REST API powered by Postgres and pgvector - it includes user-level and document-level management as well as advanced RAG features. ## Why? -R2R was conceived to bridge the gap between local LLM experimentation and scalable production solutions. It is built with observability and customization in mind, ensuring that users can seamlessly transition from development to deployment. +R2R was conceived to help developers bridge the gap between local LLM experimentation and serving a scalable, production-ready application. Built with document management, observability and customization in mind, R2R provides adequate performance and features for most RAG use cases. ## Key Features -- **🔧 Build**: Use the framework to build arbitrary asynchronous pipelines. -- **🚀 Deploy**: Instantly launch production-ready asynchronous RAG pipelines with streaming capabilities. -- **🧩 Customize**: Tailor your multimodal pipeline with intuitive configuration files. -- **🔌 Extend**: Enhance your pipeline with custom code integrations. -- **🤖 OSS**: Benefit from a framework developed by the open-source community, designed to simplify RAG deployment. +- **🔧 Build**: Effortlessly create and manage observable, high-performance RAG pipelines with our robust framework. +- **🚀 Deploy**: Launch production-ready asynchronous RAG pipelines with seamless streaming capabilities. +- **🧩 Customize**: Easily tailor your multimodal pipeline using intuitive configuration files to meet your specific needs. +- **🔌 Extend**: Enhance and extend your pipeline with custom code integrations to add new functionalities. +- **🤖 OSS**: Leverage a framework developed by the open-source community, ensuring flexibility, scalability, and ease of deployment. ## Table of Contents 1. 
[Quick Install](#quick-install) diff --git a/docs/pages/_meta.json b/docs/pages/_meta.json index dd6652588..16d247597 100644 --- a/docs/pages/_meta.json +++ b/docs/pages/_meta.json @@ -3,6 +3,9 @@ "getting-started": { "title": "Getting Started" }, + "tutorials": { + "title": "Tutorials" + }, "deep-dive": { "title": "Deep Dive" }, @@ -11,8 +14,5 @@ }, "development": { "title": "Development" - }, - "tutorials": { - "title": "Tutorials" } } diff --git a/docs/pages/getting-started/quick-install.mdx b/docs/pages/getting-started/quick-install.mdx index 3c89845cf..10f275402 100644 --- a/docs/pages/getting-started/quick-install.mdx +++ b/docs/pages/getting-started/quick-install.mdx @@ -1,6 +1,6 @@ ## Quick Install with `pip` -Install R2R swiftly using `pip` to get started with minimal setup. This method will get you set up with the default configuration: +Install R2R using `pip` to get started with minimal setup. This method will get you set up with the default configuration: ```bash pip install r2r diff --git a/docs/pages/getting-started/r2r-demo.mdx b/docs/pages/getting-started/r2r-demo.mdx index 29d403a94..15d435d1e 100644 --- a/docs/pages/getting-started/r2r-demo.mdx +++ b/docs/pages/getting-started/r2r-demo.mdx @@ -1,6 +1,8 @@ ## R2R Demo -The R2R demo offers a step-by-step walkthrough of the features provided by the R2R application. The application ships with a REST API exposing a Retrieval-Augmented Generation (RAG) pipeline and more. The demo demonstrates how to directly interface with the application logic to ingest a list of provided documents and demonstrates search, RAG, and more advanced functionality. The script at `r2r/examples/demo.py`, which powers the demo, can be configured and extended for your specific use case. +This complete R2R demo offers a step-by-step walkthrough of the default features provided by R2R. 
The framework ships with a REST API exposing document ingestion, RAG, evaluation, and supporting features like observability and document management. + +This demo starts by ingesting a list of provided documents and demonstrates search, RAG, and more advanced functionality. The script which powers the demo, [`r2r/examples/demo.py`](https://github.com/SciPhi-AI/R2R/blob/main/r2r/examples/demo.py), can be configured and extended for your specific use case. ![R2R App Architecture](./r2r_demo_architecture.png) @@ -52,11 +54,11 @@ r2r.pipes.embedding_pipe - INFO - Fragmented the input document ids into counts **Note** -Each ingested document derives a unique `document_id` from the input file path. As the document is parsed, chunked, and embedded, this association is maintained to allow for frictionless vector database management. Additionally, the demo includes a `user_id` during the document creation process to facilitate user-level document management. +Each ingested document derives a unique `document_id` from the input file path. As the document is parsed, chunked, and embedded, this association is maintained to allow for frictionless vector database management. Additionally, the demo submits a `user_id` during the document creation process which will be used to facilitate user-level document management. ### Step 2: Document Info -To verify the successful ingestion of the demo documents, fetch the metadata for the uploaded documents associated with the demo userId: +We can fetch associated document metadata after successfully ingesting the demo documents: ```bash python -m r2r.examples.demo documents_info @@ -93,15 +95,13 @@ python -m r2r.examples.demo users_stats ``` -## Basic Functionality +## Basic RAG Functionality -The basic functionality of the R2R framework allows you to search ingested documents and generate responses using Retrieval-Augmented Generation (RAG). 
These steps will guide you through performing a search query, generating a RAG response, and streaming RAG results. +The basic RAG functionality of the R2R framework allows you to search ingested documents and generate responses using Retrieval-Augmented Generation (RAG). These steps will guide you through performing a search query, generating a RAG response, and streaming RAG results. ### Step 3: Run a Demo Search -Documents are stored by default in a local vector database. The vector database provider and settings can be specified via an input `config.json`. To - - perform a search query on the ingested user documents, use the following command: +Documents are stored by default in a local vector database. The vector database provider and settings can be specified via an input `config.json`. To perform a search query over the user ingested documents, use the following command: ```bash python -m r2r.examples.demo search --query="Who was Aristotle?" @@ -181,7 +181,7 @@ r2r.main.r2r_config - INFO - Loading configuration from /config.js Lyft reported a net loss of $1,752,857,000 in 2020 according to [2]. Therefore, Lyft did not make a profit in 2020. ``` -## Advanced - Document Management +## Document Management Effective document management is crucial for maintaining a robust and efficient RAG system. This section guides you through various operations related to document management, including deleting documents and managing user-specific data. These steps will help ensure your document database remains organized and up-to-date. diff --git a/docs/pages/index.mdx b/docs/pages/index.mdx index 0eba322cd..4bebef303 100644 --- a/docs/pages/index.mdx +++ b/docs/pages/index.mdx @@ -6,22 +6,22 @@ import GithubButtons from '../components/GithubButtons'; -R2R, short for RAG to Riches, provides the fastest and most efficient way to deliver high-quality Retrieval-Augmented Generation (RAG) to end users. 
The framework is built around customizable pipelines and a feature-rich FastAPI implementation. +R2R, short for RAG to Riches, provides the fastest and most featureful way for developers to deliver high-quality Retrieval-Augmented Generation (RAG) to end users. The framework ships with a REST API powered by Postgres and pgvector - it includes user-level and document-level management as well as advanced RAG features. + +## Why? + +R2R was conceived to help developers bridge the gap between local LLM experimentation and serving a scalable, production-ready application. Built with document management, observability and customization in mind, R2R provides adequate performance and features for most RAG use cases. ## Key Features -- **🔧 Build**: Use the framework to build arbitrary asynchronous pipelines. -- **🚀 Deploy**: Instantly launch production-ready asynchronous RAG pipelines with streaming capabilities. -- **🧩 Customize**: Tailor your multimodal pipeline with intuitive configuration files. -- **🔌 Extend**: Enhance your pipeline with custom code integrations. -- **🤖 OSS**: Benefit from a framework developed by the open-source community, designed to simplify RAG deployment. - -## Why did we build this framework? - -R2R was conceived to bridge the gap between local LLM experimentation and scalable production solutions. It is built with observability and customization in mind, ensuring that users can seamlessly transition from development to deployment. +- **🔧 Build**: Effortlessly create and manage observable, high-performance RAG pipelines with our robust framework. +- **🚀 Deploy**: Launch production-ready asynchronous RAG pipelines with seamless streaming capabilities. +- **🧩 Customize**: Easily tailor your multimodal pipeline using intuitive configuration files to meet your specific needs. +- **🔌 Extend**: Enhance and extend your pipeline with custom code integrations to add new functionalities. 
+- **🤖 OSS**: Leverage a framework developed by the open-source community, ensuring flexibility, scalability, and ease of deployment. ## Demo(s) -The [R2R Demo](/getting-started/r2r-demo) provides a step by step outline to run the default R2R Retrieval-Augmented Generation (RAG) pipeline. The demo ingests the provided documents and then illustrates search and RAG functionality. +The [R2R Demo](/getting-started/r2r-demo) provides a step-by-step guide to running the default R2R Retrieval-Augmented Generation (RAG) backend. The demo ingests the provided documents and illustrates search and RAG functionality, logging, analytics, and document management. ## Getting Started diff --git a/docs/pages/tutorials/local_rag.mdx b/docs/pages/tutorials/local_rag.mdx index ce777a28b..25b6afa7b 100644 --- a/docs/pages/tutorials/local_rag.mdx +++ b/docs/pages/tutorials/local_rag.mdx @@ -1,20 +1,55 @@ ## Building a Local RAG System with R2R -### Introduction +### Installation -R2R, short for "RAG to Riches," is a game-changing framework that simplifies the process of building RAG applications with LLMs. With R2R, you can hit the ground running and have a system running locally in minutes, eliminating the need for complex cloud infrastructure or costly hosted services. +We can start by using `pip` to install R2R with the local-embedding dependencies: -In this comprehensive, step-by-step tutorial, we'll guide you through the process of installing R2R, ingesting your documents, querying those docs using a local LLM, and tailoring the RAG pipeline to perfectly fit your unique requirements. By the end of this guide, you'll have a fully functional, locally-hosted, and readily deployable LLM application at your fingertips! - -### Setting up and Running R2R +```bash +pip install r2r[local-embedding] +``` R2R supports `Ollama`, a popular tool for Local LLM inference. Ollama is provided through a connection managed by the `litellm` library. 
-If you wish to use Ollama, it must be installed independently. You can install Ollama by following the instructions on their [official website](https://ollama.com/) or by referring to their [GitHub README](https://github.com/ollama/ollama). +Ollama must be installed independently. You can install Ollama by following the instructions on their [official website](https://ollama.com/) or by referring to their [GitHub README](https://github.com/ollama/ollama). + + +### Configuration + +Let's move on to setting up the R2R pipeline. R2R relies on a `config.json` file for defining various settings, such as embedding models and chunk sizes. By default, the `config.json` found in the R2R GitHub repository's root directory is set up for cloud-based services. + +For setting up an on-premises RAG system, we need to adjust the configuration to use local resources. For this example, this only involves changing the embedding provider as our default LLM provider integrates seamlessly with Ollama. + +To streamline this process, we've provided pre-configured local settings in the [`examples/configs`](https://github.com/SciPhi-AI/R2R/blob/main/r2r/examples/configs) directory, named `local_ollama`. We've included a printout of the config below for your convenience: + +```json +{ + "embedding": { + "provider": "sentence-transformers", + "search_model": "all-MiniLM-L6-v2", + "dimension": 384, + "batch_size": 32 + } +} +``` + +You may also modify the configuration defaults for ingestion, logging, and your vector database provider in a similar manner. More information on these settings is included in this tutorial and throughout the documentation. + +The config modification above instructs R2R to use the `sentence-transformers` library for embeddings with the `all-MiniLM-L6-v2` model, turns off evals, and sets the LLM provider to `ollama`. During ingestion, the default is to split documents into chunks of 512 characters with 20 characters of overlap between chunks. 
+ +A local vector database will be used to store the embeddings. The current default is a minimal sqlite implementation. + +### Server Standup + + +```bash +# cd $WORKDIR +python -m r2r.examples.servers.configurable_pipeline --host 0.0.0.0 --port 8000 --config local_ollama --pipeline_type qna +``` + +The server exposes a REST API for interacting with the R2R RAG pipeline and application. See the [API docs](/getting-started/app-api) for more details on the available endpoints. **Docker Installation**: Using Docker can be a convenient way to run R2R without having to manage dependencies and installations on your local machine. The Docker image comes pre-configured and ready to run. -**Local Installation**: If you prefer to run R2R directly on your local machine, you can install it and its dependencies manually. This approach may be preferred if you need more control over the installation or want to integrate R2R with other local tools and environments.
Docker @@ -50,80 +85,32 @@ This command starts the R2R container with the following options:
-
-Local Installation +## Ingesting and Embedding Documents -To begin running R2R for local inference, we first need to clone the R2R repository locally. This can be done with the command: - -```bash -git clone https://github.com/SciPhi-AI/R2R.git -``` - -Then you should navigate to the project directory: -```bash -cd R2R -``` - -And install the project dependencies using Poetry -```bash -poetry install -``` - -
- -### Pipeline Configuration - -Let's move on to setting up the R2R pipeline. R2R relies on a `config.json` file for defining various settings, such as embedding models and chunk sizes. By default, the `config.json` found in the R2R GitHub repository's root directory is set up for cloud-based services. - -For setting up an on-premises RAG system, we need to adjust the configuration to use local resources. This involves changing the embedding provider, selecting the appropriate LLM provider, and disabling evaluations. - -To streamline this process, we've provided pre-configured local settings in the [`examples/configs`](https://github.com/SciPhi-AI/R2R/blob/main/r2r/examples/configs) directory, named `local_ollama`. Here's an overview of the primary changes from the default configuration: - -```json -{ - "embedding": { - "provider": "sentence-transformers", - "search_model": "all-MiniLM-L6-v2", - "dimension": 384, - "batch_size": 32 - }, - "completions": { - "provider": "litellm" - }, - … -} -``` - -You may also modify the configuration defaults for ingestion, logging, and your vector database provider in a similar manner. More information on this follows below. - -This chosen config modification above instructs R2R to use the `sentence-transformers` library for embeddings with the `all-MiniLM-L6-v2` model, turns off evals, and sets the LLM provider to `ollama`. During ingestion, the default is to split documents into chunks of 512 characters with 20 characters of overlap between chunks. - -A local vector database will be used to store the embeddings. The current default is a minimal sqlite implementation. - -### Server Standup - -If you are using Docker, your server will be stood up upon running the Dockerfile. 
If you are developing locally, you can stand up the server by running: - -```bash -python -m r2r.examples.servers.configurable_pipeline --host 0.0.0.0 --port 8000 --config local_ollama --pipeline_type qna -``` - -The server exposes a basic API for interacting with the RAG pipeline. See the [API docs](/getting-started/app-api) for more details on other available endpoints. - -### Ingesting and Embedding Documents - -With our environment set up and our server running in a separate process, we're ready to ingest a document! As an example, R2R includes a biography on Aristotle as a text file. This file ships with the package and is included by default. +With our environment set up and our server running in a separate process, we're ready to ingest a document! As an example, R2R includes several files, such as Aristotle's Wikipedia page in `.txt`, an assortment of Paul Graham's essays in `.html`, and a few 10-Ks in `.pdf`. These files ship with the package and are included by default. Run this command to ingest the document: ```bash -python -m r2r.examples.clients.run_qna_client ingest +poetry run python -m r2r.examples.demo ingest_as_files --no-images=true --base_url=http://0.0.0.0:8000 ``` The output should look something like this: ``` -> {'results': ["File '/Users/user/R2R/r2r/examples/clients/../data/aristotle.txt' processed successfully."]} +... +{'results': + [ + "File 'aristotle.txt' processed successfully.", + "File 'pg_essay_1.html' processed successfully.", + "File 'pg_essay_2.html' processed successfully.", + "File 'pg_essay_3.html' processed successfully.", + "File 'pg_essay_4.html' processed successfully.", + "File 'pg_essay_5.html' processed successfully.", + "File 'lyft_2021.pdf' processed successfully.", + "File 'uber_2021.pdf' processed successfully." + ] +} ``` Here's what's happening under the hood: @@ -132,7 +119,7 @@ Here's what's happening under the hood: 3. Each chunk is embedded using the `all-MiniLM-L6-v2` model from `sentence-transformers`. 4. 
The chunks and embeddings are stored in the specified vector database, which defaults to a local SQLite database. -With just one command, we've gone from a raw document to an embedded knowledge base we can query. In addition to the raw chunks, metadata such as user ID or document ID can be attached to enable easy filtering later. +With just one command, we've gone from a raw document to an embedded knowledge base we can query. In addition to the raw chunks, metadata such as user ID or document ID can be attached to enable easy filtering later. For more information on similar functionalities provided by the demo, refer to the [R2R Demo](/getting-started/r2r-demo). ### Running Queries on the Local LLM @@ -147,9 +134,9 @@ ollama serve llama2 Then, to ask a question, run: ```bash -python -m r2r.examples.clients.run_qna_client rag_completion \ +python -m r2r.examples.demo rag \ --query="What contributions did Aristotle make to biology?" \ - --model="ollama/llama2" + --rag_generation_config='{"model": "ollama/llama2"}' ``` This command tells R2R to use the specified model to generate a completion for the given query. 
R2R will: diff --git a/docs/public/swagger.json b/docs/public/swagger.json index ba769d44b..8eb676aa8 100644 --- a/docs/public/swagger.json +++ b/docs/public/swagger.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/ingest_documents":{"post":{"summary":"Ingest Documents ","operationId":"ingest_documents_app_ingest_documents_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestDocumentsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/ingest_files":{"post":{"summary":"Ingest Files ","description":"Ingest files into the system.","operationId":"ingest_files_app_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/update_documents":{"post":{"summary":"Update Documents ","operationId":"update_documents_app_update_documents_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/update_files=":{"post":{"summary":"Update Files 
","operationId":"update_files_app_update_files__post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_update_files__post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/search":{"post":{"summary":"Search ","operationId":"search_app_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rag":{"post":{"summary":"Rag ","operationId":"rag_app_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RAGRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/evaluate":{"post":{"summary":"Evaluate ","operationId":"evaluate_app_evaluate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvalRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/delete":{"delete":{"summary":"Delete ","operationId":"delete_app_delete_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/get_document_data":{"get":{"summary":"Get Document Data ","operationId":"get_document_data_app_get_document_data_get","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DocumentDataRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/get_user_ids":{"get":{"summary":"Get User Ids ","operationId":"get_user_ids_app_get_user_ids_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/get_user_documents_metadata":{"post":{"summary":"Get User Documents Metadata ","operationId":"get_user_documents_metadata_app_get_user_documents_metadata_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UserDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/get_logs":{"post":{"summary":"Get Logs ","operationId":"get_logs_app_get_logs_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/LogsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/get_open_api_endpoint":{"get":{"summary":"Get Open Api 
Endpoint","operationId":"get_open_api_endpoint_get_open_api_endpoint_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"Body_ingest_files_app_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files"},"metadatas":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Metadatas"},"ids":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Ids"}},"type":"object","required":["files"],"title":"Body_ingest_files_app_ingest_files_post"},"Body_update_files_app_update_files__post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files"},"metadatas":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Metadatas"},"ids":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Ids"}},"type":"object","required":["files"],"title":"Body_update_files_app_update_files__post"},"DeleteRequest":{"properties":{"keys":{"items":{"type":"string"},"type":"array","title":"Keys"},"values":{"items":{"anyOf":[{"type":"boolean"},{"type":"integer"},{"type":"string"}]},"type":"array","title":"Values"}},"type":"object","required":["keys","values"],"title":"DeleteRequest"},"Document":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"type":{"$ref":"#/components/schemas/DocumentType"},"data":{"anyOf":[{"type":"string"},{"type":"string","format":"binary"}],"title":"Data"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","type","data","metadata"],"title":"Document","description":"A document that has been stored in the system."},"DocumentDataRequest":{"properties":{"document_id":{"type":"string","title":"Document 
Id"}},"type":"object","required":["document_id"],"title":"DocumentDataRequest"},"DocumentType":{"type":"string","enum":["csv","docx","html","json","md","pdf","pptx","txt","xlsx","gif","png","jpg","jpeg","svg","mp3","mp4"],"title":"DocumentType","description":"Types of documents that can be stored."},"EvalRequest":{"properties":{"query":{"type":"string","title":"Query"},"context":{"type":"string","title":"Context"},"completion":{"type":"string","title":"Completion"}},"type":"object","required":["query","context","completion"],"title":"EvalRequest"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"IngestDocumentsRequest":{"properties":{"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents"}},"type":"object","required":["documents"],"title":"IngestDocumentsRequest"},"LogsRequest":{"properties":{"log_type_filter":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Log Type Filter"}},"type":"object","title":"LogsRequest"},"RAGRequest":{"properties":{"message":{"type":"string","title":"Message"},"search_filters":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Filters"},"search_limit":{"type":"integer","title":"Search Limit","default":10},"rag_generation_config":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rag Generation Config"},"streaming":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Streaming"}},"type":"object","required":["message"],"title":"RAGRequest"},"SearchRequest":{"properties":{"query":{"type":"string","title":"Query"},"search_filters":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Filters"},"search_limit":{"type":"integer","title":"Search 
Limit","default":10}},"type":"object","required":["query"],"title":"SearchRequest"},"UpdateDocumentsRequest":{"properties":{"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents"}},"type":"object","required":["documents"],"title":"UpdateDocumentsRequest"},"UserDocumentRequest":{"properties":{"user_id":{"type":"string","title":"User Id"}},"type":"object","required":["user_id"],"title":"UserDocumentRequest"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/update_prompt":{"post":{"summary":"Update Prompt","description":"Update a prompt's template and/or input types.","operationId":"update_prompt_app_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdatePromptRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/ingest_documents":{"post":{"summary":"Ingest Documents","operationId":"ingest_documents_app_ingest_documents_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestDocumentsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/update_documents":{"post":{"summary":"Update 
Documents","operationId":"update_documents_app_update_documents_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/ingest_files":{"post":{"summary":"Ingest Files","description":"Ingest files into the system.","operationId":"ingest_files_app_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/update_files":{"post":{"summary":"Update Files","operationId":"update_files_app_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/search":{"post":{"summary":"Search","operationId":"search_app_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rag":{"post":{"summary":"Rag","operationId":"rag_app_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RAGRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/evaluate":{"post":{"summary":"Evaluate","operationId":"evaluate_app_evaluate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/EvalRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/logs":{"get":{"summary":"Logs","operationId":"logs_app_logs_get","parameters":[{"name":"log_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Log Type Filter"}},{"name":"max_runs_requested","in":"query","required":false,"schema":{"type":"integer","default":100,"title":"Max Runs Requested"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/analytics":{"post":{"summary":"Analytics","operationId":"analytics_app_analytics_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_analytics_app_analytics_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/users_stats":{"get":{"summary":"Users Stats","operationId":"users_stats_app_users_stats_get","parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string","format":"uuid"}},{"type":"null"}],"title":"User Ids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/documents_info":{"get":{"summary":"Documents Info","operationId":"documents_info_app_documents_info_get","parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Document Ids"}},{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"User Ids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/delete":{"delete":{"summary":"Delete","operationId":"delete_app_delete_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/app_settings":{"get":{"summary":"App Settings","description":"Return the config.json and all prompts.","operationId":"app_settings_app_app_settings_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}}}}},"/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_app_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AnalysisTypes":{"properties":{"analysis_types":{"anyOf":[{"additionalProperties":{"items":{"type":"string"},"type":"array"},"type":"object"},{"type":"null"}],"title":"Analysis Types"}},"type":"object","title":"AnalysisTypes"},"Body_analytics_app_analytics_post":{"properties":{"filter_criteria":{"$ref":"#/components/schemas/FilterCriteria"},"analysis_types":{"$ref":"#/components/schemas/AnalysisTypes"}},"type":"object","required":["filter_criteria","analysis_types"],"title":"Body_analytics_app_analytics_post"},"Body_ingest_files_app_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files"},"metadatas":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Metadatas"},"ids":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Ids"},"user_ids":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User 
Ids"}},"type":"object","required":["files"],"title":"Body_ingest_files_app_ingest_files_post"},"Body_update_files_app_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files"},"metadatas":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Metadatas"},"ids":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Ids"}},"type":"object","required":["files"],"title":"Body_update_files_app_update_files_post"},"DeleteRequest":{"properties":{"keys":{"items":{"type":"string"},"type":"array","title":"Keys"},"values":{"items":{"anyOf":[{"type":"boolean"},{"type":"integer"},{"type":"string"}]},"type":"array","title":"Values"}},"type":"object","required":["keys","values"],"title":"DeleteRequest"},"Document":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"type":{"$ref":"#/components/schemas/DocumentType"},"data":{"anyOf":[{"type":"string"},{"type":"string","format":"binary"}],"title":"Data"},"metadata":{"type":"object","title":"Metadata"},"title":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Title"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["id","type","data","metadata"],"title":"Document","description":"A document that has been stored in the system."},"DocumentType":{"type":"string","enum":["csv","docx","html","json","md","pdf","pptx","txt","xlsx","gif","png","jpg","jpeg","svg","mp3","mp4"],"title":"DocumentType","description":"Types of documents that can be 
stored."},"EvalRequest":{"properties":{"query":{"type":"string","title":"Query"},"context":{"type":"string","title":"Context"},"completion":{"type":"string","title":"Completion"}},"type":"object","required":["query","context","completion"],"title":"EvalRequest"},"FilterCriteria":{"properties":{"filters":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Filters"}},"type":"object","title":"FilterCriteria"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"IngestDocumentsRequest":{"properties":{"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents"}},"type":"object","required":["documents"],"title":"IngestDocumentsRequest"},"RAGRequest":{"properties":{"message":{"type":"string","title":"Message"},"search_filters":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Filters"},"search_limit":{"type":"integer","title":"Search Limit","default":10},"rag_generation_config":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rag Generation Config"},"streaming":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Streaming"}},"type":"object","required":["message"],"title":"RAGRequest"},"SearchRequest":{"properties":{"query":{"type":"string","title":"Query"},"search_filters":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Filters"},"search_limit":{"type":"integer","title":"Search 
Limit","default":10}},"type":"object","required":["query"],"title":"SearchRequest"},"UpdateDocumentsRequest":{"properties":{"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents"}},"type":"object","required":["documents"],"title":"UpdateDocumentsRequest"},"UpdatePromptRequest":{"properties":{"name":{"type":"string","title":"Name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types"}},"type":"object","required":["name"],"title":"UpdatePromptRequest"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}}} \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index fc3a287ac..954b1f2e2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -479,6 +479,17 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" +[[package]] +name = "decorator" +version = "4.4.2" +description = "Decorators for Humans" +optional = true +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +files = [ + {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, + {file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, +] + [[package]] name = "deprecated" version = "1.2.14" @@ -946,6 +957,56 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "imageio" +version = "2.34.1" +description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "imageio-2.34.1-py3-none-any.whl", hash = "sha256:408c1d4d62f72c9e8347e7d1ca9bc11d8673328af3913868db3b828e28b40a4c"}, + {file = "imageio-2.34.1.tar.gz", hash = "sha256:f13eb76e4922f936ac4a7fec77ce8a783e63b93543d4ea3e40793a6cabd9ac7d"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=8.3.2" + +[package.extras] +all-plugins = ["astropy", "av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +build = ["wheel"] +dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"] +docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"] +ffmpeg = ["imageio-ffmpeg", "psutil"] +fits = ["astropy"] +full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "sphinx (<6)", "tifffile", "wheel"] +gdal = ["gdal"] +itk = ["itk"] +linting = ["black", "flake8"] +pillow-heif = ["pillow-heif"] +pyav = ["av"] +test = ["fsspec[github]", "pytest", "pytest-cov"] +tifffile = ["tifffile"] + +[[package]] +name = "imageio-ffmpeg" +version = "0.5.1" +description = "FFMPEG wrapper for Python" +optional = true +python-versions = ">=3.5" +files = [ + {file = "imageio-ffmpeg-0.5.1.tar.gz", hash = "sha256:0ed7a9b31f560b0c9d929c5291cd430edeb9bed3ce9a497480e536dd4326484c"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:1460e84712b9d06910c1f7bb524096b0341d4b7844cea6c20e099d0a24e795b1"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-manylinux2010_x86_64.whl", hash = "sha256:5289f75c7f755b499653f3209fea4efd1430cba0e39831c381aad2d458f7a316"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7fa9132a291d5eb28c44553550deb40cbdab831f2a614e55360301a6582eb205"}, + {file = 
"imageio_ffmpeg-0.5.1-py3-none-win32.whl", hash = "sha256:89efe2c79979d8174ba8476deb7f74d74c331caee3fb2b65ba2883bec0737625"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-win_amd64.whl", hash = "sha256:1521e79e253bedbdd36a547e0cbd94a025ba0b558e17f08fea687d805a0e4698"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "importlib-metadata" version = "7.1.0" @@ -1395,6 +1456,30 @@ files = [ intel-openmp = "==2021.*" tbb = "==2021.*" +[[package]] +name = "moviepy" +version = "1.0.3" +description = "Video editing with Python" +optional = true +python-versions = "*" +files = [ + {file = "moviepy-1.0.3.tar.gz", hash = "sha256:2884e35d1788077db3ff89e763c5ba7bfddbd7ae9108c9bc809e7ba58fa433f5"}, +] + +[package.dependencies] +decorator = ">=4.0.2,<5.0" +imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""} +imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""} +numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""} +proglog = "<=1.0.0" +requests = ">=2.8.1,<3.0" +tqdm = ">=4.11.2,<5.0" + +[package.extras] +doc = ["Sphinx (>=1.5.2,<2.0)", "numpydoc (>=0.6.0,<1.0)", "pygame (>=1.9.3,<2.0)", "sphinx_rtd_theme (>=0.1.10b0,<1.0)"] +optional = ["matplotlib (>=2.0.0,<3.0)", "opencv-python (>=3.0,<4.0)", "scikit-image (>=0.13.0,<1.0)", "scikit-learn", "scipy (>=0.19.0,<1.5)", "youtube_dl"] +test = ["coverage (<5.0)", "coveralls (>=1.1,<2.0)", "pytest (>=3.0.0,<4.0)", "pytest-cov (>=2.5.1,<3.0)", "requests (>=2.8.1,<3.0)"] + [[package]] name = "mpmath" version = "1.3.0" @@ -1808,6 +1893,32 @@ typing-extensions = ">=4.7,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "opencv-python" +version = "4.10.0.82" +description = "Wrapper package for OpenCV python bindings." 
+optional = true +python-versions = ">=3.6" +files = [ + {file = "opencv-python-4.10.0.82.tar.gz", hash = "sha256:dbc021eaa310c4145c47cd648cb973db69bb5780d6e635386cd53d3ea76bd2d5"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:5f78652339957ec24b80a782becfb32f822d2008a865512121fad8c3ce233e9a"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:e6be19a0615aa8c4e0d34e0c7b133e26e386f4b7e9b557b69479104ab2c133ec"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b49e530f7fd86f671514b39ffacdf5d14ceb073bc79d0de46bbb6b0cad78eaf"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955c5ce8ac90c9e4636ad7f5c0d9c75b80abbe347182cfd09b0e3eec6e50472c"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-win32.whl", hash = "sha256:ff54adc9e4daaf438e669664af08bec4a268c7b7356079338b8e4fae03810f2c"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-win_amd64.whl", hash = "sha256:2e3c2557b176f1e528417520a52c0600a92c1bb1c359f3df8e6411ab4293f065"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != 
\"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, +] + [[package]] name = "openpyxl" version = "3.1.3" @@ -1974,6 +2085,20 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "proglog" +version = "0.1.10" +description = "Log and progress bar manager for console, notebooks, web..." +optional = true +python-versions = "*" +files = [ + {file = "proglog-0.1.10-py3-none-any.whl", hash = "sha256:19d5da037e8c813da480b741e3fa71fb1ac0a5b02bf21c41577c7f327485ec50"}, + {file = "proglog-0.1.10.tar.gz", hash = "sha256:658c28c9c82e4caeb2f25f488fff9ceace22f8d69b15d0c1c86d64275e4ddab4"}, +] + +[package.dependencies] +tqdm = "*" + [[package]] name = "psycopg2-binary" version = "2.9.9" @@ -3629,10 +3754,11 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [extras] all = ["sentence-transformers", "tiktoken"] exa = ["exa-py"] +ingest-movies = ["moviepy", "opencv-python"] ionic = ["ionic-api-sdk"] local-embedding = ["sentence-transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "67316bbbefaa07e95fe6b3eb12257e1fba97c0df6305342c7f01be03e6217f68" +content-hash = "1dabb50143bd4093ea5ad020f37febe543ca4505733a06d0398eff566f1385d4" diff --git a/pyproject.toml b/pyproject.toml index fa1e43ef9..c2310dea9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,8 @@ pypdf = "^4.2.0" python-pptx = "^0.6.23" python-docx = "^1.1.0" nest-asyncio = "^1.6.0" +opencv-python = { version = "^4.10.0.82", optional = true } +moviepy = { version = "^1.0.3", optional = true } # embedding providers tiktoken = {version = "^0.5.2", optional = true} @@ -64,7 +66,7 @@ all = ["tiktoken", "sentence-transformers"] exa = ["exa-py"] ionic = ["ionic-api-sdk"] local-embedding = ["sentence-transformers"] - +ingest-movies = ["moviepy", "opencv-python"] [tool.poetry.group.dev.dependencies] black = "^24.3.0" 
flake8 = "6.1.0" diff --git a/r2r/core/__init__.py b/r2r/core/__init__.py index 50e571b22..a8617f0d2 100644 --- a/r2r/core/__init__.py +++ b/r2r/core/__init__.py @@ -96,9 +96,10 @@ __all__ = [ "LoggableAsyncPipe", "Prompt", "DataType", + "DocumentInfo", "DocumentType", "Document", - "DocumentInfo", + "", "Extraction", "ExtractionType", "Fragment", diff --git a/r2r/core/providers/vector_db_provider.py b/r2r/core/providers/vector_db_provider.py index 5855b630f..2b4891a19 100644 --- a/r2r/core/providers/vector_db_provider.py +++ b/r2r/core/providers/vector_db_provider.py @@ -116,4 +116,4 @@ class VectorDBProvider(Provider, ABC): @abstractmethod def get_users_stats(self, user_ids: Optional[list[str]] = None) -> dict: - pass + pass \ No newline at end of file diff --git a/r2r/examples/clients/run_qna_client.py b/r2r/examples/clients/run_qna_client.py deleted file mode 100644 index 0c7868f36..000000000 --- a/r2r/examples/clients/run_qna_client.py +++ /dev/null @@ -1,94 +0,0 @@ -import asyncio -import os -from typing import Optional - -import fire - -from r2r import R2RClient, generate_id_from_label - - -class QuestionAndAnswerClient: - def __init__( - self, - base_url: str = "http://localhost:8000", - user_id: Optional[str] = None, - ): - self.client = R2RClient(base_url="http://localhost:8000") - self.user_id = user_id or str(generate_id_from_label("user_id")) - - root_path = os.path.dirname(os.path.abspath(__file__)) - self.default_files = [ - os.path.join(root_path, "..", "data", "aristotle.txt"), - # Add more files here - ] - - def ingest(self, file_paths: Optional[list[str]] = None): - file_paths = file_paths or self.default_files - - ids = [ - str(generate_id_from_label(file_path.split(os.path.sep)[-1])) - for file_path in file_paths - ] - - metadatas = [ - { - "title": file_path.split(os.path.sep)[-1], - "user_id": self.user_id, - } - for file_path in file_paths - ] - - response = self.client.ingest_files( - metadatas=metadatas, files=file_paths, ids=ids - ) - 
print(response) - - def search(self, query: str): - results = self.client.search( - query, search_filters={"user_id": self.user_id} - ) - for result in results["results"]: - print(result) - - def rag_completion(self, query: str, model: str = "ollama/llama2"): - rag_generation_config = { - "model": model, - } - - response = self.client.rag( - message=query, - search_filters={"user_id": self.user_id}, - rag_generation_config=rag_generation_config, - streaming=False, - ) - print(response) - - def rag_completion_streaming( - self, query: str, model: str = "ollama/llama2" - ): - rag_generation_config = {"model": model} - - response = self.client.rag( - message=query, - search_filters={"user_id": self.user_id}, - rag_generation_config=rag_generation_config, - streaming=True, - ) - - async def _stream_response(): - async for chunk in response: - print(chunk, end="", flush=True) - - asyncio.run(_stream_response()) - - def delete(self, document_id: str): - response = self.client.delete(["document_id"], [document_id]) - print(response) - - def get_logs(self, pipeline_type: Optional[str] = None): - response = self.client.get_logs(pipeline_type) - print(response) - - -if __name__ == "__main__": - fire.Fire(QuestionAndAnswerClient) diff --git a/r2r/examples/configs/local_ollama.json b/r2r/examples/configs/local_ollama.json index e6954e10a..0e6d474dc 100644 --- a/r2r/examples/configs/local_ollama.json +++ b/r2r/examples/configs/local_ollama.json @@ -1,36 +1,8 @@ { - "embedding": { - "provider": "sentence-transformers", - "search_model": "all-MiniLM-L6-v2", - "dimension": 384, - "batch_size": 32 - }, - "completions": { - "provider": "litellm" - }, - "eval": { - "provider": "local", - "frequency": 0.0, - "llm":{ - "provider": "litellm" - } - }, - "logging_database": { - "provider": "local", - "collection_name": "demo_logs", - "level": "INFO" - }, - "ingestion":{ - "selected_parsers": { - "txt": "default" - } - }, - "vector_database": { - "provider": "local", - "collection_name": 
"demo_vecs" - }, - "app": { - "max_logs": 100, - "max_file_size_in_mb": 100 - } - } \ No newline at end of file + "embedding": { + "provider": "sentence-transformers", + "search_model": "all-MiniLM-L6-v2", + "dimension": 384, + "batch_size": 32 + } +} \ No newline at end of file diff --git a/r2r/examples/demo.py b/r2r/examples/demo.py index 10414ef2b..9c6a6fbb2 100644 --- a/r2r/examples/demo.py +++ b/r2r/examples/demo.py @@ -137,8 +137,16 @@ class R2RDemo: print(f"Time taken to update documents: {t1-t0:.2f} seconds") print(response) - def ingest_as_files(self, file_paths: Optional[list[str]] = None): + def ingest_as_files(self, file_paths: Optional[list[str]] = None, no_images=False): file_paths = file_paths or self.default_files + + if no_images: + file_paths = [ + file_path + for file_path in file_paths + if file_path.split(".")[-1] != "png" + ] + ids = [ generate_id_from_label(file_path.split(os.path.sep)[-1]) for file_path in file_paths diff --git a/r2r/main/r2r_app.py b/r2r/main/r2r_app.py index 6de72aae0..64b0f6d0e 100644 --- a/r2r/main/r2r_app.py +++ b/r2r/main/r2r_app.py @@ -235,7 +235,7 @@ class R2RApp(metaclass=AsyncSyncMeta): methods=["GET"], ) self.app.add_api_route( - path="/open_api_spec", + path="/openapi_spec", endpoint=self.openapi_spec_app, methods=["GET"], ) @@ -561,13 +561,6 @@ class R2RApp(metaclass=AsyncSyncMeta): value=str(e), is_info_log=False, ) - - await self.ingestion_pipeline.pipe_logger.log( - log_id=run_id, - key="error", - value=str(e), - is_info_log=False, - ) raise HTTPException(status_code=500, detail=str(e)) from e @syncable @@ -775,13 +768,6 @@ class R2RApp(metaclass=AsyncSyncMeta): value=str(e), is_info_log=False, ) - - await self.search_pipeline.pipe_logger.log( - log_id=run_id, - key="error", - value=str(e), - is_info_log=False, - ) raise HTTPException(status_code=500, detail=str(e)) from e @syncable @@ -996,13 +982,6 @@ class R2RApp(metaclass=AsyncSyncMeta): value=str(e), is_info_log=False, ) - - await 
self.eval_pipeline.pipe_logger.log( - log_id=run_id, - key="error", - value=str(e), - is_info_log=False, - ) raise HTTPException(status_code=500, detail=str(e)) from e @syncable @@ -1298,4 +1277,4 @@ class R2RApp(metaclass=AsyncSyncMeta): allow_credentials=True, allow_methods=["*"], # Allows all methods allow_headers=["*"], # Allows all headers - ) + ) \ No newline at end of file diff --git a/r2r/main/r2r_client.py b/r2r/main/r2r_client.py index a45e96334..9be828bc4 100644 --- a/r2r/main/r2r_client.py +++ b/r2r/main/r2r_client.py @@ -305,3 +305,25 @@ class R2RClient: response = requests.get(url, params=params) response.raise_for_status() return response.json() + + def get_documents_info( + self, document_ids: Optional[str] = None, user_ids: Optional[str] = None + ) -> dict: + url = f"{self.base_url}/documents_info" + params = {} + params["document_ids"] = ( + json.dumps(document_ids) if document_ids else None + ) + params["user_ids"] = json.dumps(user_ids) if user_ids else None + response = requests.get(url, params=params) + response.raise_for_status() + return response.json() + + def users_stats(self, user_ids: Optional[list[str]] = None) -> dict: + url = f"{self.base_url}/users_stats" + params = {} + if user_ids is not None: + params["user_ids"] = ",".join(user_ids) + response = requests.get(url, params=params) + response.raise_for_status() + return response.json() diff --git a/r2r/vector_dbs/pgvector/pgvector_db.py b/r2r/vector_dbs/pgvector/pgvector_db.py index 4d1848f83..c1293c842 100644 --- a/r2r/vector_dbs/pgvector/pgvector_db.py +++ b/r2r/vector_dbs/pgvector/pgvector_db.py @@ -192,19 +192,6 @@ class PGVectorDB(VectorDBProvider): } ) - def delete_document_info_by_metadata( - self, metadata_fields: str, metadata_values: Union[bool, int, str] - ) -> None: - filters = {k: v for k, v in zip(metadata_fields, metadata_values)} - query = text( - f""" - DELETE FROM document_info WHERE {" AND ".join([f"{k} = :{k}" for k in filters.keys()])}; - """ - ) - with 
self.vx.Session() as sess: - with sess.begin(): - sess.execute(query, filters) - def get_metadatas( self, metadata_fields: list[str],