diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py index 9bbd297..065bae8 100644 --- a/bootstraprag/cli.py +++ b/bootstraprag/cli.py @@ -38,6 +38,8 @@ def create(project_name, framework, template, observability): 'rag-with-controllable-agents', 'rag-with-llama-parse', 'rag-with-adjacent-context', + 'rag-with-sub-question-query-engine', + 'rag-with-recursive-retriever', 'rag-with-citation', 'agents-with-introspection', 'llama-deploy-with-simplemq', diff --git a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/readme.md b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/readme.md index e69de29..bb0dad4 100644 --- a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/readme.md @@ -0,0 +1,17 @@ +### Recursive Retrieve Agents +In LlamaIndex, the Recursive Retriever is a specialized component designed to enhance information retrieval by navigating through interconnected nodes within a document or across multiple documents. Unlike traditional retrieval methods that fetch information based solely on direct relevance, the Recursive Retriever delves deeper into the relationships between data points, allowing for a more comprehensive extraction of pertinent information. + +### How to run? +`pip install -r requirements.txt` +`python main.py` + +### How to expose as API? 
+`python api_server.py` +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/recursive_retriever_agents_core.py b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/recursive_retriever_agents_core.py index fb89449..343ed04 100644 --- a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/recursive_retriever_agents_core.py +++ b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever/recursive_retriever_agents_core.py @@ -27,6 +27,7 @@ def __init__(self, agent_names: List[str], data_dir: str = 'data'): self.document_data = {} self.agents = {} self.query_engine = None + self.client: qdrant_client.QdrantClient = None # Load environment variables load_dotenv(find_dotenv()) @@ -60,6 +61,7 @@ def _setup_vector_store(self): url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY'] ) + self.client = client self.vector_store = QdrantVectorStore( client=client, collection_name=os.environ['COLLECTION_NAME'] @@ -91,11 +93,16 @@ def _create_query_engine_tools(self, agent_name: str, vector_index: VectorStoreI def _build_agents(self): """Build agents with their respective tools.""" for agent_name in self.agent_names: - # Build indices - vector_index = VectorStoreIndex.from_documents( - self.document_data[agent_name], - storage_context=self.storage_context - ) + + if not self.client.collection_exists(collection_name=os.environ.get("COLLECTION_NAME")): + # Build indices + vector_index = VectorStoreIndex.from_documents( + self.document_data[agent_name], + storage_context=self.storage_context + ) + else: + vector_index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store) + summary_index = SummaryIndex.from_documents( self.document_data[agent_name], storage_context=self.storage_context diff --git 
a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever_with_observability/readme.md index e69de29..53edcd7 100644 --- a/bootstraprag/templates/llamaindex/rag_with_recursive_retriever_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_recursive_retriever_with_observability/readme.md @@ -0,0 +1,24 @@ +### Recursive Retrieve Agents +In LlamaIndex, the Recursive Retriever is a specialized component designed to enhance information retrieval by navigating through interconnected nodes within a document or across multiple documents. Unlike traditional retrieval methods that fetch information based solely on direct relevance, the Recursive Retriever delves deeper into the relationships between data points, allowing for a more comprehensive extraction of pertinent information. + +### How to run? +`pip install -r requirements.txt` +`python main.py` + +### How to expose as API? 
+`python api_server.py` +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` + +#### How to spin up observability +- run `docker compose -f docker-compose-langfuse.yml up` +- launch langfuse in browser `http://localhost:3000` +- click on `signup` +- create `organization` & `project` +- once done create your `public` and `private` api keys diff --git a/bootstraprag/templates/llamaindex/rag_with_self_correction_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_self_correction_with_observability/readme.md index 8208515..b0ad459 100644 --- a/bootstraprag/templates/llamaindex/rag_with_self_correction_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_self_correction_with_observability/readme.md @@ -5,7 +5,7 @@ - click on `signup` - create `organization` & `project` - once done create your `public` and `private` api keys -- + #### Instructions to run the code - Navigate to the root of the project and run the below command - `pip install -r requirements.txt` diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/.env b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/.env new file mode 100644 index 0000000..76f59d6 --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/.env @@ -0,0 +1,29 @@ +DB_URL='http://localhost:6333' +DB_API_KEY='th3s3cr3tk3y' +COLLECTION_NAME='SUB_QUESTION_COLLECTION' + +OPENAI_API_KEY='sk-proj-' +OPENAI_EMBED_MODEL='gpt-4o' + +# use this in case you prefer to experiment with local models.
+OLLAMA_BASE_URL='http://localhost:11434'
+OLLAMA_LLM_MODEL='llama3.1'
+OLLAMA_EMBED_MODEL='nomic-embed-text:latest'
+
+CHUNK_SIZE=128
+CHUNK_OVERLAP=20
+
+# logger can be controlled using env
+CRITICAL = 50
+FATAL = 50
+ERROR = 40
+WARNING = 30
+WARN = 30
+INFO = 20
+DEBUG = 10
+NOTSET = 0
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
+
+IS_EVALUATION_NEEDED=true
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/api_server.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/api_server.py
index e69de29..ca72e19 100644
--- a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/api_server.py
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/api_server.py
@@ -0,0 +1,38 @@
+from dotenv import load_dotenv, find_dotenv
+from sub_question_query_engine import SubQuestionQueryEngineAgent
+import litserve as lit
+import os
+
+
+class SubQuestionQueryAPI(lit.LitAPI):
+    """LitServe API exposing the sub-question query engine."""
+
+    def __init__(self):
+        super().__init__()
+        load_dotenv(find_dotenv())
+        self.engine = None
+
+    def setup(self, device):
+        """Create the engine and index the documents before serving."""
+        self.engine = SubQuestionQueryEngineAgent()
+        # query() raises ValueError until load_and_index_documents() has run
+        self.engine.load_and_index_documents()
+
+    def decode_request(self, request, **kwargs):
+        """Return the question stored under the 'query' key of the request."""
+        return request['query']
+
+    def predict(self, x, **kwargs):
+        """Run the question through the engine."""
+        return self.engine.query(x)
+
+    def encode_response(self, output, **kwargs):
+        """Wrap the engine output under the 'Agent' key."""
+        return {'Agent': output}
+
+
+if __name__ == "__main__":
+    lit_api = SubQuestionQueryAPI()
+    server = lit.LitServer(lit_api=lit_api, api_path='/api/v1/chat-completion',
+                           workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 1)))
+    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/data/orthodontics.pdf b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/data/orthodontics.pdf
new file mode 100644
index 0000000..11dda80
Binary files /dev/null and b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/data/orthodontics.pdf differ
diff --git
a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/main.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/main.py
new file mode 100644
index 0000000..5b1fd72
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/main.py
@@ -0,0 +1,51 @@
+from sub_question_query_engine import SubQuestionQueryEngineAgent
+
+
+def print_welcome_message():
+    """Print the banner shown before the interactive prompt."""
+    print("\n=== Orthodontics Query System ===")
+    print("Type your question and press Enter")
+    print("Type 'quit' to exit the program")
+    print("================================\n")
+
+
+if __name__ == "__main__":
+    # Initialize the engine
+    engine = SubQuestionQueryEngineAgent()
+
+    # Load and index documents
+    print("Initializing the system... Please wait...")
+    engine.load_and_index_documents()
+
+    # Display welcome message
+    print_welcome_message()
+
+    while True:
+        try:
+            # Get user input
+            user_question = input("\nEnter your question: ").strip()
+
+            # Check for quit command
+            if user_question.lower() == 'quit':
+                print("\nThank you for using the system. Goodbye!")
+                break
+
+            # Skip empty questions
+            if not user_question:
+                print("Please enter a valid question.")
+                continue
+
+            # Execute query and print response
+            print("\nProcessing your question...\n")
+            response = engine.query(user_question)
+            print("\nResponse:", response)
+            print("\n" + "-" * 50)  # Separator line
+
+        except (EOFError, KeyboardInterrupt):
+            # stdin closed or Ctrl+C: leave the loop; EOFError would otherwise
+            # be caught by the generic handler below on every iteration
+            print("\nThank you for using the system. Goodbye!")
+            break
+        except Exception as e:
+            print(f"\nAn error occurred: {str(e)}")
+            print("Please try again with a different question.")
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/readme.md b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/readme.md
index e69de29..46c6fb1 100644
--- a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/readme.md
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/readme.md
@@ -0,0 +1,17 @@
+### Sub Question Query Engine
+The Sub-Question Query Engine in LlamaIndex is designed to handle complex queries that require information from multiple data sources. It operates by decomposing a complex query into several sub-questions, each directed to the most relevant data source. After obtaining responses to these sub-questions, it synthesizes them into a comprehensive final answer.
+
+### How to run?
+`pip install -r requirements.txt`
+`python main.py`
+
+### How to expose as API?
+`python api_server.py`
+- Method: POST
+- API: http://localhost:8000/api/v1/chat-completion
+- Body:
+```json
+{
+  "query": "Explain vertical plane in orthodontics"
+}
+```
\ No newline at end of file
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/requirements.txt
index b130564..35d158d 100644
--- a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/requirements.txt
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/requirements.txt
@@ -1 +1,6 @@
-llamaindex
\ No newline at end of file
+llamaindex
+llama-index-llms-ollama
+llama-index-llms-openai
+llama-index-agent-openai
+llama-index-vector-stores-qdrant
+qdrant-client
\ No newline at end of file
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/sub_question_query_engine.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/sub_question_query_engine.py
index e69de29..6984191 100644
--- a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/sub_question_query_engine.py
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine/sub_question_query_engine.py
@@ -0,0 +1,101 @@
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
+from llama_index.core.tools import QueryEngineTool, ToolMetadata
+from llama_index.core.query_engine import SubQuestionQueryEngine
+from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
+from llama_index.core import Settings
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from dotenv import load_dotenv, find_dotenv
+import qdrant_client
+import os
+
+
+class SubQuestionQueryEngineAgent:
+    """Answers questions with a SubQuestionQueryEngine backed by a Qdrant collection."""
+
+    def __init__(self):
+        # Load environment variables
+        load_dotenv(find_dotenv())
+
+        # Initialize debug handler and settings
+        self._setup_settings()
+
+        # Initialize Qdrant clients
+        self.client = qdrant_client.QdrantClient(
+            url=os.environ['DB_URL'],
+            api_key=os.environ['DB_API_KEY']
+        )
+        self.aclient = qdrant_client.AsyncQdrantClient(
+            url=os.environ['DB_URL'],
+            api_key=os.environ['DB_API_KEY']
+        )
+
+        # Initialize query engine
+        self.query_engine = None
+
+    def _setup_settings(self):
+        """Configure LlamaIndex settings"""
+        llama_debug = LlamaDebugHandler(print_trace_on_end=True)
+        callback_manager = CallbackManager([llama_debug])
+
+        Settings.callback_manager = callback_manager
+        # NOTE: despite its name, OPENAI_EMBED_MODEL is used as the chat model here
+        Settings.llm = OpenAI(
+            model=os.environ.get("OPENAI_EMBED_MODEL"),
+            temperature=0.0
+        )
+        Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+        Settings.chunk_size = int(os.environ.get("CHUNK_SIZE"))
+        Settings.chunk_overlap = int(os.environ.get("CHUNK_OVERLAP"))
+
+    def load_and_index_documents(self, input_dir="data"):
+        """Build the sub-question query engine on top of the Qdrant collection.
+
+        Documents in ``input_dir`` are read and indexed only when the collection
+        does not exist yet; otherwise the stored vectors are reused.
+        """
+        collection_name = os.environ["COLLECTION_NAME"]
+
+        # Setup vector store and index
+        vector_store = QdrantVectorStore(
+            client=self.client,
+            aclient=self.aclient,
+            collection_name=collection_name
+        )
+        if not self.client.collection_exists(collection_name=collection_name):
+            # Load documents only when they actually have to be indexed
+            orthodontics_docs = SimpleDirectoryReader(input_dir=input_dir).load_data(
+                show_progress=True
+            )
+            storage_context = StorageContext.from_defaults(vector_store=vector_store)
+            vector_query_engine = VectorStoreIndex.from_documents(
+                documents=orthodontics_docs,
+                storage_context=storage_context
+            ).as_query_engine()
+        else:
+            vector_query_engine = VectorStoreIndex.from_vector_store(
+                vector_store=vector_store).as_query_engine()
+
+        # Setup query engine tools
+        query_engine_tools = [
+            QueryEngineTool(
+                query_engine=vector_query_engine,
+                metadata=ToolMetadata(
+                    name="orthodontics_tool",
+                    description="guide on the orthodontics",
+                ),
+            ),
+        ]
+
+        # Initialize SubQuestionQueryEngine
+        self.query_engine = SubQuestionQueryEngine.from_defaults(
+            query_engine_tools=query_engine_tools
+        )
+
+    def query(self, question: str):
+        """Execute a query and return the response"""
+        if self.query_engine is None:
+            raise ValueError("Query engine not initialized. Call load_and_index_documents first.")
+
+        return self.query_engine.query(question)
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/.env
new file mode 100644
index 0000000..7893634
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/.env
@@ -0,0 +1,34 @@
+DB_URL='http://localhost:6333'
+DB_API_KEY='th3s3cr3tk3y'
+COLLECTION_NAME='SUB_QUESTION_COLLECTION'
+
+OPENAI_API_KEY='sk-proj-'
+OPENAI_EMBED_MODEL='gpt-4o'
+
+# use this in case you prefer to experiment with local models.
+OLLAMA_BASE_URL='http://localhost:11434'
+OLLAMA_LLM_MODEL='llama3.1'
+OLLAMA_EMBED_MODEL='nomic-embed-text:latest'
+
+CHUNK_SIZE=128
+CHUNK_OVERLAP=20
+
+# Langfuse Observability Details
+LANGFUSE_PUBLIC_KEY='pk-lf-'
+LANGFUSE_SECRET_KEY='sk-lf-'
+LANGFUSE_HOST='http://localhost:3000'
+
+# logger can be controlled using env
+CRITICAL = 50
+FATAL = 50
+ERROR = 40
+WARNING = 30
+WARN = 30
+INFO = 20
+DEBUG = 10
+NOTSET = 0
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
+
+IS_EVALUATION_NEEDED=true
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/__init__.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/api_server.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/api_server.py
new file mode 100644
index 0000000..ca72e19
---
/dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/api_server.py
@@ -0,0 +1,38 @@
+from dotenv import load_dotenv, find_dotenv
+from sub_question_query_engine import SubQuestionQueryEngineAgent
+import litserve as lit
+import os
+
+
+class SubQuestionQueryAPI(lit.LitAPI):
+    """LitServe API exposing the sub-question query engine."""
+
+    def __init__(self):
+        super().__init__()
+        load_dotenv(find_dotenv())
+        self.engine = None
+
+    def setup(self, device):
+        """Create the engine and index the documents before serving."""
+        self.engine = SubQuestionQueryEngineAgent()
+        # query() raises ValueError until load_and_index_documents() has run
+        self.engine.load_and_index_documents()
+
+    def decode_request(self, request, **kwargs):
+        """Return the question stored under the 'query' key of the request."""
+        return request['query']
+
+    def predict(self, x, **kwargs):
+        """Run the question through the engine."""
+        return self.engine.query(x)
+
+    def encode_response(self, output, **kwargs):
+        """Wrap the engine output under the 'Agent' key."""
+        return {'Agent': output}
+
+
+if __name__ == "__main__":
+    lit_api = SubQuestionQueryAPI()
+    server = lit.LitServer(lit_api=lit_api, api_path='/api/v1/chat-completion',
+                           workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 1)))
+    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/data/orthodontics.pdf b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/data/orthodontics.pdf
new file mode 100644
index 0000000..11dda80
Binary files /dev/null and b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/data/orthodontics.pdf differ
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/docker-compose-langfuse.yml b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/docker-compose-langfuse.yml
new file mode 100644
index 0000000..2363657
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/docker-compose-langfuse.yml
@@ -0,0 +1,36 @@
+version: '3.8'
+
+services:
+  postgres:
+    image: postgres:latest
+    container_name: postgres
+    environment:
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: langfuse
+    
volumes:
+      - postgres_data:/var/lib/postgresql/data
+    networks:
+      - langfuse-network
+
+  langfuse:
+    image: langfuse/langfuse:latest
+    container_name: langfuse
+    environment:
+      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/langfuse
+      NEXTAUTH_URL: http://localhost:3000
+      NEXTAUTH_SECRET: mysecret
+      SALT: mysalt
+      ENCRYPTION_KEY: 98637d42c277ef10b8a324e25d492daa8eee8f769574124ba25132f71481f183
+    ports:
+      - "3000:3000"
+    depends_on:
+      - postgres
+    networks:
+      - langfuse-network
+
+volumes:
+  postgres_data:
+
+networks:
+  langfuse-network:
+    driver: bridge
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/main.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/main.py
new file mode 100644
index 0000000..5b1fd72
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/main.py
@@ -0,0 +1,51 @@
+from sub_question_query_engine import SubQuestionQueryEngineAgent
+
+
+def print_welcome_message():
+    """Print the banner shown before the interactive prompt."""
+    print("\n=== Orthodontics Query System ===")
+    print("Type your question and press Enter")
+    print("Type 'quit' to exit the program")
+    print("================================\n")
+
+
+if __name__ == "__main__":
+    # Initialize the engine
+    engine = SubQuestionQueryEngineAgent()
+
+    # Load and index documents
+    print("Initializing the system... Please wait...")
+    engine.load_and_index_documents()
+
+    # Display welcome message
+    print_welcome_message()
+
+    while True:
+        try:
+            # Get user input
+            user_question = input("\nEnter your question: ").strip()
+
+            # Check for quit command
+            if user_question.lower() == 'quit':
+                print("\nThank you for using the system. Goodbye!")
+                break
+
+            # Skip empty questions
+            if not user_question:
+                print("Please enter a valid question.")
+                continue
+
+            # Execute query and print response
+            print("\nProcessing your question...\n")
+            response = engine.query(user_question)
+            print("\nResponse:", response)
+            print("\n" + "-" * 50)  # Separator line
+
+        except (EOFError, KeyboardInterrupt):
+            # stdin closed or Ctrl+C: leave the loop; EOFError would otherwise
+            # be caught by the generic handler below on every iteration
+            print("\nThank you for using the system. Goodbye!")
+            break
+        except Exception as e:
+            print(f"\nAn error occurred: {str(e)}")
+            print("Please try again with a different question.")
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md
new file mode 100644
index 0000000..452a767
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md
@@ -0,0 +1,24 @@
+### Sub Question Query Engine
+The Sub-Question Query Engine in LlamaIndex is designed to handle complex queries that require information from multiple data sources. It operates by decomposing a complex query into several sub-questions, each directed to the most relevant data source. After obtaining responses to these sub-questions, it synthesizes them into a comprehensive final answer.
+
+### How to run?
+`pip install -r requirements.txt`
+`python main.py`
+
+### How to expose as API?
+`python api_server.py`
+- Method: POST
+- API: http://localhost:8000/api/v1/chat-completion
+- Body:
+```json
+{
+  "query": "Explain vertical plane in orthodontics"
+}
+```
+
+#### How to spin up observability
+- run `docker compose -f docker-compose-langfuse.yml up`
+- launch langfuse in browser `http://localhost:3000`
+- click on `signup`
+- create `organization` & `project`
+- once done create your `public` and `private` api keys
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/requirements.txt
new file mode 100644
index 0000000..29e467b
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/requirements.txt
@@ -0,0 +1,8 @@
+llamaindex
+llama-index-llms-ollama
+llama-index-llms-openai
+llama-index-agent-openai
+llama-index-vector-stores-qdrant
+qdrant-client
+# observability
+langfuse==2.52.2
\ No newline at end of file
diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/sub_question_query_engine.py b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/sub_question_query_engine.py
new file mode 100644
index 0000000..b4ccfa1
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/sub_question_query_engine.py
@@ -0,0 +1,106 @@
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
+from llama_index.core.tools import QueryEngineTool, ToolMetadata
+from llama_index.core.query_engine import SubQuestionQueryEngine
+from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
+from llama_index.core import Settings
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from langfuse.llama_index import LlamaIndexInstrumentor
+from dotenv import load_dotenv, find_dotenv
+import qdrant_client
+import os
+
+# Load environment variables
+load_dotenv(find_dotenv())
+
+# instrumenting observability
+instrumentor = LlamaIndexInstrumentor()
+instrumentor.start()
+
+
+class SubQuestionQueryEngineAgent:
+    """Answers questions with a SubQuestionQueryEngine backed by a Qdrant collection."""
+
+    def __init__(self):
+        # Initialize debug handler and settings
+        self._setup_settings()
+
+        # Initialize Qdrant clients
+        self.client = qdrant_client.QdrantClient(
+            url=os.environ['DB_URL'],
+            api_key=os.environ['DB_API_KEY']
+        )
+        self.aclient = qdrant_client.AsyncQdrantClient(
+            url=os.environ['DB_URL'],
+            api_key=os.environ['DB_API_KEY']
+        )
+
+        # Initialize query engine
+        self.query_engine = None
+
+    def _setup_settings(self):
+        """Configure LlamaIndex settings"""
+        llama_debug = LlamaDebugHandler(print_trace_on_end=True)
+        callback_manager = CallbackManager([llama_debug])
+
+        Settings.callback_manager = callback_manager
+        # NOTE: despite its name, OPENAI_EMBED_MODEL is used as the chat model here
+        Settings.llm = OpenAI(
+            model=os.environ.get("OPENAI_EMBED_MODEL"),
+            temperature=0.0
+        )
+        Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+        Settings.chunk_size = int(os.environ.get("CHUNK_SIZE"))
+        Settings.chunk_overlap = int(os.environ.get("CHUNK_OVERLAP"))
+
+    def load_and_index_documents(self, input_dir="data"):
+        """Build the sub-question query engine on top of the Qdrant collection.
+
+        Documents in ``input_dir`` are read and indexed only when the collection
+        does not exist yet; otherwise the stored vectors are reused.
+        """
+        collection_name = os.environ["COLLECTION_NAME"]
+
+        # Setup vector store and index
+        vector_store = QdrantVectorStore(
+            client=self.client,
+            aclient=self.aclient,
+            collection_name=collection_name
+        )
+        if not self.client.collection_exists(collection_name=collection_name):
+            # Load documents only when they actually have to be indexed
+            orthodontics_docs = SimpleDirectoryReader(input_dir=input_dir).load_data(
+                show_progress=True
+            )
+            storage_context = StorageContext.from_defaults(vector_store=vector_store)
+            vector_query_engine = VectorStoreIndex.from_documents(
+                documents=orthodontics_docs,
+                storage_context=storage_context
+            ).as_query_engine()
+        else:
+            vector_query_engine = VectorStoreIndex.from_vector_store(
+                vector_store=vector_store).as_query_engine()
+
+        # Setup query engine tools
+        query_engine_tools = [
+            QueryEngineTool(
+                query_engine=vector_query_engine,
+                metadata=ToolMetadata(
+                    name="orthodontics_tool",
+                    description="guide on the orthodontics",
+                ),
+            ),
+        ]
+
+        # Initialize SubQuestionQueryEngine
+        self.query_engine = SubQuestionQueryEngine.from_defaults(
+            query_engine_tools=query_engine_tools
+        )
+
+    def query(self, question: str):
+        """Execute a query and return the response"""
+        if self.query_engine is None:
+            raise ValueError("Query engine not initialized. Call load_and_index_documents first.")
+
+        return self.query_engine.query(question)
diff --git a/setup.py b/setup.py
index a2af65b..e0c1aac 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name='bootstrap-rag',
-    version='0.0.15',
+    version='0.0.16',
     long_description=long_description,
     long_description_content_type="text/markdown",
     packages=find_packages(),