Commit 984acea

Updated README, llama_cpp/llama.py, pyproject.toml

1 parent f12a228
3 files changed: 125 additions, 0 deletions

README.md: 40 additions, 0 deletions
@@ -649,6 +649,46 @@ For instance, if you want to work with larger contexts, you can expand the context window:
 llm = Llama(model_path="./models/7B/llama-model.gguf", n_ctx=2048)
 ```
 
+### Cross-encoders
+
+To use the cross-encoder, follow the steps below:
+```
+# Define the location of your nltk data directory
+data_path1 = llama_cpp.nlLoader(nltkData="<PATH TO NLTK DATA DIRECTORY>")
+
+# Initialize the sentence-splitter function
+paragraphs1 = llama_cpp.sentSplit()
+
+# Call the passSearch function to join sentences into passages
+passages1 = llama_cpp.passSearch()
+
+# Instantiate the LlamaX class with the path to your BERT cross-encoder
+import os
+username = os.getenv("USERNAME")
+modelPath = LlamaX(
+    model_path=f"C:\\Users\\{username}\\Models\\cross-encoder\\ms-marco-TinyBERT-L-2",
+)
+
+# Load a llama.cpp GGUF embeddings model and pass it the scored passages
+# returned by searchQuery to generate embeddings.
+# `question` and `collection` are supplied by the caller.
+try:
+    llm_embed = Llama(
+        model_path=f"C:\\Users\\{username}\\Models\\mxbai\\mxbai-embed-large-v1.Q4_K_M.gguf",
+        embedding=True,
+    )
+    # Store each document in a vector embedding database
+    for i, d in enumerate(llama_cpp.searchQuery(
+            question=question,
+            model_path=modelPath)):
+        response = llm_embed.create_embedding(input=d)
+        embedding = response["data"][0]["embedding"]
+        collection.add(ids=[str(i)], embeddings=[embedding], documents=[d])
+        print("Processing embeddings...")
+except Exception as e:
+    print(e)
+```
 
 ## OpenAI Compatible Web Server
 
 `llama-cpp-python` offers a web server which aims to act as a drop-in replacement for the OpenAI API.
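The snippet above leaves `collection` and `question` to the caller. Below is a minimal sketch of one way to wire them up, assuming a chromadb collection; chromadb is not named or installed by this commit, so treat the whole block as an illustration, not part of the documented API:

```
# Hypothetical completion of the README snippet; chromadb is an assumed
# vector store, not a dependency added by this commit.
import chromadb
from llama_cpp import Llama

client = chromadb.Client()
collection = client.create_collection(name="passages")

# The same GGUF embedding model used in the README snippet above
llm_embed = Llama(
    model_path="./models/mxbai-embed-large-v1.Q4_K_M.gguf",
    embedding=True,
)

question = "What does the ingested document say about warranties?"  # example

# ... run the embedding loop from the README snippet to fill `collection` ...

# Embed the question with the same model and fetch the closest stored passage
q_embed = llm_embed.create_embedding(input=question)["data"][0]["embedding"]
result = collection.query(query_embeddings=[q_embed], n_results=1)
print(result["documents"][0][0])
```

Using the same embedding model for both the stored passages and the query keeps the vectors comparable.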

llama_cpp/llama.py: 83 additions, 0 deletions
@@ -12,6 +12,11 @@
 import warnings
 import contextlib
 import multiprocessing
+from sentence_transformers import CrossEncoder
+from pathlib import Path
+from nltk import sent_tokenize
+import nltk
+import os
 
 from typing import (
     Any,
@@ -51,6 +56,84 @@
 from ._logger import set_verbose
 from ._utils import suppress_stdout_stderr
 
+# LlamaX: a small holder class that stores a model path string and returns it
+class LlamaX:
+    def __init__(self, model_path: str):
+        self.model_path = model_path
+
+    def get_model_path(self) -> str:
+        return self.model_path
+
+# nltk data loader; requires nltk to be installed
+def nlLoader(nltkData):
+    nltk_data_dir = Path(nltkData)
+    nltk.data.path.append(str(nltk_data_dir))
+
+# Sentence-splitter function: reads the .txt files in Documents\Data and
+# splits the text into paragraphs, then each paragraph into sentences
+def sentSplit():
+    username = os.getenv('USERNAME')
+    path = f'C:\\Users\\{username}\\Documents\\Data'
+    if not os.path.exists(path):
+        print('Creating a new directory called `Data` in your Documents directory. '
+              'Please add your RAG ingest data to it as .txt files. Thank you!')
+        os.mkdir(path)
+
+    # Concatenate every .txt file in the Data directory
+    document = ""
+    context_dir = path
+    for filename in os.listdir(context_dir):
+        file_path = os.path.join(context_dir, filename)
+        if filename.endswith('.txt'):
+            with open(file_path, encoding='utf-8') as file:
+                document += file.read() + "\n\n"
+
+    # Split the text into paragraphs and every paragraph into sentences
+    paragraphs = []
+    try:
+        for paragraph in document.replace("\r\n", "\n").split("\n\n"):
+            if len(paragraph.strip()) > 0:
+                paragraphs.append(sent_tokenize(paragraph.strip()))
+        return paragraphs
+    except Exception as e:
+        print('IMPORTANT NOTICE: please add your text dataset to the Data directory before continuing. Thank you!')
+        print(e)
+
+# Paragraph search function; the window size may be adjusted
+def passSearch():
+    window_size = 3
+    passages = []
+    for paragraph in sentSplit():
+        for start_idx in range(0, len(paragraph), window_size):
+            end_idx = min(start_idx + window_size, len(paragraph))
+            passages.append(" ".join(paragraph[start_idx:end_idx]))
+    return passages
+
+# Search in a loop for individual queries and predict the scores for the
+# [query, passage] pairs. `model_path` is a LlamaX holding the cross-encoder path.
+def searchQuery(question, model_path):
+    query = []
+    query.append(question)
+    docs = []
+
+    for que in query:
+        try:
+            # Load the cross-encoder from the path stored in the LlamaX holder
+            model = CrossEncoder(model_path.get_model_path())
+            # Pair the query with every passage and score each [query, passage] pair
+            model_inputs = [[que, passage] for passage in passSearch()]
+            scores = model.predict(model_inputs)
+
+            # Sort the scores in decreasing order and keep the top hit
+            results = [{"input": inp, "score": score} for inp, score in zip(model_inputs, scores)]
+            results = sorted(results, key=lambda x: x["score"], reverse=True)
+            for hit in results[0:1]:
+                docs.append(hit["input"][1])
+                print("Performing in-document search...")
+            return docs
+        except Exception as e:
+            print("It seems that you have not added your text dataset to the Data directory.")
+            print(e)
 
 class Llama:
     """High-level Python wrapper for a llama.cpp model."""

pyproject.toml: 2 additions, 0 deletions
@@ -16,6 +16,8 @@ dependencies = [
     "numpy>=1.20.0",
     "diskcache>=5.6.1",
     "jinja2>=2.11.3",
+    "sentence-transformers==2.7.0",
+    "nltk==3.8.1",
 ]
 requires-python = ">=3.8"
 classifiers = [
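The two new pins back the feature: sentence-transformers provides the CrossEncoder and nltk provides sent_tokenize. Note that sent_tokenize also needs the punkt tokenizer data at runtime, which pip does not install; a one-time setup sketch, where the download directory is an example and should match the path passed to nlLoader:

```
# One-time nltk data setup assumed by sent_tokenize; the directory is an example.
import nltk
nltk.download("punkt", download_dir=r"C:\nltk_data")
```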
