Elbachaik committed
Commit
7e7a14c
1 Parent(s): 72ff092

Upload 4 files

Files changed (4)
  1. Dockerfile +13 -0
  2. main.py +37 -0
  3. rag_retriever.py +67 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10.10
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+
+ COPY ./rag_retriever.py /code/rag_retriever.py
+
+ COPY ./main.py /code/main.py
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,37 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import Optional
+
+ from rag_retriever import initialize_llm, initialize_pinecone, create_query_engine, get_response
+
+ app = FastAPI()
+
+ # Settings are loaded from the .env file by load_dotenv inside rag_retriever
+
+
+ # Initialize LLM
+ initialize_llm()
+
+
+
+ # Initialize Pinecone index
+ index = initialize_pinecone()
+
+ # Create query engine
+ query_engine = create_query_engine(index)
+
+ class QueryRequest(BaseModel):
+     query: str
+
+ @app.post("/query")
+ async def query(query_request: QueryRequest):
+     try:
+         response = get_response(query_engine, query_request.query)
+         return {"response": response}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/healthcheck")
+ async def healthcheck():
+     return {"status": "ok"}
+
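A quick way to exercise these two endpoints is a small client script. This is a sketch, not part of the commit: it assumes the container is running locally on port 7860 (per the Dockerfile CMD) and that the requests package is installed (it is not listed in requirements.txt).

import requests

BASE_URL = "http://localhost:7860"  # assumed local address; port 7860 comes from the Dockerfile CMD

# Liveness probe: expects {"status": "ok"}
print(requests.get(f"{BASE_URL}/healthcheck").json())

# POST a question; the body shape {"query": ...} matches the QueryRequest model
resp = requests.post(f"{BASE_URL}/query", json={"query": "What does KYC SUD CONSULTING do?"})
resp.raise_for_status()
print(resp.json()["response"])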
rag_retriever.py ADDED
@@ -0,0 +1,67 @@
+
+ from dotenv import load_dotenv
+ import os
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from llama_index.core.settings import Settings
+ from llama_index.core import SimpleDirectoryReader
+ from llama_index.core.node_parser import SentenceWindowNodeParser
+ from pinecone import Pinecone
+ from llama_index.core import VectorStoreIndex, StorageContext
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
+ from llama_index.core.postprocessor import MetadataReplacementPostProcessor
+ from llama_index.core.postprocessor import SentenceTransformerRerank
+
+
+ def initialize_llm():
+     load_dotenv()
+     HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
+
+     Settings.llm = HuggingFaceInferenceAPI(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", model_kwargs={"temperature": 0.1}, token=HF_TOKEN)
+     Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+
+
+
+ def initialize_pinecone():
+     load_dotenv()
+     api_key = os.environ["PINECONE_API_KEY"]
+     index_name = "chatbot"
+     pc = Pinecone(api_key=api_key)
+     pinecone_index = pc.Index(index_name)
+     vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+     index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
+     return index
+
+ def create_query_engine(index):
+     postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
+     rerank = SentenceTransformerRerank(top_n=2, model="BAAI/bge-reranker-base")
+     query_engine = index.as_query_engine(similarity_top_k=6, vector_store_query_mode="hybrid",
+                                          alpha=0.5, node_postprocessors=[postproc, rerank])
+     return query_engine
+
+ def generate_prompt(context, query):
+     prompt_template = """
+     You are a highly informed representative of KYC SUD CONSULTING.
+     Your role is to assist users by answering questions about our company with accurate and helpful information.
+     Use the provided context to guide your responses effectively.
+     If a question falls outside your knowledge or the context provided, simply state that you don't have the information.
+
+     Given the following context:
+     {context_str}
+
+     Answer the following question in different words while keeping the meaning intact:
+     {query_str}
+
+     Response:
+     """
+     return prompt_template.format(context_str=context, query_str=query)
+
+
+
+ # Retrieve supporting context for the sentence, build a grounded prompt, and ask the LLM
+ def get_response(query_engine, sentence):
+     retrieved_context = query_engine.query(sentence)
+     prompt = generate_prompt(retrieved_context, sentence)
+     response = Settings.llm.complete(prompt)  # complete() returns a CompletionResponse; .text holds the answer
+     return response.text
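For an end-to-end check of this module outside FastAPI, a minimal driver sketch is below. It assumes a .env file providing HUGGINGFACE_API_KEY and PINECONE_API_KEY, and that the Pinecone index named "chatbot" already exists, was populated with sentence-window metadata, and supports sparse vectors (which the hybrid query mode relies on). The question string is illustrative only.

# smoke_test.py: run the retrieval pipeline once from the command line
from rag_retriever import initialize_llm, initialize_pinecone, create_query_engine, get_response

initialize_llm()                     # registers Mixtral + BGE embeddings on Settings
index = initialize_pinecone()        # wraps the existing "chatbot" Pinecone index
engine = create_query_engine(index)  # hybrid retrieval, window replacement, reranking

print(get_response(engine, "What services does KYC SUD CONSULTING offer?"))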
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ pinecone-client
+ python-dotenv
+ llama-index
+ llama-index-embeddings-huggingface
+ llama-index-llms-huggingface
+ llama-index-vector-stores-pinecone
+ fastapi
+ pydantic
+ uvicorn