load_data.py:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pymongo import MongoClient
import key_param
# Ingestion script: download a PDF, split it into small chunks, embed the
# chunks with OpenAI and store them in a MongoDB Atlas collection.

# Connect to your Atlas cluster
client = MongoClient(key_param.MONGO_URI)

# Define collection and index name
db_name = "langchain_db"
collection_name = "test"
atlas_collection = client[db_name][collection_name]
vector_search_index = "vector_index"

# Load the PDF
loader = PyPDFLoader(
    "https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP")
data = loader.load()

# Split PDF into documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200, chunk_overlap=20)
docs = text_splitter.split_documents(data)

# Create the vector store from the split documents.
# NOTE(review): this presumably does not create the Atlas Vector Search index
# itself -- confirm that an index named `vector_search_index` exists on this
# collection before querying.
vector_search = MongoDBAtlasVectorSearch.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(
        openai_api_key=key_param.openai_api_key, disallowed_special=()),
    collection=atlas_collection,
    index_name=vector_search_index
)
extract_information.py:
import pprint
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pymongo import MongoClient
import key_param
# Query-side setup: connect to Atlas, open the vector store, run a direct
# similarity search, then build the retriever, prompt and chat model.

# Connect to your Atlas cluster
client = MongoClient(key_param.MONGO_URI)

# These must name the same database/collection that load_data.py writes to
# ("langchain_db" / "test"); pointing at a different namespace yields empty
# search results and an answer produced without any retrieved context.
db_name = "langchain_db"
collection_name = "test"
atlas_collection = client[db_name][collection_name]
vector_search_index = "vector_index"

# Define the text embedding model
embedding = OpenAIEmbeddings(
    openai_api_key=key_param.openai_api_key, disallowed_special=())

# Initialize the Vector Store.
# index_name is passed explicitly so the search uses the same index name the
# loader configured instead of the library default.
vector_search = MongoDBAtlasVectorSearch(
    collection=atlas_collection,
    embedding=embedding,
    index_name=vector_search_index,
)

# Run a direct similarity search and print the raw results
query = "MongoDB Atlas security"
results = vector_search.similarity_search(query)
print("\nvector_search.similarity_search(query):")
pprint.pprint(results)

# Instantiate Atlas Vector Search as a retriever
retriever = vector_search.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10, "score_threshold": 0.75}
)

# Define a prompt template; {context} and {question} are filled in by the chain
template = """
Use the following pieces of context to answer the question at the end.
If you don’t know the answer, just say that you don’t know, don’t try to make up an answer.
{context}
Question: {question}
"""
custom_rag_prompt = PromptTemplate.from_template(template)

llm = ChatOpenAI(openai_api_key=key_param.openai_api_key, temperature=0)
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line; an empty string
        when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# Construct a chain to answer questions on your data:
# the retriever output is formatted into the prompt's {context}, the raw
# question is passed through as {question}, then the prompt goes to the chat
# model and the reply is parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Prompt the chain
question = "How can I secure my MongoDB Atlas cluster?"
answer = rag_chain.invoke(question)
print("Question: " + question)
print("Answer: " + answer)

# Return source documents.
# Uses the retriever's invoke() entry point (the same interface the chain is
# called through) rather than the deprecated get_relevant_documents().
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)
results:
vector_search.similarity_search(query):
Question: How can I secure my MongoDB Atlas cluster?
Answer: Some ways to secure your MongoDB Atlas cluster include enabling network encryption, setting up IP whitelisting, enabling authentication mechanisms like username/password or LDAP, enabling role-based access control, enabling auditing, and regularly updating your MongoDB version to the latest stable release with security patches. Additionally, you can consider implementing encryption at rest and using VPC peering for added security measures.
If you need more specific guidance on securing your MongoDB Atlas cluster, it would be best to refer to the official MongoDB documentation or consult with a database security expert.
Source documents: