"""Gradio question-answering bot over the statute PDFs stored in ``laws/``.

At import time the script splits every PDF in ``laws/`` into articles,
embeds them into a Chroma vector store, runs one smoke-test query and then
launches the Gradio UI.
"""
import os
import re
from typing import List, Optional

import fitz
import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma

# Article heading such as "第十二条 ".  Only numerals (optionally spaced out)
# are allowed between 第 and 条, so the match can no longer run greedily from
# one 第 across ordinary statute text to a later 条 on the same line.
# NOTE(review): \s also matches a newline, so an in-text reference that ends
# exactly at a line break ("...第十条\n") is still treated as a heading.
_ARTICLE_HEADING = re.compile(r'第[零〇一二三四五六七八九十百千\d \t\u3000]+条\s')

# Instructions appended to every user question before it is sent to the LLM.
_ANSWER_INSTRUCTIONS = (
    ' 请根据给出的法律条文回答问题,给出适当的法律建议。'
    '回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。'
    ' 引用的法律条文不要超过两条,回答尽量简明扼要'
    ' 如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。 '
)


def preprocess(text: str) -> str:
    """Return *text* with every newline character removed."""
    return text.replace('\n', '')


def pdf_to_text(path: str, start_page: int = 1,
                end_page: Optional[int] = None) -> List[str]:
    """Extract the plain text of a PDF, one string per page.

    Args:
        path: Location of the PDF file.
        start_page: First page to read, 1-based. Values below 1 are treated
            as 1.
        end_page: Last page to read, 1-based and inclusive. ``None`` or a
            value past the end of the document means "through the last page".

    Returns:
        The text of each requested page, in page order.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        if end_page is None or end_page > total_pages:
            end_page = total_pages
        # Clamp so that start_page <= 0 cannot turn into a negative page index.
        first_index = max(start_page - 1, 0)
        return [
            doc.load_page(page_index).get_text("text")
            for page_index in range(first_index, end_page)
        ]
    finally:
        # Release the document even when extraction fails part-way through.
        doc.close()


def law_split(path: str, name: str) -> List[str]:
    """Split one statute PDF into a list of labelled articles.

    Args:
        path: Location of the statute PDF.
        name: Statute name inserted into the label of every article.

    Returns:
        One string per article, each prefixed with the statute name and a
        running article number that starts at 1.
    """
    full_text = ''.join(pdf_to_text(path))
    # Element 0 is whatever precedes the first heading; it is not an article.
    article_bodies = _ARTICLE_HEADING.split(full_text)[1:]
    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(article_bodies, start=1)
    ]


def folder_read(path: str) -> List[str]:
    """Collect the labelled articles of every PDF directly inside *path*.

    The file name without its extension is used as the statute name. Files
    are processed in sorted order so the result is reproducible.
    """
    articles: List[str] = []
    for file_name in sorted(os.listdir(path)):
        # splitext keeps dots inside the statute name and ignores files that
        # have no extension at all.
        stem, extension = os.path.splitext(file_name)
        if extension.lower() != '.pdf':
            continue
        articles.extend(law_split(os.path.join(path, file_name), stem))
    return articles


text_list = folder_read('laws')
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(texts=text_list, embedding=embeddings)
llm = OpenAI(temperature=0.5, max_tokens=1024)
# Built once here instead of on every question.
qa_chain = load_qa_chain(llm)


def generate_answer(question: str) -> str:
    """Answer *question* from the articles retrieved out of the vector store.

    Five articles are fetched with a max-marginal-relevance search and handed
    to the QA chain together with the question and the answering
    instructions.
    """
    most_relevant_texts = vectordb.max_marginal_relevance_search(question, k=5)
    return qa_chain.run(
        input_documents=most_relevant_texts,
        question=question + _ANSWER_INSTRUCTIONS,
    )


def ask_api(question: str) -> str:
    """Gradio callback: reject blank input, otherwise return the answer."""
    if question is None or not question.strip():
        return '[ERROR]: 未输入问题'
    return generate_answer(question)


title = '青少年法律科普问答'
description = (
    " 本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有\n"
    " 《未成年人保护法》\n"
    " 《义务教育法》\n"
    " 《预防未成年人犯罪法》\n"
    " 《妇女儿童权益保护法》 "
)

# Start-up smoke test: runs one real query through the whole pipeline.
print('test start')
print(generate_answer('未成年遭受网络欺凌该怎么办?'))
print('test end')

demo = gr.Interface(
    title=title,
    description=description,
    fn=ask_api,
    inputs=gr.Textbox(label="请输入与青少年法律相关的问题", lines=2),
    # gr.Textbox instead of the legacy gr.outputs.Textbox, matching `inputs`.
    outputs=gr.Textbox(label="参考回答"),
    examples=[
        ["未成年遭受网络欺凌该怎么办?"],
        ['年满多少岁的儿童应当接受义务教育?'],
        ['若发现离家出走的未成年人,应如何处理?'],
    ],
)
demo.launch()