"""Gradio question-answering bot over Chinese juvenile-law PDFs.

At start-up every PDF in the ``laws`` folder is split into individual
articles, embedded with OpenAI embeddings and stored in a Chroma vector
store. Each question retrieves the most relevant articles and passes them,
together with a fixed instruction prompt, to an OpenAI LLM.
"""
import os
import re
import time
from typing import List, Optional, Tuple

import fitz
import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma

# Article heading such as "第十二条 ": the number is restricted to Chinese or
# Arabic numerals so that a greedy match can never swallow article text that
# merely happens to sit between a "第" and a later "条" on the same line.
_ARTICLE_HEADING = re.compile(r'第[零〇一二三四五六七八九十百千\d]+条\s')


def preprocess(text: str) -> str:
    """Return *text* with every newline character removed."""
    return text.replace('\n', '')


def pdf_to_text(path: str, start_page: int = 1,
                end_page: Optional[int] = None) -> List[str]:
    """Extract the plain text of a page range from a PDF.

    Args:
        path: Location of the PDF file.
        start_page: First page to read, 1-based. Values below 1 are
            treated as 1.
        end_page: Last page to read, 1-based and inclusive. ``None`` or a
            value past the end of the document means "through the last page".

    Returns:
        One string per page, in page order.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        if end_page is None:
            end_page = total_pages
        # Clamp both bounds so an out-of-range request cannot index a
        # non-existent (or, for negatives, an unintended) page.
        first = max(start_page, 1) - 1
        last = min(end_page, total_pages)
        return [doc.load_page(i).get_text("text") for i in range(first, last)]
    finally:
        # Release the file handle even if text extraction fails.
        doc.close()


def law_split(path: str, name: str) -> List[str]:
    """Split one law PDF into its articles.

    Args:
        path: Location of the PDF file.
        name: Name of the law, inserted into the prefix of every article.

    Returns:
        One string per article, each prefixed with the law's title and the
        article's ordinal position. Anything before the first article
        heading is discarded.
    """
    full_text = ''.join(pdf_to_text(path))
    # Element 0 is whatever precedes the first heading (title page, table
    # of contents, ...), so it is dropped.
    articles = _ARTICLE_HEADING.split(full_text)[1:]
    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(articles, start=1)
    ]


def folder_read(path: str) -> List[str]:
    """Collect the articles of every PDF found directly inside *path*.

    The file name without its extension is used as the law's name. Files
    are processed in sorted order so the result is deterministic.

    Args:
        path: Folder containing the law PDFs.

    Returns:
        The concatenated article lists of all PDFs in the folder.
    """
    text_list = []
    for file in sorted(os.listdir(path)):
        name, suffix = os.path.splitext(file)
        if suffix.lower() == '.pdf':
            text_list += law_split(os.path.join(path, file), name)
    return text_list


# Build the searchable index once at start-up.
text_list = folder_read('laws')
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(texts=text_list, embedding=embeddings)
llm = OpenAI(temperature=0.5, max_tokens=1024)

# Instruction text appended to every user question before it reaches the LLM.
prompt = '''
请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。
引用的法律条文不要超过两条,回答尽量简明扼要
如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。
'''


def generate_answer(question: str) -> Tuple[str, str, float]:
    """Answer *question* from the indexed law articles.

    Args:
        question: The user's question.

    Returns:
        A tuple ``(answer, sources, seconds)``: the LLM's answer, the
        retrieved articles joined by newlines, and the elapsed time.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    most_relevant_texts = vectordb.max_marginal_relevance_search(question, k=5)
    print(most_relevant_texts)
    chain = load_qa_chain(llm)
    answer = chain.run(input_documents=most_relevant_texts,
                       question=question + prompt)
    run_time = time.perf_counter() - start_time
    sources = '\n'.join(t.page_content for t in most_relevant_texts)
    return answer, sources, run_time


def ask_api(question: str) -> Tuple[str, str, float]:
    """Validate *question* and answer it.

    A blank question yields an error message instead of a paid embedding
    and LLM round trip. The result always has the three-element shape of
    :func:`generate_answer` so it can feed the three UI outputs directly.
    """
    if question is None or not question.strip():
        return '[ERROR]: 未输入问题', '', 0
    return generate_answer(question)


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 青少年法律科普问答
        本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有
        《未成年人保护法》
        《义务教育法》
        《预防未成年人犯罪法》
        《妇女儿童权益保护法》
        """)
    with gr.Column():
        text_input = gr.Textbox(label='请输入与青少年法律相关的问题')
        text_button = gr.Button("提交")
        gr.Examples(
            [["未成年遭受网络欺凌该怎么办?"],
             ['年满多少岁的儿童应当接受义务教育?'],
             ['遇到离家出走的未成年应该如何处理?']],
            [text_input],
            label='示例问题'
        )
        text_output = [gr.Textbox(label='参考回答'),
                       gr.Textbox(label='相关法律原文'),
                       gr.Number(label="运行时长(s)")]
    # Route clicks through ask_api so blank input is rejected before any
    # retrieval or LLM call is made.
    text_button.click(ask_api, inputs=text_input, outputs=text_output)

demo.launch()