|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.vectorstores import Chroma |
|
from langchain.llms import OpenAI |
|
from langchain.chains.question_answering import load_qa_chain |
|
import os |
|
import fitz |
|
import re |
|
import gradio as gr |
|
import time |
|
|
|
|
|
|
|
|
|
|
|
def preprocess(text: str) -> str:
    """Collapse a piece of extracted text onto a single line.

    Line breaks are deleted outright (not replaced by a space), so text
    that was wrapped across lines is rejoined without a gap. Carriage
    returns are removed as well, so CRLF line endings do not leave a
    stray ``\\r`` behind.

    Args:
        text: Raw text, possibly containing line breaks.

    Returns:
        The same text with every ``\\n`` and ``\\r`` removed.
    """
    return text.replace('\r', '').replace('\n', '')
|
|
|
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a range of pages from a PDF.

    Args:
        path: Path of the PDF file to open.
        start_page: 1-based number of the first page to read. Values
            below 1 are treated as 1.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` or a value past the end of the document means
            "through the last page".

    Returns:
        A list with one string per page, in page order. The list is
        empty when the requested range contains no pages.
    """
    # The context manager closes the document even if a page fails to load.
    with fitz.open(path) as doc:
        total_pages = doc.page_count

        if end_page is None or end_page > total_pages:
            end_page = total_pages
        # Page indices are 0-based; clamp so a start below 1 cannot turn
        # into a negative index.
        first_index = max(start_page, 1) - 1

        return [
            doc.load_page(index).get_text("text")
            for index in range(first_index, end_page)
        ]
|
|
|
def law_split(path, name):
    """Split a law PDF into one labelled string per article.

    The whole document is read as text and cut at every article heading
    of the form ``第<number>条`` followed by whitespace. Anything before
    the first heading (title, preamble, table of contents) is discarded.

    Args:
        path: Path of the PDF file containing the law.
        name: Name of the law, inserted into the label of each article.

    Returns:
        A list of strings, one per article, each prefixed with
        ``《中华人民共和国{name}》 第{n}条 `` where ``n`` is the 1-based
        position of the article in the document, with line breaks
        removed from the article text.
    """
    full_text = ''.join(pdf_to_text(path))

    # Only numerals may sit between 第 and 条. A greedy ``.+`` would run to
    # the LAST "条" + whitespace on the line, so a heading line that ends in
    # a wrapped word such as 条件 / 条例 would have its text swallowed.
    heading_pattern = r'第[0-9〇零一二三四五六七八九十百千]+条\s'
    article_bodies = re.split(heading_pattern, full_text)[1:]

    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(article_bodies, start=1)
    ]
|
|
|
|
|
def folder_read(path):
    """Load every PDF in a folder and return all of their articles.

    Each file whose extension is ``.pdf`` (case-insensitive) is split
    into articles with ``law_split``; the file name without its
    extension is used as the name of the law. Other entries are ignored.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A flat list of article strings from all PDFs, files taken in
        sorted name order.
    """
    text_list = []

    # os.listdir order is arbitrary; sort so the result is reproducible.
    for file in sorted(os.listdir(path)):
        # splitext keeps dots inside the name ("a.b.pdf" -> "a.b") and does
        # not mistake an extension-less file called "pdf" for a PDF.
        name, suffix = os.path.splitext(file)
        if suffix.lower() != '.pdf':
            continue
        text_list += law_split(os.path.join(path, file), name)

    return text_list
|
|
|
|
|
# Corpus: every article of every PDF found in the local "laws" folder.
text_list = folder_read('laws')

# Embed the articles and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(
    texts=text_list,
    embedding=embeddings,
)

# Completion model used to write the answers.
llm = OpenAI(
    temperature=0.5,
    max_tokens=1024,
)

# Instructions appended to every user question: answer from the supplied
# articles, name the article number and the law for each citation, cite at
# most two articles, stay concise, and give a fixed "not related" reply
# when the question does not match the search results.
prompt = (
    '\n'
    '请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。\n'
    '引用的法律条文不要超过两条,回答尽量简明扼要\n'
    '如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。\n'
)
|
|
|
|
|
def generate_answer(question):
    """Answer a question from the indexed law articles.

    Retrieves five articles from the vector store with a
    max-marginal-relevance search, then runs a question-answering chain
    over them with the module-level ``prompt`` appended to the question.

    Args:
        question: The user's question.

    Returns:
        A 3-tuple ``(answer, sources, run_time)``: the text produced by
        the chain, the retrieved articles joined by newlines, and the
        elapsed time in seconds.
    """
    # perf_counter is monotonic; time.time() can jump if the system clock
    # is adjusted, which would distort the reported duration.
    started = time.perf_counter()

    most_relevant_texts = vectordb.max_marginal_relevance_search(question, k=5)
    print(most_relevant_texts)

    chain = load_qa_chain(llm)
    answer = chain.run(
        input_documents=most_relevant_texts,
        question=question + prompt,
    )

    run_time = time.perf_counter() - started
    sources = '\n'.join(doc.page_content for doc in most_relevant_texts)
    return answer, sources, run_time
|
|
|
|
|
|
|
def ask_api(question):
    """Validate the question, then answer it.

    This is the callback of the submit button. It always returns three
    values so the result can be bound to the three output components
    (answer, source articles, run time).

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        ``generate_answer(question)`` for a non-blank question. For a
        blank one, an error message in the answer slot, an empty sources
        string and a run time of 0 -- no retrieval or model call is made.
    """
    if question is None or question.strip() == '':
        return '[ERROR]: 未输入问题', '', 0

    return generate_answer(question)


# Gradio page: an introduction, a question box with example questions, and
# three outputs (answer, quoted articles, elapsed seconds).
with gr.Blocks() as demo:
    gr.Markdown(
        """
# 青少年法律科普问答
本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有

《未成年人保护法》

《义务教育法》

《预防未成年人犯罪法》

《妇女儿童权益保护法》
""")

    with gr.Column():
        text_input = gr.Textbox(label='请输入与青少年法律相关的问题')
        text_button = gr.Button("提交")
        gr.Examples(
            [["未成年遭受网络欺凌该怎么办?"],['年满多少岁的儿童应当接受义务教育?'],['遇到离家出走的未成年应该如何处理?']],
            [text_input],
            label='示例问题'
        )
        text_output = [gr.Textbox(label='参考回答'),
                       gr.Textbox(label='相关法律原文'),
                       gr.Number(label="运行时长(s)")]

    # Go through ask_api so blank input is rejected before any model call.
    text_button.click(ask_api, inputs=text_input, outputs=text_output)

demo.launch()
|
|
|
|
|
|
|
|
|
|