File size: 3,051 Bytes
1197cde 46823f8 0b69770 fa5de89 e794787 46823f8 0dda6bc 90757b8 0dda6bc 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 4853459 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 4853459 90757b8 4853459 90757b8 1197cde 46823f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import os
import fitz
import re
import gradio as gr
def preprocess(text):
    """Return ``text`` with every line-feed character removed.

    Only ``'\\n'`` is dropped; all other characters, including other
    whitespace, are kept unchanged.
    """
    # Splitting on '\n' and re-joining with nothing is the same as deleting
    # each '\n' occurrence.
    return ''.join(text.split('\n'))
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a range of pages from a PDF.

    Args:
        path: Location of the PDF, passed straight to ``fitz.open``.
        start_page: 1-based number of the first page to read.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` means "through the last page of the document".

    Returns:
        A list holding one string per page, in page order.
    """
    # Open the document as a context manager so it is closed even when a
    # page fails to load or extract; a trailing explicit close() would be
    # skipped on any exception raised inside the loop.
    with fitz.open(path) as doc:
        if end_page is None:
            end_page = doc.page_count
        # load_page() takes 0-based indexes, hence ``start_page - 1``; the
        # exclusive upper bound of range() makes ``end_page`` inclusive.
        return [
            doc.load_page(page_index).get_text("text")
            for page_index in range(start_page - 1, end_page)
        ]
# Heading that opens one article, e.g. "第十二条 " or "第12条\n".
# Only numerals are accepted between "第" and "条": a permissive ``.+`` is
# greedy and also matches ordinary prose such as "第三人的条件 ", which would
# cut an article in two and discard the text between the two characters.
_ARTICLE_HEADING = re.compile(r'第[零〇一二三四五六七八九十百千两0-9]+条\s')


def law_split(path, name):
    """Split one law PDF into a list of labelled article strings.

    Args:
        path: Location of the PDF file.
        name: Short name of the law; it is inserted into the label as
            ``《中华人民共和国{name}》``.

    Returns:
        One string per article, each formatted as
        ``《中华人民共和国{name}》 第{n}条 <article text without newlines>``.
        ``n`` is the 1-based position of the article in the split result;
        it is not parsed from the heading itself.
    """
    full_text = ''.join(pdf_to_text(path))
    # Element 0 is whatever precedes the first article heading, so drop it.
    article_bodies = _ARTICLE_HEADING.split(full_text)[1:]
    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(article_bodies, start=1)
    ]
def folder_read(path):
    """Collect the labelled articles of every PDF found directly in ``path``.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A flat list of article strings: the concatenation of
        ``law_split(...)`` for each PDF, processed in sorted filename order.
        Entries whose extension is not ``.pdf`` (compared case-insensitively)
        are ignored.
    """
    articles = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # resulting corpus order reproducible between runs.
    for filename in sorted(os.listdir(path)):
        # Check the real extension: looking only at the text after the last
        # dot would also accept a file literally named "pdf", and an exact
        # comparison would miss "X.PDF".
        extension = os.path.splitext(filename)[1]
        if extension.lower() != '.pdf':
            continue
        # The law name is the part of the filename before the first dot.
        law_name = filename.split('.')[0]
        articles += law_split(os.path.join(path, filename), law_name)
    return articles
# Retrieval corpus: one labelled string per article from the PDFs in 'laws'.
text_list = folder_read('laws')

# Embed every article and index the vectors in a Chroma store.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(
    texts=text_list,
    embedding=embeddings,
)

# Completion model that writes the final answer.
llm = OpenAI(
    temperature=0.5,
    max_tokens=1024,
)
def generate_answer(question):
    """Answer ``question`` from the indexed law articles.

    The five articles returned by a max-marginal-relevance search for the
    question are handed to a question-answering chain built on the
    module-level ``llm``. The fixed instruction text below is appended to
    the question before the chain runs.

    Args:
        question: The user's question as a string.

    Returns:
        Whatever ``chain.run`` returns for the retrieved documents.
    """
    instructions = '''
请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。
引用的法律条文不要超过两条,回答尽量简明扼要
如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。
'''
    # Retrieval uses the bare question; the instructions are only added to
    # the text given to the chain.
    retrieved_articles = vectordb.max_marginal_relevance_search(question, k=5)
    qa_chain = load_qa_chain(llm)
    return qa_chain.run(
        input_documents=retrieved_articles,
        question=question + instructions,
    )
def ask_api(question):
    """Validate the question and return the generated answer.

    Args:
        question: Text of the question.

    Returns:
        The string ``'[ERROR]: 未输入问题'`` when the question is empty or
        whitespace-only, otherwise the result of ``generate_answer``.
    """
    # A whitespace-only string strips down to '', which is falsy.
    if not question.strip():
        return '[ERROR]: 未输入问题'
    return generate_answer(question)
# Heading and introductory text passed to the Gradio interface.
title = '青少年法律科普问答'
description = """ 本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有\n
《未成年人保护法》\n
《义务教育法》\n
《预防未成年人犯罪法》\n
《妇女儿童权益保护法》
"""

# Start-up smoke test: run one sample question through the whole pipeline
# and print the answer between two marker lines.
print('test start')
_smoke_answer = generate_answer('未成年遭受网络欺凌该怎么办?')
print(_smoke_answer)
print('test end')
# Web UI: a two-line question box wired to ask_api, a text box for the
# answer, and three clickable example questions.
demo = gr.Interface(
    fn=ask_api,
    title=title,
    description=description,
    inputs=gr.Textbox(label="请输入与青少年法律相关的问题", lines=2),
    # Use the same top-level Textbox component as ``inputs``; the legacy
    # ``gr.outputs`` namespace is deprecated.
    outputs=gr.Textbox(label="参考回答"),
    examples=[
        ["未成年遭受网络欺凌该怎么办?"],
        ['年满多少岁的儿童应当接受义务教育?'],
        ['若发现离家出走的未成年人,应如何处理?'],
    ],
)

# Start the Gradio server.
demo.launch()