File size: 3,051 Bytes
1197cde
 
 
 
46823f8
0b69770
fa5de89
e794787
46823f8
0dda6bc
 
 
90757b8
0dda6bc
46823f8
1197cde
46823f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1197cde
 
 
 
46823f8
1197cde
 
 
46823f8
1197cde
46823f8
 
1197cde
 
 
 
 
 
 
 
46823f8
 
1197cde
 
 
 
 
 
 
4853459
 
46823f8
 
1197cde
 
 
 
46823f8
1197cde
 
 
46823f8
1197cde
46823f8
 
 
 
 
 
 
 
 
1197cde
 
 
 
 
 
 
 
4853459
90757b8
4853459
 
90757b8
1197cde
 
 
 
 
 
 
46823f8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import os
import fitz
import re
import gradio as gr






def preprocess(text):
    """Return *text* with every newline character removed.

    Only ``'\\n'`` is stripped; all other characters, including other
    whitespace, are kept as they are.
    """
    pieces = text.split('\n')
    return ''.join(pieces)

def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a PDF, one string per page.

    Args:
        path: Location of the PDF file; passed straight to ``fitz.open``.
        start_page: 1-based number of the first page to read. Values below
            1 are treated as 1.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` or a value past the end of the document means "up to
            the last page".

    Returns:
        A list holding the extracted text of each page in the requested
        range, in page order. The list is empty when the range is empty.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count

        # Clamp the requested range to the pages that actually exist so an
        # oversized end_page or a start_page below 1 cannot produce an
        # out-of-range (or negative) page index.
        if end_page is None or end_page > total_pages:
            end_page = total_pages
        first_index = max(start_page - 1, 0)

        return [
            doc.load_page(page_index).get_text("text")
            for page_index in range(first_index, end_page)
        ]
    finally:
        # Release the document even when reading a page fails.
        doc.close()

# Article heading: "第" + a Chinese or Arabic numeral + "条" + one whitespace
# character. Restricting the middle part to numeral characters stops a single
# match from swallowing article text that sits between a "第" and a later "条"
# on the same line, which an unrestricted greedy ".+" would do.
_ARTICLE_HEADING = re.compile(r'第[零〇一二三四五六七八九十百千两\d]+条\s')


def law_split(path,name):
    """Split one law PDF into a list of labelled article strings.

    Args:
        path: Location of the PDF file.
        name: Short name of the law; it is inserted into the label that
            prefixes every article.

    Returns:
        One string per article, each of the form
        ``'《中华人民共和国{name}》 第{n}条 ' + article text`` with newlines
        removed from the article text. ``n`` is the article's 1-based
        position among the detected headings, not a number parsed from the
        heading itself. Text before the first heading is discarded.
    """
    full_text = ''.join(pdf_to_text(path))
    # Element 0 is whatever precedes the first heading (title, preamble).
    article_bodies = _ARTICLE_HEADING.split(full_text)[1:]

    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(article_bodies, start=1)
    ]


def folder_read(path):
    """Collect the labelled articles of every PDF directly inside a folder.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        The concatenation of ``law_split`` results for each ``.pdf`` file
        in the directory, processed in sorted file-name order. Entries
        without a ``.pdf`` extension (compared case-insensitively) are
        skipped. The list is empty when the folder holds no PDFs.
    """
    text_list = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting corpus order reproducible between runs.
    for file in sorted(os.listdir(path)):
        # splitext only treats the final ".xxx" as the extension, so a file
        # literally named "pdf" is not mistaken for a PDF and a name such as
        # "a.b.pdf" keeps its full stem "a.b".
        name, suffix = os.path.splitext(file)
        if suffix.lower() != '.pdf':
            continue
        text_list += law_split(os.path.join(path, file), name)

    return text_list


# Module-level setup, executed once when the script is loaded.
# Read every PDF in the relative 'laws' directory and split it into labelled articles.
text_list=folder_read('laws')
# Embedding client used to vectorise the articles.
# NOTE(review): presumably reads its API credentials from the environment — confirm.
embeddings = OpenAIEmbeddings()
# Vector store built from the article texts; generate_answer() searches it.
vectordb = Chroma.from_texts(texts=text_list, embedding=embeddings)
# Completion model that writes the answers (temperature 0.5, at most 1024 output tokens).
llm = OpenAI(temperature=0.5,max_tokens=1024)



def generate_answer(question):
    """Answer *question* using law articles retrieved from the vector store.

    Five articles are fetched with a max-marginal-relevance search on the
    raw question. They are handed to a question-answering chain together
    with the question followed by a fixed block of Chinese instructions
    (cite article number and law, cite at most two articles, and reply
    with a fixed sentence when the question is unrelated).

    Args:
        question: The user's question.

    Returns:
        Whatever the chain's ``run`` call returns for this query.
    """
    instructions = '''
    请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。
    引用的法律条文不要超过两条,回答尽量简明扼要
    如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。
    '''
    # Retrieval uses the bare question; the instructions are only appended
    # for the language-model call.
    context_docs = vectordb.max_marginal_relevance_search(question, k=5)
    qa_chain = load_qa_chain(llm)
    return qa_chain.run(
        input_documents=context_docs,
        question=question + instructions,
    )
    
def ask_api(question):
    """Entry point for the web UI: validate the input, then answer it.

    Args:
        question: Text typed by the user.

    Returns:
        The generated answer, or a fixed error message when the input is
        empty or consists only of whitespace.
    """
    if question.strip() != '':
        return generate_answer(question)

    # Nothing but whitespace was submitted.
    return '[ERROR]: 未输入问题'



# Heading passed to the Gradio interface.
title = '青少年法律科普问答'
# Introductory text passed to the Gradio interface; it names the laws the bot
# is meant to cover. The "\n" escapes add extra line breaks to the literal ones.
description = """ 本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有\n
《未成年人保护法》\n
《义务教育法》\n
《预防未成年人犯罪法》\n
《妇女儿童权益保护法》
"""

# Startup smoke test: sends one sample question through retrieval and the
# language model and prints the answer before the UI is created.
# NOTE(review): this runs on every start of the script, so each launch issues
# a real query — consider removing it or putting it behind a debug flag.
print('test start')
print(generate_answer('未成年遭受网络欺凌该怎么办?'))
print('test end')


# Web UI: one multi-line question box in, one answer box out, with three
# clickable example questions. Each submission is routed through ask_api.
demo = gr.Interface(
    title=title,
    description=description,
    fn=ask_api,
    inputs=gr.Textbox(label="请输入与青少年法律相关的问题", lines=2),
    # Use the same top-level gr.Textbox component as the input instead of
    # the legacy gr.outputs namespace, which newer Gradio releases no longer
    # provide.
    outputs=gr.Textbox(label="参考回答"),
    examples=[
        ["未成年遭受网络欺凌该怎么办?"],
        ['年满多少岁的儿童应当接受义务教育?'],
        ['若发现离家出走的未成年人,应如何处理?'],
    ],
)


# Start the Gradio server for the interface defined above.
demo.launch()