import streamlit as st
import pandas as pd
from transformers import BartForConditionalGeneration, TapexTokenizer, T5ForConditionalGeneration, T5Tokenizer
from prophet import Prophet
import datetime
import sentencepiece as spm

# Path to the CSS file; adjust to match your project structure
css_file = "style.css"

# Open and read the CSS file
with open(css_file, "r") as css:
    css_style = css.read()

# Combined markdown: font import, inline CSS, and the header HTML
html_content = f"""
<style>
{css_style}
@import url('https://fonts.googleapis.com/css2?family=Kanit:wght@700&display=swap');
</style>
<div style='display: flex; flex-direction: column; align-items: flex-start;'>
    <div style='display: flex; align-items: center;'>
        <div style='width: 20px; height: 40px; background-color: green; margin-right: 1px;'></div>
        <div style='width: 20px; height: 40px; background-color: red; margin-right: 1px;'></div>
        <div style='width: 20px; height: 40px; background-color: yellow; margin-right: 20px;'></div>
        <span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>PROTAX</strong></span>
    </div>
    <div style='text-align: left; width: 100%;'>
        <span style='font-size: 20px; font-weight: normal; color: #333; font-family: "Kanit", sans-serif'>
        <strong>PRO</strong>phet & <strong>TA</strong>pex E<strong>X</strong>plorer</span>
    </div>
</div>
"""

# Render the combined markdown in Streamlit
st.markdown(html_content, unsafe_allow_html=True)


# File upload interface
uploaded_file = st.file_uploader("Carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])

if uploaded_file:
    if 'all_anomalies' not in st.session_state:
        with st.spinner('Aplicando modelo de série temporal...'):
            # Load the file into a DataFrame
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
            elif uploaded_file.name.endswith('.xlsx'):
                df = pd.read_excel(uploaded_file)

            # Data preprocessing for Prophet
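            # Assumed layout, inferred from the slicing below: column labels sit in the second spreadsheet row,
            # data starts on the third row, monthly columns begin at column 10, and the final column is dropped.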
            new_df = df.iloc[2:, 9:-1].fillna(0)
            new_df.columns = df.iloc[1, 9:-1]
            new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)

            month_dict = {
                'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
                'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
                'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
            }

            def convert_column_name(column_name):
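                # Convert 'Mai/2023'-style headers to 'MM/YYYY'; the label column passes through unchanged.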
                if column_name == 'Rótulos de Linha':
                    return column_name
                parts = column_name.split('/')
                month = parts[0].strip()
                year = parts[1].strip()
                year = ''.join(filter(str.isdigit, year))
                month_number = month_dict.get(month, '00')
                return f"{month_number}/{year}"

            new_df.columns = [convert_column_name(col) for col in new_df.columns]
            new_df.columns = pd.to_datetime(new_df.columns, errors='coerce')
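            # The label column ('Rótulos de Linha') cannot be parsed as a date and becomes NaT; rename it to 'Rotulo' below.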
            new_df.rename(columns={new_df.columns[0]: 'Rotulo'}, inplace=True)
            df_clean = new_df.copy()

            # Create an empty DataFrame to store all anomalies
            all_anomalies = pd.DataFrame()

            # Process each row in the DataFrame
            for index, row in df_clean.iterrows():
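                # Build a Prophet-ready frame: 'ds' holds the monthly timestamps, 'y' holds this row's values for those months.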
                data = pd.DataFrame({
                    'ds': [col for col in df_clean.columns if isinstance(col, pd.Timestamp)],
                    'y': row[[isinstance(col, pd.Timestamp) for col in df_clean.columns]].values
                })

                data = data[data['y'] > 0].reset_index(drop=True)
                if data.empty or len(data) < 2:
                    print(f"Skipping group {row['Rotulo']} because there are less than 2 non-zero observations.")
                    continue

                try:
                    model = Prophet(interval_width=0.95)
                    model.fit(data)
                except ValueError as e:
                    print(f"Skipping group {row['Rotulo']} due to error: {e}")
                    continue

                future = model.make_future_dataframe(periods=12, freq='M')
                forecast = model.predict(future)

                num_real = len(data)
                num_forecast = len(forecast)
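                # Pad the observed series with None so it lines up with the 12 future months Prophet appended.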
                real_values = list(data['y']) + [None] * (num_forecast - num_real)
                forecast['real'] = real_values
                # Flag observations outside the 95% prediction interval; copy so the column assignment below does not write to a view
                anomalies = forecast[(forecast['real'] < forecast['yhat_lower']) | (forecast['real'] > forecast['yhat_upper'])].copy()

                anomalies['Group'] = row['Rotulo']
                all_anomalies = pd.concat([all_anomalies, anomalies[['ds', 'real', 'Group']]], ignore_index=True)

            # Store the result in session state
            all_anomalies.rename(columns={"ds": "datetime", "real": "monetary value", "Group": "group"}, inplace=True)
            # Keep only anomalies worth at least 10 million, sorted from largest to smallest before formatting as strings
            all_anomalies = all_anomalies[all_anomalies['monetary value'].astype('float') >= 10_000_000.00]
            all_anomalies.sort_values(by=['monetary value'], ascending=False, inplace=True)
            all_anomalies['monetary value'] = all_anomalies['monetary value'].apply(lambda x: f"{x:.2f}")
            all_anomalies = all_anomalies.fillna('').astype(str)
            st.session_state['all_anomalies'] = all_anomalies

    # Load translation models
    pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
    en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
    tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")

    # Load TAPEX model
    tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
    tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
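    # These models are re-loaded on every Streamlit rerun once a file is uploaded; st.cache_resource could cache them across reruns.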

    def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
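        # Encode, generate, and decode with the given T5 translation model; source_lang/target_lang are informational only and are not passed to the model.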
        input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
        outputs = model.generate(input_ids)
        translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translated_text

    def response(user_question, table_data):
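        # Translate the question to English, query TAPEX over the anomaly table, then translate the answer back to Portuguese.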
        question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
        encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
        outputs = tapex_model.generate(**encoding)
        response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
        return response_pt

    # Streamlit interface
    st.dataframe(st.session_state['all_anomalies'].head())

    # Chat history
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    user_question = st.text_input("Escreva sua questão aqui:", "")

    if user_question:
        st.session_state['history'].append(('👤', user_question))
        bot_response = response(user_question, st.session_state['all_anomalies'])
        st.session_state['history'].append(('🤖', bot_response))
        # The exchange is rendered by the history loop below.

    if st.button("Limpar"):
        st.session_state['history'] = []

    for sender, message in st.session_state['history']:
        if sender == '👤':
            st.markdown(f"**👤 {message}**")
        elif sender == '🤖':
            st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
else:
    st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")