fschwartzer committed on
Commit
2b9a5fa
1 Parent(s): 131fcc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -75
app.py CHANGED
@@ -15,79 +15,82 @@ st.markdown("""
15
  """, unsafe_allow_html=True)
16
 
17
  # File upload interface
18
- uploaded_file = st.file_uploader("Upload a CSV or XLSX file", type=['csv', 'xlsx'])
19
 
20
  if uploaded_file:
21
- # Load the file into a DataFrame
22
- if uploaded_file.name.endswith('.csv'):
23
- df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
24
- elif uploaded_file.name.endswith('.xlsx'):
25
- df = pd.read_excel(uploaded_file)
26
-
27
- # Data preprocessing for Prophet
28
- new_df = df.iloc[2:, 9:-1].fillna(0)
29
- new_df.columns = df.iloc[1, 9:-1]
30
- new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)
31
-
32
- month_dict = {
33
- 'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
34
- 'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
35
- 'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
36
- }
37
-
38
- def convert_column_name(column_name):
39
- if column_name == 'Rótulos de Linha':
40
- return column_name
41
- parts = column_name.split('/')
42
- month = parts[0].strip()
43
- year = parts[1].strip()
44
- year = ''.join(filter(str.isdigit, year))
45
- month_number = month_dict.get(month, '00')
46
- return f"{month_number}/{year}"
47
-
48
- new_df.columns = [convert_column_name(col) for col in new_df.columns]
49
- new_df.columns = pd.to_datetime(new_df.columns, errors='coerce')
50
- new_df.rename(columns={new_df.columns[0]: 'Rotulo'}, inplace=True)
51
- df_clean = new_df.copy()
52
-
53
- # Create an empty DataFrame to store all anomalies
54
- all_anomalies = pd.DataFrame()
55
-
56
- # Process each row in the DataFrame
57
- for index, row in df_clean.iterrows():
58
- data = pd.DataFrame({
59
- 'ds': [col for col in df_clean.columns if isinstance(col, pd.Timestamp)],
60
- 'y': row[[isinstance(col, pd.Timestamp) for col in df_clean.columns]].values
61
- })
62
-
63
- data = data[data['y'] > 0].reset_index(drop=True)
64
- if data.empty or len(data) < 2:
65
- print(f"Skipping group {row['Rotulo']} because there are less than 2 non-zero observations.")
66
- continue
67
-
68
- try:
69
- model = Prophet(interval_width=0.95)
70
- model.fit(data)
71
- except ValueError as e:
72
- print(f"Skipping group {row['Rotulo']} due to error: {e}")
73
- continue
74
-
75
- future = model.make_future_dataframe(periods=12, freq='M')
76
- forecast = model.predict(future)
77
-
78
- num_real = len(data)
79
- num_forecast = len(forecast)
80
- real_values = list(data['y']) + [None] * (num_forecast - num_real)
81
- forecast['real'] = real_values
82
- anomalies = forecast[(forecast['real'] < forecast['yhat_lower']) | (forecast['real'] > forecast['yhat_upper'])]
83
-
84
- anomalies['Group'] = row['Rotulo']
85
- all_anomalies = pd.concat([all_anomalies, anomalies[['ds', 'real', 'Group']]], ignore_index=True)
86
-
87
- # Preparing anomalies DataFrame for TAPEX model
88
- all_anomalies.rename(columns={"ds": "datetime", "real": "monetary value", "Group": "explanation"}, inplace=True)
89
- all_anomalies['monetary value'] = all_anomalies['monetary value'].apply(lambda x: f"{x:.2f}")
90
- all_anomalies = all_anomalies.fillna('').astype(str)
 
 
 
91
 
92
  # Load translation models
93
  pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
@@ -113,8 +116,7 @@ if uploaded_file:
113
  return response_pt
114
 
115
  # Streamlit interface
116
-
117
- st.dataframe(all_anomalies.head())
118
 
119
  # Chat history
120
  if 'history' not in st.session_state:
@@ -126,7 +128,7 @@ if uploaded_file:
126
  st.session_state['history'].append(('👤', user_question))
127
  st.markdown(f"**👤 {user_question}**")
128
 
129
- bot_response = response(user_question, all_anomalies)
130
 
131
  st.session_state['history'].append(('🤖', bot_response))
132
  st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)
@@ -140,4 +142,4 @@ if uploaded_file:
140
  elif sender == '🤖':
141
  st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
142
  else:
143
- st.warning("Please upload a CSV or XLSX file to start.")
 
15
  """, unsafe_allow_html=True)
16
 
17
  # File upload interface
18
+ uploaded_file = st.file_uploader("carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])
19
 
20
  if uploaded_file:
21
+ if 'all_anomalies' not in st.session_state:
22
+ with st.spinner('Aplicando modelo de série temporal...'):
23
+ # Load the file into a DataFrame
24
+ if uploaded_file.name.endswith('.csv'):
25
+ df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
26
+ elif uploaded_file.name.endswith('.xlsx'):
27
+ df = pd.read_excel(uploaded_file)
28
+
29
+ # Data preprocessing for Prophet
30
+ new_df = df.iloc[2:, 9:-1].fillna(0)
31
+ new_df.columns = df.iloc[1, 9:-1]
32
+ new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)
33
+
34
+ month_dict = {
35
+ 'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
36
+ 'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
37
+ 'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
38
+ }
39
+
40
+ def convert_column_name(column_name):
41
+ if column_name == 'Rótulos de Linha':
42
+ return column_name
43
+ parts = column_name.split('/')
44
+ month = parts[0].strip()
45
+ year = parts[1].strip()
46
+ year = ''.join(filter(str.isdigit, year))
47
+ month_number = month_dict.get(month, '00')
48
+ return f"{month_number}/{year}"
49
+
50
+ new_df.columns = [convert_column_name(col) for col in new_df.columns]
51
+ new_df.columns = pd.to_datetime(new_df.columns, errors='coerce')
52
+ new_df.rename(columns={new_df.columns[0]: 'Rotulo'}, inplace=True)
53
+ df_clean = new_df.copy()
54
+
55
+ # Create an empty DataFrame to store all anomalies
56
+ all_anomalies = pd.DataFrame()
57
+
58
+ # Process each row in the DataFrame
59
+ for index, row in df_clean.iterrows():
60
+ data = pd.DataFrame({
61
+ 'ds': [col for col in df_clean.columns if isinstance(col, pd.Timestamp)],
62
+ 'y': row[[isinstance(col, pd.Timestamp) for col in df_clean.columns]].values
63
+ })
64
+
65
+ data = data[data['y'] > 0].reset_index(drop=True)
66
+ if data.empty or len(data) < 2:
67
+ print(f"Skipping group {row['Rotulo']} because there are less than 2 non-zero observations.")
68
+ continue
69
+
70
+ try:
71
+ model = Prophet(interval_width=0.95)
72
+ model.fit(data)
73
+ except ValueError as e:
74
+ print(f"Skipping group {row['Rotulo']} due to error: {e}")
75
+ continue
76
+
77
+ future = model.make_future_dataframe(periods=12, freq='M')
78
+ forecast = model.predict(future)
79
+
80
+ num_real = len(data)
81
+ num_forecast = len(forecast)
82
+ real_values = list(data['y']) + [None] * (num_forecast - num_real)
83
+ forecast['real'] = real_values
84
+ anomalies = forecast[(forecast['real'] < forecast['yhat_lower']) | (forecast['real'] > forecast['yhat_upper'])]
85
+
86
+ anomalies['Group'] = row['Rotulo']
87
+ all_anomalies = pd.concat([all_anomalies, anomalies[['ds', 'real', 'Group']]], ignore_index=True)
88
+
89
+ # Store the result in session state
90
+ all_anomalies.rename(columns={"ds": "datetime", "real": "monetary value", "Group": "group"}, inplace=True)
91
+ all_anomalies['monetary value'] = all_anomalies['monetary value'].apply(lambda x: f"{x:.2f}")
92
+ all_anomalies = all_anomalies.fillna('').astype(str)
93
+ st.session_state['all_anomalies'] = all_anomalies
94
 
95
  # Load translation models
96
  pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
 
116
  return response_pt
117
 
118
  # Streamlit interface
119
+ st.dataframe(st.session_state['all_anomalies'].head())
 
120
 
121
  # Chat history
122
  if 'history' not in st.session_state:
 
128
  st.session_state['history'].append(('👤', user_question))
129
  st.markdown(f"**👤 {user_question}**")
130
 
131
+ bot_response = response(user_question, st.session_state['all_anomalies'])
132
 
133
  st.session_state['history'].append(('🤖', bot_response))
134
  st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)
 
142
  elif sender == '🤖':
143
  st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
144
  else:
145
+ st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")