fschwartzer committed on
Commit
caea1f5
1 Parent(s): bafee93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import torch
4
  from transformers import pipeline
5
  import datetime
 
6
 
7
  # Load the CSV file
8
  df = pd.read_csv("anomalies.csv", quotechar='"')
@@ -13,25 +14,39 @@ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
13
  # Fill NaN values and convert all columns to strings
14
  df = df.fillna('').astype(str)
15
 
16
- # Subset the DataFrame for the relevant query
17
def subset_dataframe(df, date, group_keyword):
    """Select the rows for one date whose group contains a keyword.

    Args:
        df: DataFrame with 'ds' and 'Group' columns.
        date: Value a row's 'ds' entry must equal exactly.
        group_keyword: Pattern looked for in 'Group', ignoring case.

    Returns:
        The rows of ``df`` that satisfy both conditions.
    """
    on_requested_date = df['ds'] == date
    in_requested_group = df['Group'].str.contains(group_keyword, case=False)
    return df[on_requested_date & in_requested_group]
 
 
 
 
 
 
 
 
 
20
 
21
  # Function to generate a response using the TAPAS model
22
  def response(user_question, df):
23
  a = datetime.datetime.now()
24
 
25
- # Subset the DataFrame for December 2022 and IPVA
26
- subset_df = subset_dataframe(df, "2022-12-01", "IPVA")
 
 
 
 
 
27
 
28
  # Initialize the TAPAS model
29
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
30
  tokenizer_kwargs={"clean_up_tokenization_spaces": False})
31
 
32
  # Debugging information
33
- print("Subset DataFrame shape:", subset_df.shape)
34
- print("Subset DataFrame head:\n", subset_df.head())
35
  print("User question:", user_question)
36
 
37
  # Query the TAPAS model
 
3
  import torch
4
  from transformers import pipeline
5
  import datetime
6
+ from rapidfuzz import process, fuzz
7
 
8
  # Load the CSV file
9
  df = pd.read_csv("anomalies.csv", quotechar='"')
 
14
  # Fill NaN values and convert all columns to strings
15
  df = df.fillna('').astype(str)
16
 
17
+ # Function to filter the DataFrame using RapidFuzz for dates
18
def filter_dataframe_by_date(df, date_str, threshold=80):
    """Return the rows of ``df`` whose 'ds' value fuzzily matches ``date_str``.

    Args:
        df: DataFrame containing a 'ds' column.
        date_str: Text compared against every 'ds' value.
        threshold: Minimum ``fuzz.token_sort_ratio`` score a row needs in
            order to be kept.

    Returns:
        The matching rows, in the order RapidFuzz reports the matches.
    """
    # NOTE(review): the comparison is purely textual, so date_str presumably
    # has to be written in the same format as the 'ds' values -- confirm.

    # Hand RapidFuzz a plain list so the third element of each match is the
    # row *position*. Given a Series it reports the index *label* instead,
    # which is wrong for .iloc whenever df's index is not 0..n-1 (for example
    # a frame that has already been filtered).
    choices = df['ds'].tolist()
    matches = process.extract(date_str, choices, scorer=fuzz.token_sort_ratio, limit=None)
    positions = [position for _, score, position in matches if score >= threshold]
    return df.iloc[positions]
23
+
24
+ # Function to filter the DataFrame using RapidFuzz for groups
25
def filter_dataframe_by_group(df, group_keyword, threshold=80):
    """Return the rows of ``df`` whose 'Group' value fuzzily matches a keyword.

    Args:
        df: DataFrame containing a 'Group' column. It may be a subset of a
            larger frame and keep that frame's index labels.
        group_keyword: Text compared against every 'Group' value.
        threshold: Minimum ``fuzz.token_sort_ratio`` score a row needs in
            order to be kept.

    Returns:
        The matching rows, in the order RapidFuzz reports the matches.
    """
    # Hand RapidFuzz a plain list so the third element of each match is the
    # row *position*. Given a Series it reports the index *label* instead,
    # which is wrong for .iloc whenever df's index is not 0..n-1 (for example
    # a frame that has already been filtered).
    choices = df['Group'].tolist()
    matches = process.extract(group_keyword, choices, scorer=fuzz.token_sort_ratio, limit=None)
    positions = [position for _, score, position in matches if score >= threshold]
    return df.iloc[positions]
30
 
31
  # Function to generate a response using the TAPAS model
32
  def response(user_question, df):
33
  a = datetime.datetime.now()
34
 
35
+ # Extract date and group keywords from the user question
36
+ date_str = "December 2022" # Example; you'd extract this from the user question
37
+ group_keyword = "IPVA"
38
+
39
+ # Filter the DataFrame by date and group
40
+ subset_df = filter_dataframe_by_date(df, date_str)
41
+ subset_df = filter_dataframe_by_group(subset_df, group_keyword)
42
 
43
  # Initialize the TAPAS model
44
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
45
  tokenizer_kwargs={"clean_up_tokenization_spaces": False})
46
 
47
  # Debugging information
48
+ print("Filtered DataFrame shape:", subset_df.shape)
49
+ print("Filtered DataFrame head:\n", subset_df.head())
50
  print("User question:", user_question)
51
 
52
  # Query the TAPAS model