fschwartzer committed
Commit acf8351
1 Parent(s): 6c96c7d

Update app.py

Files changed (1):
  1. app.py +19 -8
app.py CHANGED

@@ -3,6 +3,7 @@ import pandas as pd
 import torch
 from transformers import pipeline
 from transformers import TapasTokenizer, TapasForQuestionAnswering
+import datetime
 
 df = pd.read_excel('discrepantes.xlsx', index_col='Unnamed: 0')
 df.fillna(0, inplace=True)
@@ -10,17 +11,27 @@ table_data = df.astype(str)
 print(table_data.head())
 
 def response(user_question, table_data):
-    tokenizer = TapasTokenizer.from_pretrained("google/tapas-large-finetuned-wtq", drop_rows_to_fit=True)
-    model = TapasForQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
+    a = datetime.datetime.now()
 
-    inputs = tokenizer(table=table_data, queries=user_question, padding="max_length", truncation=True, return_tensors="pt")
-    outputs = model(**inputs)
+    model_name = "microsoft/tapex-large-finetuned-wtq"
+    model = BartForConditionalGeneration.from_pretrained(model_name)
+    tokenizer = TapexTokenizer.from_pretrained(model_name)
 
-    predicted_answer_coordinates = outputs.predicted_answer_coordinates.detach().cpu().numpy()
-    id2aggregation = {0: 'NONE', 1: 'SUM', 2: 'AVERAGE', 3: 'COUNT'}
-    aggregation_predictions = id2aggregation[outputs.aggregation_predictions.detach().cpu().numpy()[0]]
+    queries = [user_question]
 
-    return predicted_answer_coordinates, aggregation_predictions
+    encoding = tokenizer(table=table_data, query=queries, padding=True, return_tensors="pt",truncation=True)
+    outputs = model.generate(**encoding)
+    ans = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+    query_result = {
+        "query": query,
+        "answer": ans[0]
+    }
+
+    b = datetime.datetime.now()
+    print(b - a)
+
+    return query_result, table
 
 # Streamlit interface
 st.markdown("""
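Note that the new response() body will not run as committed: BartForConditionalGeneration and TapexTokenizer are never imported, and the names query and table are undefined at the point they are used (the surrounding code suggests user_question and table_data were intended). A minimal corrected sketch under those assumptions, not the committed code:

# Hedged sketch: adds the missing imports and replaces the undefined
# names `query` and `table` with what the surrounding code suggests.
import datetime

import pandas as pd
from transformers import BartForConditionalGeneration, TapexTokenizer

def response(user_question, table_data):
    a = datetime.datetime.now()

    # TAPEX pairs a BART seq2seq model with a tokenizer that linearizes tables.
    model_name = "microsoft/tapex-large-finetuned-wtq"
    model = BartForConditionalGeneration.from_pretrained(model_name)
    tokenizer = TapexTokenizer.from_pretrained(model_name)

    queries = [user_question]

    # The TAPEX tokenizer takes a DataFrame of strings and a list of queries.
    encoding = tokenizer(table=table_data, query=queries, padding=True,
                         return_tensors="pt", truncation=True)
    outputs = model.generate(**encoding)
    ans = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    query_result = {
        "query": user_question,  # the commit uses the undefined name `query`
        "answer": ans[0]
    }

    b = datetime.datetime.now()
    print(b - a)  # rough end-to-end latency: tokenize + generate + decode

    return query_result, table_data  # the commit returns the undefined name `table`

Loading the model and tokenizer inside response() also means every question pays the full from_pretrained cost, so the printed timing mostly measures model loading; hoisting both to module level would leave it measuring only inference.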