edithram23 committed
Commit 3d15ff1
1 Parent(s): adbaf3e

commented_small model

Files changed (1)
  app.py +15 -15
app.py CHANGED
@@ -15,24 +15,24 @@ model_dir_large = 'edithram23/Redaction_Personal_info_v1'
 tokenizer_large = AutoTokenizer.from_pretrained(model_dir_large)
 model_large = AutoModelForSeq2SeqLM.from_pretrained(model_dir_large)
 
-model_dir_small = 'edithram23/Redaction'
-tokenizer_small = AutoTokenizer.from_pretrained(model_dir_small)
-model_small = AutoModelForSeq2SeqLM.from_pretrained(model_dir_small)
-
-def small(text, model=model_small, tokenizer=tokenizer_small):
-    inputs = ["Mask Generation: " + text.lower() + '.']
-    inputs = tokenizer(inputs, max_length=256, truncation=True, return_tensors="pt")
-    output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=len(text))
-    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
-    predicted_title = decoded_output.strip()
-    pattern = r'\[.*?\]'
-    redacted_text = re.sub(pattern, '[redacted]', predicted_title)
-    return redacted_text
+# model_dir_small = 'edithram23/Redaction'
+# tokenizer_small = AutoTokenizer.from_pretrained(model_dir_small)
+# model_small = AutoModelForSeq2SeqLM.from_pretrained(model_dir_small)
+
+# def small(text, model=model_small, tokenizer=tokenizer_small):
+#     inputs = ["Mask Generation: " + text.lower() + '.']
+#     inputs = tokenizer(inputs, max_length=256, truncation=True, return_tensors="pt")
+#     output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=len(text))
+#     decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+#     predicted_title = decoded_output.strip()
+#     pattern = r'\[.*?\]'
+#     redacted_text = re.sub(pattern, '[redacted]', predicted_title)
+#     return redacted_text
 
 def mask_generation(text, model=model_large, tokenizer=tokenizer_large):
-    if len(text) < 200:
+    if len(text) < 90:
         text = text + '.'
-        return small(text)
+        # return small(text)
     inputs = ["Mask Generation: " + text.lower() + '.']
     inputs = tokenizer(inputs, max_length=512, truncation=True, return_tensors="pt")
     output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=len(text))
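
For reference, a minimal standalone sketch of the post-commit behavior: with the small-model branch commented out, every input, short or long, is routed through the large model, and texts under 90 characters only get a '.' appended. The hunk ends at generate(), so the decode/redact tail below is an assumption inferred from the commented-out small() helper, and the sample input is hypothetical.

import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_dir_large = 'edithram23/Redaction_Personal_info_v1'
tokenizer_large = AutoTokenizer.from_pretrained(model_dir_large)
model_large = AutoModelForSeq2SeqLM.from_pretrained(model_dir_large)

def mask_generation(text, model=model_large, tokenizer=tokenizer_large):
    # With this commit every input takes the large-model path; texts under
    # 90 characters just get a '.' appended instead of being handed to the
    # (now commented-out) small model.
    if len(text) < 90:
        text = text + '.'
    inputs = ["Mask Generation: " + text.lower() + '.']
    inputs = tokenizer(inputs, max_length=512, truncation=True, return_tensors="pt")
    output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=len(text))
    # Assumption: the diff hunk stops at generate(); the decode/redact steps
    # here mirror the commented-out small() helper above.
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return re.sub(r'\[.*?\]', '[redacted]', decoded_output.strip())

# Hypothetical input, for illustration only.
print(mask_generation("Hi, I am John and my number is 9876543210"))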