naveed-stockmark committed on
Commit
5c0b561
1 Parent(s): da8a25b

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +76 -0
utils.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import jsonlines
2
+ import json
3
+
4
def read_jsonlines(path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        path: Location of the JSON Lines file, handed to ``jsonlines.open``.

    Returns:
        A list holding every object yielded by the reader, in file order.
    """
    with jsonlines.open(path) as reader:
        # The reader is iterable, so ``list`` materialises it directly; a
        # pass-through comprehension adds nothing.
        return list(reader)
8
+
9
def write_jsonlines(file, path):
    """Write the objects in ``file`` to ``path`` through ``jsonlines``.

    Despite its name, ``file`` is the collection of records to serialise,
    not a file handle. ``path`` is opened in write mode.
    """
    with jsonlines.open(path, mode='w') as sink:
        sink.write_all(file)
12
+
13
def dump_json(file, path):
    """Serialise ``file`` as JSON into ``path`` and report the location.

    The file is written as UTF-8 with a four-space indent and with non-ASCII
    characters left unescaped. A confirmation line is printed afterwards.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(file, handle, ensure_ascii=False, indent=4)
    print(f"Saved json to path: {path!s}")
19
+
20
def load_json(path):
    """Parse the JSON document stored at ``path`` and return the result.

    The file is opened in binary mode and handed to ``json.load``; a
    confirmation line is printed before the parsed object is returned.
    """
    with open(path, mode='rb') as handle:
        parsed = json.load(handle)
    print(f"Loaded json from path: {path!s}")
    return parsed
26
+
27
def split_w_delimiter(line, delimiter):
    """Split ``line`` on ``delimiter``, keeping the delimiter on each piece.

    Pieces that are empty or whitespace-only are dropped. Every kept piece
    that was followed by ``delimiter`` in ``line`` gets it re-appended; only
    a trailing piece with no delimiter after it is returned bare.

    Args:
        line: Text to split.
        delimiter: Non-empty separator string.

    Returns:
        The kept pieces in their original order, or ``[]`` when ``line``
        contains nothing but whitespace and delimiters.

    Raises:
        ValueError: If ``delimiter`` is empty (raised by ``str.split``).
    """
    pieces = line.split(delimiter)
    final_index = len(pieces) - 1
    # ``str.split`` places a delimiter after every piece except the last one,
    # so the piece's position -- not the tail of ``line`` -- decides whether
    # the delimiter is restored. Looking only at the end of the line would
    # drop the delimiter for inputs such as "a.b.\n", where nothing but
    # whitespace follows the final delimiter.
    return [
        piece + delimiter if index < final_index else piece
        for index, piece in enumerate(pieces)
        if piece.strip()
    ]
41
+
42
+ import difflib
43
+
44
def generate_diff_html(text_pairs, output_file=''):
    """Render side-by-side HTML diffs for pairs of texts.

    Args:
        text_pairs: Iterable of ``(text, text_processed)`` string pairs. Each
            pair is compared line by line and rendered under a ``Pair N``
            heading, numbered from 1.
        output_file: Optional destination path. When non-empty, the page is
            also written there as UTF-8; ``''`` or ``None`` skips the write.

    Returns:
        The complete HTML document as a string.
    """
    # ``HtmlDiff.make_table`` marks changes with <span> elements carrying the
    # classes diff_add / diff_sub / diff_chg, so those selectors are required
    # for any highlighting to show up. The ins/del rules are kept as they were.
    diff_css = '''
    <style>
        ins {
            color: white;
            background-color: #d4fcbc;
            text-decoration: none;
        }
        del {
            background-color: #fbb6c2;
            text-decoration: none;
        }
        .diff_add {
            background-color: #d4fcbc;
        }
        .diff_sub {
            background-color: #fbb6c2;
        }
        .diff_chg {
            background-color: #ffff77;
        }
    </style>
    '''

    # Collect fragments and join once instead of growing a string in the loop.
    parts = [
        '<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="UTF-8">\n<title>Text Diff</title>\n',
        diff_css,
        '\n</head>\n<body>\n',
    ]

    # One differ serves every pair; it is created once outside the loop.
    differ = difflib.HtmlDiff()
    for i, (text, text_processed) in enumerate(text_pairs, 1):
        parts.append(f'<h3>Pair {i}</h3>\n')
        parts.append(
            differ.make_table(text.splitlines(), text_processed.splitlines())
        )
        parts.append('<br>\n')

    parts.append('</body>\n</html>')
    diff_html = ''.join(parts)

    if output_file:
        # The page declares charset UTF-8, so the file must be written as
        # UTF-8 regardless of the platform's default encoding.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(diff_html)

    return diff_html