"""Extract artist names from a DJ-set tracklist file into a CSV.

The input is a plain-text file in which set title lines end with ``:`` and
track lines look like ``<number>: <artist> - <title>``.  The output CSV has
one row per set, each cell being a lower-cased artist name.
"""

import csv
import re

# A track line starts with a digit (used by remove_titles_and_bad_tracks).
_STARTS_WITH_DIGIT = re.compile(r"^\d.*")
# A set title is any line that ends with a colon.
_SET_TITLE = re.compile(r".*:$")
# Inside a set, a track line starts with a digit and contains a colon.
_NUMBERED_TRACK = re.compile(r"^\d.*:")
# "<number>: <artist> - <title>"; group 1 captures the artist.
_ARTIST_FROM_TRACK = re.compile(r"\d+\: (.+?) - .+?")

# Marker used in the source data for unidentified tracks.
_UNKNOWN_MARKER = "???"


def load_lines(filename):
    """Return every line of *filename* with surrounding whitespace stripped.

    Blank lines are kept (as empty strings) so the line count is preserved.
    """
    with open(filename) as f:
        return [line.strip() for line in f]


def remove_titles_and_bad_tracks(lines):
    """Return only the lines that start with a digit and contain no ``???``."""
    return [
        line
        for line in lines
        if _STARTS_WITH_DIGIT.match(line) and _UNKNOWN_MARKER not in line
    ]


def group_by_set(lines):
    """Group track lines into one list per set.

    A line ending in ``:`` is a set title and closes the set collected so
    far.  Track lines (starting with a digit and containing ``:``) that do
    not contain ``???`` are added to the current set.  Blank lines and any
    other lines are ignored.

    Returns a list of non-empty lists of track lines, in file order.
    """
    grouped_lines = []
    current_set = []
    for line in lines:
        if not line.strip():
            continue
        if _SET_TITLE.match(line):
            # A title is never a track, even when it starts with a digit
            # (e.g. "10 years of X:"); it only closes the previous set.
            if current_set:
                grouped_lines.append(current_set)
                current_set = []
            continue
        if _NUMBERED_TRACK.match(line) and _UNKNOWN_MARKER not in line:
            current_set.append(line)
    # The last set has no following title to close it, so flush it here.
    if current_set:
        grouped_lines.append(current_set)
    return grouped_lines


def get_grouped_artists(grouped_lines):
    """Return, for each set, the lower-cased artist name of every track.

    Lines that do not match ``<number>: <artist> - <title>`` are skipped.
    The result has exactly one list per input set, possibly empty.
    """
    return [
        [
            match.group(1).strip().lower()
            for line in dj_set_lines
            if (match := _ARTIST_FROM_TRACK.match(line))
        ]
        for dj_set_lines in grouped_lines
    ]


def write_to_csv(filename, artist_names=None):
    """Write one CSV row per set to *filename*.

    Args:
        filename: Path of the CSV file to create or overwrite.
        artist_names: Iterable of rows, each an iterable of artist names.
            When omitted, the module-level ``artist_names`` variable is
            used, which keeps the older one-argument call style working.

    Raises:
        NameError: If *artist_names* is omitted and no module-level
            ``artist_names`` exists.
    """
    if artist_names is None:
        try:
            artist_names = globals()["artist_names"]
        except KeyError:
            raise NameError("name 'artist_names' is not defined") from None
    with open(filename, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(artist_names)


if __name__ == "__main__":
    filename = "data/radio-original.txt"
    output_filename = "data/artist-names-per-row.csv"

    lines = load_lines(filename)
    grouped_lines = group_by_set(lines)
    artist_names = get_grouped_artists(grouped_lines)
    write_to_csv(output_filename, artist_names)