tracklist-artist-to-vec / parse_tracklists.py
Thor Kell
add python code
628e563
raw
history blame contribute delete
No virus
1.85 kB
import csv
import re
def load_lines(filename, encoding="utf-8"):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII artist names decode the same way on every platform
            instead of depending on the locale's default encoding.

    Returns:
        A list with one string per line of the file, in file order. Each
        string has its leading and trailing whitespace (including the
        newline) stripped; blank lines are kept as empty strings.
    """
    with open(filename, encoding=encoding) as f:
        return [line.strip() for line in f]
def remove_titles_and_bad_tracks(lines):
    """Keep only the lines that look like identified tracks.

    A line is kept when it begins with a digit and does not contain the
    placeholder "???". Every other line is discarded.

    Args:
        lines: Iterable of tracklist lines (strings).

    Returns:
        A new list holding the kept lines in their original order.
    """
    starts_with_digit = re.compile(r"^\d.*")
    return [
        entry
        for entry in lines
        if starts_with_digit.match(entry) and "???" not in entry
    ]
def group_by_set(lines):
    """Split a flat list of tracklist lines into one list of tracks per set.

    Blank lines are skipped. A line ending in ":" that arrives while tracks
    are pending is treated as a set title and closes the current set. Any
    other line that starts with a digit and contains ":" is collected as a
    track, unless it contains the placeholder "???". All remaining lines are
    ignored.

    Args:
        lines: Iterable of tracklist lines (strings).

    Returns:
        A list of non-empty lists, one per set, each holding that set's track
        lines in input order. Sets that end up with no tracks are omitted.
    """
    is_set_title = re.compile(r".*:$")
    is_track = re.compile(r"^\d.*:")
    grouped_lines = []
    current_set = []
    for line in lines:
        if not line.strip():
            continue
        if is_set_title.match(line) and current_set:
            # A new title closes the set collected so far.
            grouped_lines.append(current_set)
            current_set = []
        elif is_track.match(line) and "???" not in line:
            current_set.append(line)
    # No title follows the last set, so it has to be flushed explicitly;
    # otherwise its tracks would be silently dropped.
    if current_set:
        grouped_lines.append(current_set)
    return grouped_lines
def get_grouped_artists(grouped_lines):
    """Extract the artist name of every track, set by set.

    Each track line is expected to look like "<number>: <artist> - <title>".
    The artist part is stripped of surrounding whitespace and lower-cased.
    Lines that do not match this layout contribute nothing.

    Args:
        grouped_lines: Iterable of sets, each an iterable of track lines.

    Returns:
        A list with one entry per input set (possibly an empty list), each
        holding the artist names found in that set, in track order.
    """
    track_pattern = re.compile(r"\d+\: (.+?) - .+?")

    def artists_in(tracks):
        # Match every line first, then keep only the successful matches.
        hits = (track_pattern.match(track) for track in tracks)
        return [hit.group(1).strip().lower() for hit in hits if hit]

    return [artists_in(tracks) for tracks in grouped_lines]
def write_to_csv(filename, artist_names=None):
    """Write one CSV row per DJ set, with one artist name per cell.

    Args:
        filename: Path of the CSV file to create or overwrite.
        artist_names: Iterable of rows, each an iterable of artist-name
            strings. When omitted, a module-level ``artist_names`` variable
            is used if one exists, which keeps the older one-argument call
            style working.

    Raises:
        ValueError: If no rows were passed and no module-level
            ``artist_names`` is defined.
    """
    if artist_names is None:
        # Legacy call style: the rows used to be read from a module global.
        artist_names = globals().get("artist_names")
    if artist_names is None:
        raise ValueError("artist_names must be provided")
    # newline="" lets the csv module control line endings; explicit UTF-8
    # keeps non-ASCII artist names writable regardless of the locale.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerows(artist_names)


def main():
    """Parse the raw tracklist file and write the per-set artist CSV."""
    filename = "data/radio-original.txt"
    output_filename = "data/artist-names-per-row.csv"
    lines = load_lines(filename)
    grouped_lines = group_by_set(lines)
    artist_names = get_grouped_artists(grouped_lines)
    write_to_csv(output_filename, artist_names)


if __name__ == "__main__":
    main()