Hi, I have/had the same issue and found nothing close to what I was looking for. I understand and respect the reasoning. However, for me it was more important to have my collection tagged with the oldest date for each recording, understanding that some dates will be incorrect and that at some point I may have to correct them manually. I can live with a small margin of error, so without further ado, here is a solution that worked for me: a buggy Python script that I adapted from another example on the web.
You can run it on a single file (script_name filename) or on a whole directory tree:
find ./ -type f -name '*.mp3' -exec script_name '{}' \;
#!/usr/bin/env python3
# Copyright (c) 2018 Kristofer Berggren
# All rights reserved.
#
# idntag is distributed under the MIT license, see LICENSE for details.
import acoustid
import argparse
import base64
import glob
import os
import re
import taglib
import time
import musicbrainzngs
from datetime import datetime
# AcoustID application key passed to acoustid.lookup() in acoustid_matches().
# NOTE(review): the key is hard-coded in the source; consider reading it from
# the environment or a config file instead.
API_KEY = '1vOwZtEn'
# Minimum score the first AcoustID result must reach to be accepted.
SCORE_THRESH = 0.5
# Maps a file path to the (recording_ids, release_ids) tuple stored by
# acoustid_matches(); paths that could not be matched have no entry.
_matches = {}
def acoustid_matches(path):
    """Look up *path* on AcoustID and record its IDs in ``_matches``.

    The file is fingerprinted, the fingerprint is sent to the AcoustID web
    service, and only the first result of the response is considered. On
    success ``_matches[path]`` is set to a ``(recording_ids, release_ids)``
    tuple of lists. On any failure a message is printed and ``_matches`` is
    left untouched.

    Args:
        path: Path of the audio file to identify.

    Returns:
        Always ``None``; the outcome is communicated through ``_matches``.
    """
    print('\npath ', path)
    try:
        duration, fp = acoustid.fingerprint_file(path)
    except acoustid.FingerprintGenerationError as exc:
        # print() does not interpolate its extra arguments the way a logging
        # call does, so the template has to be formatted explicitly.
        print(u'fingerprinting of {0} failed: {1}'.format(path, exc))
        return None
    try:
        res = acoustid.lookup(API_KEY, fp, duration,
                              meta='recordings releases')
    except acoustid.AcoustidError as exc:
        print(u'fingerprint matching {0} failed: {1}'.format(path, exc))
        return None

    # Ensure the response is usable and parse it.
    if res.get('status') != 'ok' or not res.get('results'):
        print(u'no match found')
        return None
    result = res['results'][0]  # Best match.
    # A result without a score is treated as falling below the threshold.
    if result.get('score', 0) < SCORE_THRESH:
        print(u'no results above threshold')
        return None

    # Get recording and releases from the result.
    if not result.get('recordings'):
        print(u'no recordings found')
        return None
    recording_ids = []
    release_ids = []
    for recording in result['recordings']:
        recording_ids.append(recording['id'])
        release_ids.extend(rel['id'] for rel in recording.get('releases', ()))
    _matches[path] = recording_ids, release_ids
    return None
def rate_limit(min_interval):
    """Block until at least *min_interval* seconds separate two calls.

    The time of the previous call is kept on the function object itself as
    ``rate_limit.last_timestamp``; the very first call never sleeps.

    Args:
        min_interval: Minimum number of seconds between consecutive calls.
    """
    if hasattr(rate_limit, 'last_timestamp'):
        elapsed = time.time() - rate_limit.last_timestamp
        remaining = min_interval - elapsed
    else:
        # First call: there is no earlier timestamp to wait on.
        remaining = 0
    if remaining > 0:
        time.sleep(remaining)
    rate_limit.last_timestamp = time.time()
def calc_date(release_date, release_year):
    """Return the earlier of *release_year* and the year in *release_date*.

    Args:
        release_date: Date string in ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``
            form. Anything that cannot be parsed (including ``None`` and the
            empty string) is ignored.
        release_year: The earliest year found so far, as an ``int``.

    Returns:
        The parsed year when it is earlier than *release_year*, otherwise
        *release_year* unchanged.
    """
    # The string length selects the format; any other length falls back to
    # the full-date format, as the original code did.
    formats = {4: '%Y', 7: '%Y-%m', 10: '%Y-%m-%d'}
    parsed = None
    if isinstance(release_date, str):
        fmt = formats.get(len(release_date), '%Y-%m-%d')
        try:
            parsed = datetime.strptime(release_date, fmt)
        except ValueError:
            # Malformed dates (e.g. '19xx', '1999-13') must not abort the
            # run; they simply do not contribute a year.
            parsed = None
    if parsed is not None and parsed.year < release_year:
        release_year = parsed.year
    print("year: ", release_year)
    return release_year
def calc_older_date_from_acoustid(id, release_year):
    """Lower *release_year* to the earliest release year of a recording.

    The recording is fetched from MusicBrainz together with its releases and
    every release that carries a date is fed through ``calc_date``. All
    returned releases are inspected, not only the first one, because the
    goal is the oldest release.

    Args:
        id: Recording ID to look up (the name shadows the builtin but is
            kept so existing callers keep working).
        release_year: The earliest year found so far, as an ``int``.

    Returns:
        The earliest year seen, or *release_year* unchanged when the lookup
        fails or no release has a usable date.
    """
    releases = []
    try:
        result = musicbrainzngs.get_recording_by_id(id, includes=["releases"])
    except musicbrainzngs.WebServiceError as err:
        # WebServiceError covers both bad responses and network failures, so
        # a dropped connection skips this recording instead of aborting.
        print("err ", err)
        # Not every failure carries an HTTP status code on its cause.
        status = getattr(getattr(err, 'cause', None), 'code', None)
        if status == 404:
            print("disc not found")
        else:
            print("received bad response from the MB server")
    else:
        recording = result.get('recording') if result else None
        if recording:
            releases = recording.get('release-list') or []

    for release in releases:
        if release.get('date'):
            release_year = calc_date(release['date'], release_year)
    return release_year
def _write_release_year(path, release_date):
    """Overwrite the existing DATE/ORIGINALYEAR tags of *path*; return success."""
    try:
        song = taglib.File(path)
    except OSError as exc:
        print("FAIL : cannot open file for tagging ({0})".format(exc))
        return False

    changed = False
    # Tag key -> label used when printing the value being replaced.
    labels = (("DATE", "current YEAR "),
              ("ORIGINALYEAR", "current ORIGINALYEAR "))
    for key, label in labels:
        current = song.tags.get(key)
        # NOTE(review): a tag that is not already present is never created,
        # matching the original behaviour -- confirm that this is intended.
        if current is None:
            continue
        if not current or current[0] != release_date:
            print(label, current)
            # Tag values are stored as lists of strings, not bare strings.
            song.tags[key] = [release_date]
            changed = True

    if changed:
        try:
            song.save()
        except Exception as exc:
            # Best effort: report the failure and let the caller carry on
            # with the next file instead of hiding the error.
            print("FAIL : could not save tags ({0})".format(exc))
            return False
    return True


def identify_and_update(path):
    """Tag the audio file at *path* with its oldest known release year.

    Steps: identify the file through ``acoustid_matches``, ask MusicBrainz
    for the release dates of every matched recording, keep the earliest
    year, and write it to the file's existing ``DATE`` and ``ORIGINALYEAR``
    tags.

    Args:
        path: Path of the audio file to process.

    Returns:
        ``True`` when a release year was found and the tags were handled
        without error, ``False`` otherwise.
    """
    no_year = 2200  # Sentinel: later than any real release year.
    release_year = no_year

    acoustid_matches(path)
    musicbrainzngs.set_useragent(
        "python-musicbrainz-ngs-example",
        "0.1",
        "https://github.com/alastair/python-musicbrainz-ngs/",
    )

    # _matches[path] is (recording_ids, release_ids). The entry is missing
    # when identification failed, which must not crash the run.
    recording_ids = _matches.get(path, ((), ()))[0]
    if not recording_ids:
        print('acoustIDs NOT FOUND!!!')
        return False

    for recording_id in recording_ids:
        release_year = calc_older_date_from_acoustid(recording_id, release_year)
    release_date = str(release_year)

    rate_limit(1.0/3.0)
    try:
        # The results are materialised inside the try block so that errors
        # raised while the response is being consumed are handled here too.
        results = list(
            acoustid.match(base64.b64decode(b'Ym5jenB4cmtoOA=='), path))
    except acoustid.NoBackendError:
        print("FAIL : backend library not found")
        return False
    except acoustid.FingerprintGenerationError:
        print("FAIL : fingerprint generation error")
        return False
    except acoustid.WebServiceError as exc:
        print("FAIL : web service error (" + str(getattr(exc, 'message', exc)) + ")")
        return False

    if not results:
        print("FAIL : no matches found")
        return False
    if release_year == no_year:
        # No release carried a usable date; never write the sentinel.
        print("FAIL : no release date found")
        return False

    # Every match would write the same year, so the file is tagged once.
    if not _write_release_year(path, release_date):
        return False
    print("OK release_year: ", release_date)
    return True
def _expand_paths(roots):
    """Return the sorted regular files found at or below each path in *roots*."""
    files = set()
    pending = list(roots)
    while pending:
        current = pending.pop()
        if os.path.isfile(current):
            files.add(current)
        elif os.path.isdir(current):
            # os.listdir is used instead of glob so that directory names
            # containing glob metacharacters ('[', ']', '?', '*') are still
            # descended into.
            try:
                names = os.listdir(current)
            except OSError as exc:
                print("cannot read directory {0}: {1}".format(current, exc))
                continue
            # Hidden entries are skipped, as the '*' glob pattern did.
            pending.extend(os.path.join(current, name)
                           for name in names if not name.startswith('.'))
    return sorted(files)


def main():
    """Parse the command line and process every file it names.

    Each positional argument is a file or a directory; directories are
    searched recursively. Files are handled in sorted order so that runs are
    reproducible.
    """
    parser = argparse.ArgumentParser(
        prog="idntag",
        description=(
            "Find oldest release year and update track. "
            "This is so we can make play lists such as: "
            "60s, 70s, 80s, etc... "
        ),
    )
    parser.add_argument("-v", "--version", action='version', version='%(prog)s v1.03')
    parser.add_argument('path', nargs='+', help='path of a file or directory')
    args = parser.parse_args()

    roots = [os.path.join(os.getcwd(), path) for path in args.path]
    for path in _expand_paths(roots):
        identify_and_update(path)


if __name__ == "__main__":
    main()