PlexKodiConnect/resources/lib/plex_api/fanart_lookup.py

185 lines
7.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, unicode_literals
from builtins import str
from builtins import range
from logging import getLogger
from re import sub
from string import punctuation
from ..downloadutils import DownloadUtils as DU
from .. import utils, variables as v
# Logger shared by all functions of this module
LOG = getLogger('PLEX.api.fanartlookup')
# themoviedb.org API key; fetched once via utils.settings() when this module is
# imported
API_KEY = utils.settings('themoviedbAPIKey')
# How far apart (in years) may the year of the Plex item and the year reported
# by themoviedb be for a search result to still be considered? See
# __year_almost_matches()
YEARS_APART = 1
# levenshtein_distance_ratio() returns a value between 0 (no match) and 1 (full
# match). external_item_id() rejects the best search result if its ratio is
# below this threshold
LEVENSHTEIN_RATIO_THRESHOLD = 0.95
# Characters that sanitize_string() strips from video titles before they are
# compared: all ASCII punctuation characters
EXCLUDE_CHARS = set(punctuation)
def external_item_id(title, year, plex_type, collection):
    """
    Identifies a Plex item on themoviedb.org and returns its external id.

    themoviedb is searched for `title`. Results whose year is too far away
    from `year` are dropped (see __year_almost_matches), the remaining ones
    are ranked by the similarity of their sanitized title to the sanitized
    `title`. The best result must reach LEVENSHTEIN_RATIO_THRESHOLD; ties are
    resolved in favor of the most popular result.

    title:      unicode title of the Plex item. A trailing ' (YYYY)' is
                removed before searching
    year:       year of the Plex item (anything int() accepts) or a falsy
                value in order to skip the year filter
    plex_type:  Plex type of the item. v.PLEX_TYPE_SHOW is looked up as 'tv',
                any other value is handed to themoviedb unchanged
    collection: False to look up the item itself. Any other value looks up
                the collection the item belongs to instead

    Returns None if themoviedb could not be reached or did not yield a good
    enough match. Otherwise returns the tuple (media_id, poster, background);
    each element may be None:
        media_id    collection is False: the item's imdb id or, failing
                    that, its tvdb id. Otherwise the tmdb id of the
                    collection
        poster      url of the collection's poster (collection lookups only)
        background  url of the collection's backdrop (collection lookups
                    only)
    """
    LOG.debug('Start identifying %s (%s, %s)', title, year, plex_type)
    year = int(year) if year else None
    media_type = 'tv' if plex_type == v.PLEX_TYPE_SHOW else plex_type
    # if the title has the year in remove it as tmdb cannot deal with it...
    # replace e.g. 'The Americans (2015)' with 'The Americans'
    title = sub(r'\s*\(\d{4}\)$', '', title, count=1)
    url = 'https://api.themoviedb.org/3/search/%s' % media_type
    parameters = {
        'api_key': API_KEY,
        'language': v.KODILANGUAGE,
        'query': title.encode('utf-8')
    }
    data = DU().downloadUrl(url,
                            authenticate=False,
                            parameters=parameters,
                            timeout=7)
    try:
        data = data['results']
    except (AttributeError, KeyError, TypeError):
        LOG.debug('No match found on themoviedb for %s (%s, %s)',
                  title, year, media_type)
        return
    LOG.debug('themoviedb returned results: %s', data)
    # Some entries don't contain a title or id - get rid of them. Movie
    # results carry their title in 'title', tv show results in 'name'
    data = [x for x in data
            if 'id' in x and (x.get('title') or x.get('name'))]
    # Get rid of all results that do NOT have a matching release year
    if year:
        data = [x for x in data if __year_almost_matches(year, x)]
    if not data:
        LOG.debug('Empty results returned by themoviedb for %s (%s, %s)',
                  title, year, media_type)
        return
    # Calculate how similar the titles are
    title = sanitize_string(title)
    for entry in data:
        entry['match_score'] = levenshtein_distance_ratio(
            sanitize_string(entry.get('title') or entry['name']), title)
    # (one of the possibly many) best match using levenshtein distance ratio
    entry = max(data, key=lambda x: x['match_score'])
    if entry['match_score'] < LEVENSHTEIN_RATIO_THRESHOLD:
        LOG.debug('Best themoviedb match not good enough: %s', entry)
        return
    # Check if we got several matches. If so, take the most popular one.
    # Results lacking a popularity count as least popular; best_matches
    # always contains at least `entry`, so max() cannot fail
    best_matches = [x for x in data
                    if x['match_score'] == entry['match_score']]
    entry = max(best_matches, key=lambda x: x.get('popularity') or 0)
    LOG.debug('Found themoviedb match: %s', entry)

    # lookup external tmdb_id and perform artwork lookup on fanart.tv
    tmdb_id = entry.get('id')
    parameters = {'api_key': API_KEY}
    if media_type == 'movie':
        url = 'https://api.themoviedb.org/3/movie/%s' % tmdb_id
        parameters['append_to_response'] = 'videos'
    elif media_type == 'tv':
        url = 'https://api.themoviedb.org/3/tv/%s' % tmdb_id
        parameters['append_to_response'] = 'external_ids,videos'
    media_id, poster, background = None, None, None
    # Try the Kodi language first and use English as a fallback
    for language in (v.KODILANGUAGE, 'en'):
        parameters['language'] = language
        data = DU().downloadUrl(url,
                                authenticate=False,
                                parameters=parameters,
                                timeout=7)
        if not hasattr(data, 'get'):
            # Anything but a dict-like answer means the download failed
            LOG.warning('Could not download %s with parameters %s',
                        url, parameters)
            continue
        if collection is False:
            if data.get('imdb_id'):
                media_id = str(data.get('imdb_id'))
                break
            if (data.get('external_ids') and
                    data['external_ids'].get('tvdb_id')):
                media_id = str(data['external_ids']['tvdb_id'])
                break
        else:
            parent = data.get('belongs_to_collection')
            if not parent:
                continue
            collection_id = parent.get('id')
            if not collection_id:
                continue
            media_id = str(collection_id)
            LOG.debug('Retrieved collections tmdb id %s for %s',
                      media_id, title)
            # Do NOT reuse `url` here: the next language needs to start with
            # the item's own url again
            collection_url = ('https://api.themoviedb.org/3/collection/%s'
                              % media_id)
            data = DU().downloadUrl(collection_url,
                                    authenticate=False,
                                    parameters=parameters,
                                    timeout=7)
            if not hasattr(data, 'get'):
                LOG.debug('Could not find TheMovieDB poster paths for %s'
                          ' in the language %s', title, language)
                continue
            if not poster and data.get('poster_path'):
                poster = ('https://image.tmdb.org/t/p/original%s' %
                          data.get('poster_path'))
            if not background and data.get('backdrop_path'):
                background = ('https://image.tmdb.org/t/p/original%s' %
                              data.get('backdrop_path'))
            if poster and background:
                # Nothing left that the fallback language could add
                break
    return media_id, poster, background
def __year_almost_matches(year, entry):
    """
    Returns True if the themoviedb search result `entry` is at most
    YEARS_APART years away from `year`. Also returns True if the year of
    `entry` cannot be determined, as the result can then not be ruled out.

    year:  int, year of the Plex item
    entry: dict, a single themoviedb search result. Its year is read from
           the first 4 characters of 'release_date' (movies) or, if that is
           missing or empty, of 'first_air_date' (tv shows)
    """
    date = entry.get('release_date') or entry.get('first_air_date')
    try:
        entry_year = int(date[0:4])
    except (TypeError, ValueError):
        # Date is missing, None, empty or does not start with a year
        return True
    return abs(year - entry_year) <= YEARS_APART
def sanitize_string(s):
    """
    Returns a normalized copy of the string s that is suitable for comparing
    video titles: lowercased, without leading or trailing whitespace, without
    any of the characters in EXCLUDE_CHARS and with every run of whitespace
    collapsed into a single space.
    """
    lowered = s.lower().strip()
    # Drop every character listed in EXCLUDE_CHARS
    kept = [char for char in lowered if char not in EXCLUDE_CHARS]
    # split() without arguments swallows runs of whitespace
    words = ''.join(kept).split()
    return ' '.join(words)
def levenshtein_distance_ratio(s, t):
    """
    Calculates levenshtein distance ratio between two strings.
    The more similar the strings, the closer the result will be to 1.
    The farther disjunct the string, the closer the result to 0
    https://www.datacamp.com/community/tutorials/fuzzy-string-python

    Deletions and insertions cost 1, substitutions cost 2 in order to align
    the result with the ratio of the Python Levenshtein package.

    Returns a float between 0.0 and 1.0. Two empty strings are identical and
    yield 1.0; an empty and a non-empty string yield 0.0.
    """
    total = len(s) + len(t)
    if total == 0:
        # Nothing to compare; also avoids dividing by zero below
        return 1.0
    # Only two rows of the distance matrix are needed at any time.
    # previous[col] is the distance between the part of s consumed so far and
    # the first col characters of t; for the empty prefix of s that is col
    # insertions
    previous = list(range(len(t) + 1))
    for row, s_char in enumerate(s, 1):
        # Turning the first row characters of s into '' takes row deletions
        current = [row]
        for col, t_char in enumerate(t, 1):
            cost = 0 if s_char == t_char else 2
            current.append(min(previous[col] + 1,          # Deletion
                               current[col - 1] + 1,       # Insertion
                               previous[col - 1] + cost))  # Substitution
        previous = current
    # previous[-1] now holds the distance between the complete strings.
    # float() keeps the division exact even without "division" from
    # __future__
    return (total - previous[-1]) / float(total)