Here are a couple of scanners from MCM; they haven't been working well for me, though. Then again, the other scanners and agents aren't working 100% either, especially for TV shows, where it's a joke.
They are supposed to read local data to collect everything they need.
#!/usr/bin/python2.4
# Modified by Cyrille Lefevre to remove french tags
import Filter
import os.path, re, datetime, titlecase, unicodedata
# File extensions (without the dot) that are treated as video files.
video_exts = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bin', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli', 'flv', 'ifo', 'img',
              'iso', 'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nrg', 'nsv', 'nuv', 'ogm', 'ogv',
              'pva', 'qt', 'rm', 'rmvb', 'sdp', 'svq3', 'strm', 'ts', 'ty', 'vdr', 'viv', 'vob', 'vp3', 'wmv', 'wpl', 'wtv', 'xsp', 'xvid', 'webm']

# Regular expressions for files / directories / suffixes to skip.
# NOTE(review): the escapes and the '.*' around 'bonus disc' were lost in the
# paste this file came from and have been reconstructed -- confirm against the
# original module.
ignore_files = [r'[-\._ ]sample', r'sample[-\._ ]', r'-trailer\.']
ignore_dirs = ['extras?', '!?samples?', 'bonus', '.*bonus disc.*']
ignore_suffixes = ['.dvdmedia']

# Release-source tags, grouped by the kind of source they denote.
source_dict = {'bluray': ['bdrc', 'bdrip', 'bluray', 'bd', 'brrip', 'hdrip', 'hddvd', 'hddvdrip'], 'cam': ['cam'], 'dvd': ['ddc', 'dvdrip', 'dvd', 'r1', 'r3'], 'retail': ['retail'],
               'dtv': ['dsr', 'dsrip', 'hdtv', 'pdtv', 'ppv'], 'stv': ['stv', 'tvrip', 'tv'], 'r5': ['r5'], 'screener': ['bdscr', 'dvdscr', 'dvdscreener', 'scr', 'screener'],
               'svcd': ['svcd'], 'vcd': ['vcd'], 'telecine': ['tc', 'telecine'], 'telesync': ['ts', 'telesync'], 'workprint': ['wp', 'workprint'], 'vhs': ['vhs', 'vhsrip']}

# Flat list of every source tag.  The emptiness test is applied to the tag
# itself (the previous code compared the whole list against '', which was
# always true).
source = [s for tags in source_dict.values() for s in tags if s != '']

# Audio channel specs; CleanName() replaces each match with a single space.
audio = [r'([^0-9])5\.1[ ]*ch(.)', r'([^0-9])5\.1([^0-9]?)', r'([^0-9])7\.1[ ]*ch(.)', r'([^0-9])7\.1([^0-9])']
subs = ['multi', 'multisubs']
misc = ['cd1', 'cd2', '1cd', '2cd', 'custom', 'internal', 'repack', 'read.nfo', 'readnfo', 'nfofix', 'proper', 'rerip', 'dubbed', 'subbed', 'extended', 'unrated', 'xxx', 'nfo', 'dvxa', 'lte']
french = ['french', 'truefrench', 'subfrench', 'frenchedit', 'vf', 'vvf', 'vo', 'vostfr', 'vost', 'rip']
format = ['ac3', 'dc', 'divx', 'fragment', 'limited', 'ogg', 'ogm', 'ntsc', 'pal', 'ps3avchd', 'r1', 'r3', 'r5', '720i', '720p', '1080i', '1080p', 'x264', 'xvid', 'vorbis', 'aac', 'dts', 'fs', 'ws', '1920x1080', '1280x720', 'h264']
edition = ['dc', 'se']  # dc = directors cut, se = special edition

# A four-digit year with a delimiter on each side: group 1 is the leading
# delimiter, group 2 the year, group 3 the trailing delimiter.
# NOTE(review): escapes (and the '_' in the trailing class) were reconstructed;
# the pasted form contained an invalid character range and could not compile.
yearRx = r'([\(\[\.\-])([1-2][0-9]{3})([\.\-\)\]_,+])'
# Cleanup folder / filenames
def CleanName(name, noYear=False):
    """Strip release tags from a folder or file name.

    name   -- UTF-8 encoded byte string (it is decoded with 'utf-8').
    noYear -- when True, no attempt is made to find a year in the name.

    Returns a tuple (cleaned_name, year): cleaned_name is the UTF-8 encoded
    result passed through titlecase.titlecase(); year is an int, or None when
    no year strictly between 1900 and next year was found.
    """
    # Make sure we pre-compose.
    name = unicodedata.normalize('NFKC', name.decode('utf-8'))
    name = name.lower()

    # Grab the year, if there is one, and leave a marker in its place so that
    # everything after it can be ignored later on.
    year = None
    if not noYear:
        yearMatch = re.search(yearRx, name)
        if yearMatch:
            yearStr = yearMatch.group(2)
            yearInt = int(yearStr)
            if 1900 < yearInt < (datetime.date.today().year + 1):
                year = yearInt
                # The marker must be spelled exactly like the token tested in
                # the final loop below.
                name = name.replace(yearMatch.group(1) + yearStr + yearMatch.group(3), ' *yearBreak* ')

    # Take out things in brackets.  Repeat until nothing is left to remove.
    # (No flags are passed: the fourth positional argument of re.subn is
    # `count`, not `flags`, and the pattern contains no letters anyway.)
    while True:
        (name, count) = re.subn(r'\[[^\]]+\]', '', name)
        if count == 0:
            break

    # Take out bogus suffixes.
    for suffix in ignore_suffixes:
        name = re.compile(suffix + '$', re.IGNORECASE).sub('', name)

    # Take out audio specs, after suffixing with space to simplify rx.
    name = name + ' '
    for spec in audio:
        name = re.compile(spec, re.IGNORECASE).sub(' ', name)

    # Now tokenize: the capture group keeps the words, the separators
    # (space - _ . ( ) +) end up as the in-between pieces.
    tokens = re.split(r'([^ \-_\.\(\)+]+)', name)

    # Keep only the non-empty pieces that are not made of separators.
    newTokens = []
    for t in tokens:
        t = t.strip()
        if len(t) > 0 and not re.match(r'[\.\-_\(\)+]+', t):
            newTokens.append(t)

    # Build the garbage vocabulary in a fresh set; extending `subs` in place
    # would grow that module-level list on every call.
    garbage = set(subs + misc + french + format + edition + source + video_exts)

    # Bitmap of good (True) and bad (False) tokens.
    tokenBitmap = [t.lower() not in garbage for t in newTokens]

    # If we've only got one or two tokens, don't whack any, they might be part
    # of the actual name (e.g. "Internal Affairs" "XXX 2").
    keepAll = len(tokenBitmap) <= 2

    # Strip out the garbage: once a BAD token has been seen, nothing after it
    # is kept (even if it isn't BAD).  Special case for director's cut.
    numBad = 0
    finalTokens = []
    for token, isGood in zip(newTokens, tokenBitmap):
        good = isGood or keepAll

        if good and numBad < 1:
            if token == '*yearBreak*':
                # If we have a year, we can ignore everything after this.
                break
            finalTokens.append(token)
        elif not good and token.lower() == 'dc':
            finalTokens.append("(Director's cut)")

        if not good:
            numBad += 1

    # If we took *all* the tokens out, use the first one, otherwise we'll end
    # up with no name at all.
    if len(finalTokens) == 0 and len(newTokens) > 0:
        finalTokens.append(newTokens[0])

    cleanedName = ' '.join(finalTokens)
    cleanedName = cleanedName.encode('utf-8')
    return (titlecase.titlecase(cleanedName), year)
#
# Media Center Master Plex T.V. scanner
#
# Prefer episodes matching before date matching and additional logic to
# correct season numbers (instead of using dates in shows with years in the
# title).
#
# Portions of the code used by this scanner was provided by:
# http://forums.plexapp.com/index.php/topic/53368-custom-plex-scanners/
#
# Which is a patched version of the original Plex series scanner that is
# copyrighted (c) 2010 by the Plex Development Team (all rights reserved).
#
# Parts of this code are by copyrighted by Media Center Master:
# (C) 2013 Media Center Master, Inc.
# All rights reserved.
# http://www.MediaCenterMaster.com/
#
# Additional credits to:
# Guillaume Boudreau
# modified to ignore folders containing .plexignore marker files
# Cyrille Lefevre
# modified to allow episode names without episode numbers
#
# Version 1.00 2013-02-11
# Version 1.01 2013-07-16 updated Scan with optional parameters and VideoFiles.Scan() with 'root' parameter
#
import sys, os, os.path, re
import Media, VideoFiles, Utils, MCMCustomVideoFiles, MCMCustomStack, MCMCustomUtils
from mp4file import mp4file, atomsearch
# NOTE(review): every pattern in this section was damaged by forum markup in
# the copy this file was recovered from (named-group names, backslashes,
# several '*' and '_' characters and "[class](group)" sequences were lost).
# The named groups below are the ones Scan() reads ('show', 'season', 'ep',
# 'secondEp', 'title', 'year', 'month', 'day'); the rest is a best-effort
# reconstruction -- verify against the original scanner before relying on it.
episode_regexps = [
    # S03E03, S03.03, S03_03, S03 03, S03E03 E04, S03E03 S03E04, S03E03 +04, S03E03-E04, S03E03-S03E04, S03E03-+04,
    # S03E03 title, S03E03. title, ..., show S03E03, ..., show S03E03 title, ...
    r'(?P<show>.*?)[sS](?P<season>[0-9]+)[\._ ]*[eE](?P<ep>[0-9]+)([- ]?([sS][0-9]+)?[Ee+](?P<secondEp>[0-9]+))?(\.? +(?P<title>.+)(\.\.\.+)?$)?',
    # S03-03, S03-03 title, S03-03. title, show S03-03, show S03-03 title, ...
    # NOTE(review): the season quantifier was not recoverable; {2} is assumed.
    r'(?P<show>.*?)[sS](?P<season>[0-9]{2})[\._\- ]+(?P<ep>[0-9]+)(\.? +(?P<title>.+)(\.\.\.+)?$)?',
    # 3x03, 3x03-04, 3x03-3x04, 03x03, 03x03-04, 03x03-3x04, 3x03 title, 3x03. title, ..., show 3x03, ..., show 3x03 title, ...
    r'(?P<show>.*?)([^0-9]|^)(?P<season>[0-9]{1,2})[Xx](?P<ep>[0-9]+)(-([0-9]+[Xx])?(?P<secondEp>[0-9]+))?(\.? +(?P<title>.+)(\.\.\.+)?$)?',
    r'(.*?)[^0-9a-z](?P<season>[0-9]{1,2})(?P<ep>[0-9]{2})([\.\-][0-9]+(?P<secondEp>[0-9]{2})([ \-_\.]|$)[\.\-]?)?([^0-9a-z%]|$)',  # .602.
]

date_regexps = [
    r'(?P<year>[0-9]{4})[^0-9a-zA-Z]+(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})([^0-9]|$)',  # 2009-02-10
    r'(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})[^0-9a-zA-Z(]+(?P<year>[0-9]{4})([^0-9a-zA-Z]|$)',  # 02-10-2009
]

# Each pattern has exactly nine groups, unpacked positionally by Scan() as
# (show, junk, year, season, episode, junk, endEpisode, junk, title).
standalone_episode_regexs = [
    r'(.*?)( \(([0-9]+)\))? - ([0-9]+)+x([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?',  # Newzbin style, no _UNPACK_
    r'(.*?)( \(([0-9]+)\))?[Ss]([0-9]+)+[Ee]([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?',  # standard s00e00
]

season_regex = r'.*?(?P<season>[0-9]+)$'  # folder for a season

just_episode_regexs = [
    r'(?P<ep>[0-9]{1,3})[\. -_]of[\. -_]+[0-9]{1,3}',  # 01 of 08
    r'^(?P<ep>[0-9]{1,3})[^0-9]',  # 01 - Foo
    r'e[a-z]*[ \.\-_]*(?P<ep>[0-9]{2,3})([^0-9c-uw-z%]|$)',  # Blah Blah ep234
    r'.*?[ \.\-_](?P<ep>[0-9]{2,3})[^0-9c-uw-z%]+',  # Flah - 04 - Blah
    r'.*?[ \.\-_](?P<ep>[0-9]{2,3})$',  # Flah - 04
    r'.*?[^0-9x](?P<ep>[0-9]{2,3})$',  # Flah707
]

ends_with_number = r'.*([0-9]{1,2})$'
ends_with_episode = [r'[ ]*[0-9]{1,2}x[0-9]{1,3}$', r'[ ]*S[0-9]+E[0-9]+$']
# Look for episodes.
def Scan(path, files, mediaList, subdirs, language=None, root=None):
    """Find TV episodes among `files` and append them to `mediaList`.

    path      -- directory being scanned; split with Utils.SplitPath().
    files     -- paths of the files in that directory.
    mediaList -- list that receives one Media.Episode per episode found.
    subdirs   -- sub-directory list, handed to MCMCustomUtils.Ignore(),
                 VideoFiles.Scan() and MCMCustomStack.Scan().
    language  -- accepted for interface compatibility; not used here.
    root      -- forwarded to VideoFiles.Scan().

    A season number found at the end of the last path component (or, failing
    that, of its parent when the path has three or more components) overrides
    whatever season the file name suggests.
    """
    # Don't scan ignored subdirs
    MCMCustomUtils.Ignore(subdirs)

    # Scan for video files.
    VideoFiles.Scan(path, files, mediaList, subdirs, root)

    # Take top two as show/season, but require at least the top one.
    paths = Utils.SplitPath(path)

    # Discover season numbers in the folder
    MCMSeason = -1
    if len(paths) >= 2:
        season = paths[len(paths)-1]
        match = re.match(season_regex, season, re.IGNORECASE)
        if match:
            MCMSeason = int(match.group('season'))
    if len(paths) >= 3 and MCMSeason == -1:  # a subfolder of the season (T.V.-on-disc)
        season = paths[len(paths)-2]
        match = re.match(season_regex, season, re.IGNORECASE)
        if match:
            MCMSeason = int(match.group('season'))

    if len(paths) == 1 and len(paths[0]) == 0:
        # Run the select regexps we allow at the top level.
        for i in files:
            file = os.path.basename(i)
            for rx in episode_regexps[0:-1]:
                match = re.search(rx, file, re.IGNORECASE)
                if match:
                    # Extract data.
                    show = match.group('show')
                    season = int(match.group('season'))
                    episode = int(match.group('ep'))
                    endEpisode = episode
                    if match.groupdict().get('secondEp'):
                        endEpisode = int(match.group('secondEp'))
                    title = None
                    if match.groupdict().get('title'):
                        title = match.group('title')

                    # Clean title.
                    (name, year) = MCMCustomVideoFiles.CleanName(show)
                    if len(name) > 0:
                        for ep in range(episode, endEpisode+1):
                            if MCMSeason > -1:
                                season = MCMSeason
                            tv_show = Media.Episode(name, season, ep, title, year)
                            tv_show.display_offset = (ep-episode)*100//(endEpisode-episode+1)
                            tv_show.parts.append(i)
                            mediaList.append(tv_show)

    elif len(paths) > 0 and len(paths[0]) > 0:
        done = False

        # See if parent directory is a perfect match (e.g. a directory like "24 - 8x02 - Day 8_ 5_00P.M. - 6_00P.M")
        if len(files) == 1:
            for rx in standalone_episode_regexs:
                res = re.findall(rx, paths[-1])
                if len(res):
                    (show, junk, year, season, episode, junk, endEpisode, junk, title) = res[0]

                    # If it didn't have a show, then grab it from the directory.
                    if len(show) == 0:
                        (show, year) = MCMCustomVideoFiles.CleanName(paths[0])

                    episode = int(episode)
                    if len(endEpisode) > 0:
                        endEpisode = int(endEpisode)
                    else:
                        endEpisode = episode

                    for ep in range(episode, endEpisode+1):
                        if MCMSeason > -1:
                            season = MCMSeason
                        tv_show = Media.Episode(show, season, ep, title, year)
                        tv_show.display_offset = (ep-episode)*100//(endEpisode-episode+1)
                        tv_show.parts.append(files[0])
                        mediaList.append(tv_show)

                    done = True
                    break

        if done == False:
            # Not a perfect standalone match, so get information from directories. (e.g. "Lost/Season 1/s0101.mkv")
            seasonNumber = None

            (show, year) = MCMCustomVideoFiles.CleanName(paths[0])

            if MCMSeason > -1:
                seasonNumber = MCMSeason

            # Make sure an episode name didn't make it into the show.
            for rx in ends_with_episode:
                show = re.sub(rx, '', show)

            for i in files:
                done = False
                file = os.path.basename(i)
                (file, ext) = os.path.splitext(file)

                if ext.lower() in ['.mp4', '.m4v', '.mov']:
                    m4season = m4ep = m4year = 0
                    m4show = title = ''
                    # Tag reading is best-effort: any failure falls through to
                    # the file-name based matching below.
                    try:
                        mp4fileTags = mp4file.Mp4File(i)

                        # Show.
                        try: m4show = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvshow').encode('utf-8')
                        except Exception: pass

                        # Season.
                        try: m4season = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvseason'))
                        except Exception: pass

                        # Episode.
                        m4ep = None
                        try:
                            # tracknum (can be 101)
                            m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tracknum'))
                        except Exception:
                            try:
                                # tvepisodenum (can be S2E16)
                                m4ep = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisodenum')
                            except Exception:
                                # TV Episode (can be 101)
                                m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisode'))

                        if m4ep is not None:
                            found = False
                            try:
                                # See if it matches regular expression.
                                for rx in episode_regexps[:-1]:
                                    match = re.search(rx, file, re.IGNORECASE)
                                    if match:
                                        m4season = int(match.group('season'))
                                        m4ep = int(match.group('ep'))
                                        found = True

                                if found == False and re.match('[0-9]+', str(m4ep)):
                                    # Carefully convert to episode number.
                                    m4ep = int(m4ep) % 100
                                elif found == False:
                                    m4ep = int(re.findall('[0-9]+', m4ep)[0])
                            except Exception:
                                pass

                        # Title.
                        try: title = find_data(mp4fileTags, 'moov/udta/meta/ilst/title').encode('utf-8')
                        except Exception: pass

                        # Year.
                        try: m4year = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/year')[:4])
                        except Exception: pass

                        if year and m4year == 0:
                            m4year = year

                        # If we have all the data we need, add it.
                        if len(m4show) > 0 and m4season > 0 and m4ep > 0:
                            if MCMSeason > -1:
                                m4season = MCMSeason
                            tv_show = Media.Episode(m4show, m4season, m4ep, title, m4year)
                            tv_show.parts.append(i)
                            mediaList.append(tv_show)
                            continue
                    except Exception:
                        pass

                # Check for episode ID regexp's first

                # Take the year out, because it's not going to help at this point.
                (cleanName, cleanYear) = MCMCustomVideoFiles.CleanName(file)
                if cleanYear != None:
                    file = file.replace(str(cleanYear), 'XXXX')

                # !!! already done by VideoFiles.CleanName() !!!
                ## Minor cleaning on the file to avoid false matches on H.264, 720p, etc.
                ##whackRx = ['([hHx][\.]?264)[^0-9]', '[^[0-9](720[pP])', '[^[0-9](1080[pP])', '[^[0-9](480[pP])']
                ##for rx in whackRx:
                ##  file = re.sub(rx, ' ', file)

                for rx in episode_regexps:
                    match = re.search(rx, file, re.IGNORECASE)
                    if match:
                        # Parse season and episode.
                        the_season = int(match.group('season'))
                        episode = int(match.group('ep'))
                        endEpisode = episode
                        if match.groupdict().get('secondEp'):
                            endEpisode = int(match.group('secondEp'))
                        title = None
                        if match.groupdict().get('title'):
                            title = match.group('title')

                        # More validation for the weakest regular expression.
                        if rx == episode_regexps[-1]:
                            # Look like a movie? Skip it.
                            if re.match(r'.+ \([1-2][0-9]{3}\)', paths[-1]):
                                done = True
                                break

                            # Skip episode 0 on the weak regex since it's pretty much never right.
                            if the_season == 0:
                                break

                            # Make sure this isn't absolute order.
                            if seasonNumber is not None:
                                if seasonNumber != the_season:
                                    # Something is amiss, see if it starts with an episode numbers.
                                    if re.search('^[0-9]+ -', file):
                                        # Let the episode matcher have it.
                                        break

                                    # Treat the whole thing as an episode.
                                    episode = episode + the_season*100
                                    if endEpisode is not None:
                                        endEpisode = endEpisode + the_season*100

                        for ep in range(episode, endEpisode+1):
                            if MCMSeason > -1:
                                the_season = MCMSeason
                            tv_show = Media.Episode(show, the_season, ep, title, year)
                            tv_show.display_offset = (ep-episode)*100//(endEpisode-episode+1)
                            tv_show.parts.append(i)
                            mediaList.append(tv_show)

                        done = True
                        break

                # Check for date-based regexps second -- only when the episode
                # regexps did not already deal with this file, so a file is
                # never added twice.
                if done == False:
                    for rx in date_regexps:
                        match = re.search(rx, file)
                        if match:
                            # Local names: the show-level `year` must survive
                            # for the files that follow.
                            airYear = int(match.group('year'))
                            airMonth = int(match.group('month'))
                            airDay = int(match.group('day'))

                            # Use the year as the season (unless the folder gave us one).
                            if MCMSeason > -1:
                                tv_show = Media.Episode(show, MCMSeason, None, None, None)
                            else:
                                tv_show = Media.Episode(show, airYear, None, None, None)
                            tv_show.released_at = '%d-%02d-%02d' % (airYear, airMonth, airDay)
                            tv_show.parts.append(i)
                            mediaList.append(tv_show)

                            done = True
                            break

                if done == False:
                    # OK, next let's see if we're dealing with something that looks like an episode.
                    # Begin by cleaning the filename to remove garbage like "h.264" that could throw
                    # things off.
                    (file, fileYear) = MCMCustomVideoFiles.CleanName(file)

                    # if don't have a good year from before (when checking the parent folders) AND we just got a good year, use it.
                    if not year and fileYear:
                        year = fileYear

                    for rx in just_episode_regexs:
                        episode_match = re.search(rx, file, re.IGNORECASE)
                        if episode_match is not None:
                            the_episode = int(episode_match.group('ep'))
                            the_season = 1

                            # Now look for a season.
                            if seasonNumber is not None:
                                the_season = seasonNumber

                                # See if we accidentally parsed the episode as season.
                                if the_episode >= 100 and the_episode // 100 == the_season:
                                    the_episode = the_episode % 100

                            if MCMSeason > -1:
                                the_season = MCMSeason
                            tv_show = Media.Episode(show, the_season, the_episode, None, year)
                            tv_show.parts.append(i)
                            mediaList.append(tv_show)
                            done = True
                            break

                if done == False:
                    print("Got nothing for: %s" % file)

    # Stack the results.
    MCMCustomStack.Scan(path, files, mediaList, subdirs)
def find_data(atom, name):
    """Return the 'data' attribute stored under the atom path `name`.

    `atom` is searched with atomsearch.find_path(); the located child is then
    asked for its 'data' sub-atom.  Returns that sub-atom's attrs['data'], or
    None (implicitly) when there is no such sub-atom or attribute.  No check
    is made on the result of find_path(); callers wrap this call in
    try/except.
    """
    child = atomsearch.find_path(atom, name)
    data_atom = child.find('data')
    if data_atom and 'data' in data_atom.attrs:
        return data_atom.attrs['data']
# Command-line entry point: scan the directory given as the first argument
# and print whatever media was found.
if __name__ == '__main__':
    print("Hello, world!")
    path = sys.argv[1]
    files = [os.path.join(path, file) for file in os.listdir(path)]
    media = []
    # The first character of the path is dropped before it is handed to Scan().
    Scan(path[1:], files, media, [])
    print("Media: %s" % (media,))