I'll start with the movie formatter, which should be useful for everyone collecting and watching films in multiple languages. It tags movie folders with uppercase audio languages and lowercase subtitle languages. It also uses the original title in the folder name if it is in Latin script.
Code: Select all
{
// FileBot Movie Formatter v20171120
// see https://www.filebot.net/forums/viewtopic.php?f=5&t=5505
// -------------------------------------------------------------------------------------------------------------------------
import groovy.transform.*
import net.filebot.Language
import net.filebot.Logging
// Render the movie-specific part of the path for the file currently being formatted.
def relativePath = MyMovie.create(binding).toString()
// NOTE(review): relies on FileBot's allOf{} leaving out parts that cannot be evaluated
// (e.g. {pi} or {subt} being undefined for this file) - confirm against the FileBot docs.
def pathParts = allOf{"Movies/"}{relativePath}{".CD${pi}"}{subt}
return pathParts.join();
// -------------------------------------------------------------------------------------------------------------------------
class MyMovie
{
// Languages whose original titles we like to keep.
//  - the first entry should be the same language we use for database lookups
//  - a non-null value names an (optional) custom transliterator to ASCII filenames
private static final Map LANGUAGE_MAP = [
    'en' : null,
    'de' : "Umlauts",
    'fr' : null
];
// Leading article of a title (group 1) and the remainder (group 2).
// This should cover the usual English, German and French variants.
private static final String RE_SORTNAME = /(?i)^((?:the|a|an|der|die|das|la|le|les)\b|l')\s*(.+)\s*$/;
// Codec name (lower case) -> tag to display; a null value means there is
// no need to tag folders or files with that codec.
private static final Map CODEC_DISPLAY_MAP = [
    'xvid': null,
    'divx': null,
    'avc' : null,
    'x264': null,
    'hevc': "x265",
    'mp3' : null,
    'aac' : null
];
// Channel layout -> tag to display; a null value hides the layout.
private static final Map CHANNEL_DISPLAY_MAP = [
    '1.0': null,
    '2.0': null,
    '6.0': "5.1"
];
// TODO: see source/net/filebot/media/ReleaseInfo.properties and eliminate whatever Filebot already supports
// Edition tags and release sources picked up from the file path.
private static final String RE_TAGS = /(?i)\b(unrated|extended|remastered|uncensored|special[ ._-]edition|imax)\b/;
private static final String RE_SOURCE = /(?i)\b(hdtv|dvbt|pdtv|r5|ts|ld|dvdscr|svcd|dubbed)\b/;
// Sources that are not worth showing in the tag.
private static final String RE_HIDE_SOURCE = /(?i)web-dl|webrip|bluray|brrip|xyz/
// Pattern we used to mark hardcoded subs, e.g. zz-hard zzz.hard zz_hard zz(hard)
// Matches only the 2-3 letter language code; the "hard" marker is checked by a lookahead.
// The closing word boundary sits after "hard" (inside the lookahead): a boundary directly after
// the language code can never match in front of "_" (a word character), which broke "zz_hard",
// and without a boundary after "hard" names such as "xyz.hardcore" were picked up as well.
private static final String RE_HARDSUBS = /(?i)\b[a-z]{2,3}(?=[-(._]hard\b)/
// ------------------------------------------------------------------------------------------------------------------------
// TODO: see filetypes supported by Filebot - maybe even get them at runtime: source/net/filebot/MediaTypes.properties
// TODO: it would be much easier if the {model} entries had a FileType=VIDEO|AUDIO|TEXT property
// File extensions of external audio and subtitle files. The alternatives are grouped so that
// ^ and $ apply to every extension; ungrouped, the anchors bound only to the first and the
// last alternative, and any extension merely containing e.g. "dts" or "srt" was accepted.
private static final String RE_AUDIO_EXT = /(?i)^(?:mp3|m4a|dts|ac3|wav|ogg)$/
private static final String RE_TEXT_EXT = /(?i)^(?:ass|psb|srt|ssa|ssf|sub)$/
// VobSub lang extraction: the letters following "id: " at the start of a line
private static final String RE_IDX_LANG = /(?im)(?<=^id: )[a-z]+/;
// Transliteration rule chain used to turn titles into ASCII
// TODO: ICU60+ has de-ASCII conversion?
private static final String UNIDECODE = "Any-Latin; NFKD; NFC; Latin-ASCII";
// ------------------------------------------------------------------------------------------------------------------------
// Most recently created instance; reused as long as the media file stays the same.
private static MyMovie instance = null;
/**
 * Returns the MyMovie for the given binding. A new instance is built only when the
 * binding's media.completeName differs from that of the cached instance. Every call
 * logs the file being formatted together with its mime type.
 */
static MyMovie create(binding)
{
    def cachedName = instance?.completeName;
    def requestedName = binding.media.completeName;
    if( cachedName != requestedName )
        instance = new MyMovie(binding);

    def indent = " " * ( binding.type.size() +2 );
    // the mime lookup is best effort; fall back to an empty list when it fails
    def mimeParts = getit({binding.self.mime},[]);
    def mime = mimeParts.join('/');
    log("${indent}<> \"${binding.self.f}\" [$mime]")
    return instance;
}
// FileBot binding object; properties this class does not define are looked up here (see propertyMissing)
private final Object binding;
// value of media.completeName at construction time; create() compares it to decide on reuse
final String completeName;
// best guess for the movie's main language, may be null
final Language primaryLanguage;
// Private: instances are obtained through create().
private MyMovie(binding)
{
// must be assigned first: media, type, ... below are resolved through the binding
this.binding = binding;
this.completeName = media.completeName;
this.primaryLanguage = guessPrimaryLanguage();
log("=== ${type} \"${media.completeName}\" [$mimeType]" );
}
// Try to guess the primary language of the movie, in case we have audio tracks with no
// language information:
//  - exactly one spoken language in the movie DB: use it
//  - otherwise: the production language from the movie DB (not necessarily spoken)
private Language guessPrimaryLanguage()
{
    def spoken = languages?:[];
    return spoken.size() == 1 ? spoken[0] : findLanguage(info?.OriginalLanguage);
}
// Internet media type reported for the media file, or "???/???" when it is missing
// or the lookup fails.
private String getMimeType()
{
    def detected = getit{media.InternetMediaType};
    if( detected )
        return detected;
    return "???/???";
}
/**
 * Best-effort evaluation: returns the result of the closure (even if that is null),
 * or defaultValue when the closure throws.
 */
private static Object getit(Closure c, Object defaultValue = null)
{
    def result = defaultValue
    try
    {
        result = c()
    }
    catch (ignored)
    {
        // deliberately swallowed: the default is the answer
    }
    return result
}
/**
 * Groovy hook for properties this class does not define: looks the name up in the
 * FileBot binding. Returns null when the lookup throws; a BindingException is ignored
 * silently, any other exception is logged.
 */
def propertyMissing(String name)
{
    def value = null
    try
    {
        value = binding[name]
    }
    catch (net.filebot.format.BindingException ignored)
    {
        // nothing to report, null is returned below
    }
    catch (all)
    {
        log("propertyMissing: $name: " + all)
    }
    return value
}
// hijacking FileBot's log file...
// Writes the given message through net.filebot.Logging.log at level FINE.
private static void log(it)
{
Logging.log.fine( it )
}
// Relative path "(<sort folder>)/<folder name>/<file name>"; every segment is
// normalized on its own before the segments are joined with '/'.
String toString()
{
    def segments = allOf{"($sortFolder)"}{folderName}{fileName};
    def safeSegments = segments.collect{ normalizeFileName(it) };
    return safeSegments.join('/');
}
// Makes a single path segment safe to use as a file or folder name.
private static String normalizeFileName(String it)
{
    // replace some dangerous characters
    def name = it.tr("*`|/\\", "x'---");
    // replace e.g. 12:00 with 12.00
    name = name.replaceAll(/(?<=\d)[:](?=\d)/, ".");
    // replace all remaining colons
    name = name.replaceAll(/[:]/, " -");
    // remove some dangerous characters
    name = name.removeAll(/[?]/);
    // avoid file and foldernames starting with a dot (== hidden on some systems)
    return name.after(/^[.,]+/);
    // TODO: normalize whitespace
}
// Language from an ISO 639 code, recognizing deprecated identifiers.
// Returns null when the lookup throws; the exception is logged.
private static Language findLanguage(String it)
{
    try
    {
        // Plain lookup with fallback to the code itself. Groovy's two-argument
        // Map.get(key, default) would add every unknown code (and null) to the
        // shared static map as a side effect.
        def code = ISO639_2_MAP.containsKey(it) ? ISO639_2_MAP[it] : it;
        return Language.findLanguage( code );
    }
    catch( all )
    {
        log( "findLanguage: ${all.inspect()}");
        return null;
    }
}
private static final ISO639_2_MAP = [ 'iw' : 'he', 'ji' : 'yi', 'in' : 'id' ]; // deprecated ISO 639 identifier -> replacement
// -----------------------------------------------------------------------------------------------------------------------
// helper class to enumerate embedded and external streams
enum StreamType{ AUDIO, TEXT, HARD }
// One audio, subtitle or hard-coded subtitle stream and its language (lang may be null).
// Every constructor logs the new stream.
class Stream { //...implements Comparable //int compareTo(Object other) { lang.ISO2 <=> other.lang.ISO2 }
    final StreamType type
    final Language lang
    Stream(StreamType t, Language l) { type=t; lang=l; log() }
    Stream(StreamType t, Locale l) { type=t; lang=Language.getLanguage(l); log() }
    Stream(StreamType t, String l) { type=t; lang=MyMovie.findLanguage(l); log() }
    void log() { MyMovie.log(" Stream type=$type lang=$lang"); }
    // Two streams are equal when their language codes are equal; anything that is not a
    // Stream (including null) is never equal.
    // NOTE(review): the stream type is not compared, so an audio and a subtitle stream of
    // the same language count as duplicates - confirm this is intended.
    boolean equals(Object other)
    {
        if( !(other instanceof Stream) ) return false;
        return lang?.ISO2 == other.lang?.ISO2;
    }
    // kept consistent with equals(): derived from the language code only
    int hashCode() { Objects.hashCode( lang?.ISO2 ) }
    // Tag text: upper case for audio ("UN" if unknown), lower case for subtitles ("un"),
    // lower case plus "-hard" for hard-coded subtitles.
    String toString() {
        switch (type) {
            case StreamType.AUDIO: return lang?.ISO2?.upper() ?: "UN";
            case StreamType.TEXT: return lang?.ISO2 ?: "un";
            case StreamType.HARD: return lang ? lang.ISO2+"-hard" : "un-hard";
            default: break;
        }
        return null;
    }
}
// Collects the audio, subtitle and hard-sub streams of the movie from the embedded
// track languages, the external files listed in {model} and the file path.
// Each Stream logs itself on construction; external files are logged through logStream().
List getStreams()
{
List<Stream> streams = [];
// Embedded audio streams, provided by mediainfo
// TODO: access mediainfo data and find the first 'default' audio track, if available?
streams += (audioLanguages?:[]) .collect{
new Stream( StreamType.AUDIO, it )
};
// if no other information available, assume the default audio stream is in the primary language
if( ! streams && primaryLanguage ) streams += [
new Stream( StreamType.AUDIO, primaryLanguage)
];
// External audio streams
// (no try/catch here, unlike the external subtitle files below: an exception from it.lang propagates)
streams += model.findAll{it.ext =~ RE_AUDIO_EXT} .collect{
logStream(it);
new Stream( StreamType.AUDIO, (Language) it.lang )
};
// Embedded text/subtitle streams, provided by mediainfo
streams += (textLanguages?:[]) .collect{
new Stream( StreamType.TEXT, it )
};
// External VobSub titles (.idx and .sub) supporting multiple languages
// the .idx file is read as text and every language id found in it becomes one subtitle stream
streams += model.findAll{it.ext=='idx'} .collectMany{
logStream(it);
it.file.text.findAll(RE_IDX_LANG)
} .collect{
new Stream( StreamType.TEXT, it )
};
// External text/subtitle streams
model.findAll{it.ext =~ RE_TEXT_EXT} .each {
try
{ // avoid exceptions from VobSub .sub files
streams << new Stream( StreamType.TEXT, (Language) it.lang )
logStream(it);
}
// a file whose language cannot be determined is logged and skipped
catch (all) { log("${it.f}: ${all.inspect()}") }
}
// try to match patterns indicating hard subs
def path = allOf{mediaPath}{original} .join(' :: ')
streams += path.findAll(RE_HARDSUBS) .collect{
new Stream( StreamType.HARD, it )
};
// drop duplicates; the result of unique() is not assigned, so this relies on Groovy's
// unique() modifying the list in place
// NOTE(review): Stream.equals compares the language only - confirm that is the intended notion of "duplicate"
streams.unique();
return streams;
}
// Logs one external file (name and mime type), indented by the length of {type} plus two.
private void logStream(it)
{
    def width = type.size() +2;
    def indent = " " * width;
    log("${indent}++ \"${it.f}\" [${(it.mime?:[]).join('/')}]"); // it.f ./. ${it.mediaPath}${it.subst?:''}
}
// Builds the space-separated tag shown in the folder name: edition tags, media info,
// stream languages and release source.
String getTag()
{
    // for SD movies, use actual resolution
    def videoFormat;
    if( hd =~ /SD/ ) videoFormat = resolution; else videoFormat = vf;
    // transform video codec display
    def videoCodec = filterTag(CODEC_DISPLAY_MAP, vc);
    // include bitdepth when >= 10
    def depthTag = null;
    if( !(any{bitdepth}{8} < 10) ) depthTag = "${bitdepth}bit";
    def audioCodec = filterTag(CODEC_DISPLAY_MAP, ac);
    def channelLayout = filterTag(CHANNEL_DISPLAY_MAP, channels);
    // eg. ['720p','x265','AC3','5.1']
    def technical = allOf{s3d}{videoFormat}{videoCodec}{depthTag}{audioCodec}{channelLayout};

    def haystack = allOf{mediaPath}{original} .join(' :: ')
    // to supplement FileBot's {tags}
    def extraTags = haystack.findAll(RE_TAGS) *.upper() .unique();
    // to supplement FileBot's {source}
    def extraSources = haystack.findAll(RE_SOURCE) *.upper() .unique();
    // TODO: check that the above are not part of the title!

    // hide some sources from the result
    def visibleSource = null;
    if( !(any{source}{"_"} =~ RE_HIDE_SOURCE) ) visibleSource = source;

    def parts = allOf{tags *.upper()}{extraTags}{technical}{streams}{visibleSource}{extraSources};
    return parts.flatten().join(' ');
}
/**
 * Translates a tag through one of the display maps (keys are lower case).
 * Returns the mapped value when the map has an entry for the tag (a null value hides
 * the tag), the tag itself when there is no entry, and null for a null tag.
 */
private static String filterTag(map, String it)
{
    // a missing media property arrives here as null; there is nothing to display then
    if( it == null ) return null;
    def key = it.lower();
    // Non-mutating lookup: Groovy's two-argument Map.get(key, default) would insert every
    // unknown tag into the shared static map, with the spelling of its first occurrence.
    return map.containsKey(key) ? map[key] : it;
}
// True when the text is non-empty and consists only of characters from the Latin and
// Common Unicode scripts.
private static Boolean isLatin(String it)
{
    if( !it )
        return false;
    return it.matches(/^[\p{IsCommon}\p{IsLatin}]+$/);
}
// Transliterates a title with the UNIDECODE rules and trims it.
// lang = null means locale of database query, pass ISO2 code otherwise (currently unused)
// TODO: if( lang == 'de' ) transliterate umlauts
// TODO: replace trailing (yyyy) from film names if necessary
private static String cleanTitle(String it, Language lang=null)
{
    def ascii = it.transliterate(UNIDECODE);
    return ascii.trim();
}
// One or two titles for the movie: the title from the database query and, when it is in
// Latin script and differs, the primary title. The primary title comes first when the
// primary language is one of the LANGUAGE_MAP languages.
@Memoized
List<String> getTitles()
{
    // n will be in the locale used to query the database
    def result = [ cleanTitle(n) ];
    if( !isLatin( self.primaryTitle ) )
        return result;

    def primaryAscii = cleanTitle(self.primaryTitle, primaryLanguage);
    // TODO: check for similarity to catch variants
    // see source/net/filebot/similarity/NameSimilarityMetric.java
    if( primaryAscii == result[0] )
        return result;

    // use both titles
    result << primaryAscii;
    // prioritize primary language
    boolean primaryFirst = primaryLanguage && LANGUAGE_MAP.containsKey( primaryLanguage.ISO2 );
    return primaryFirst ? result.reverse() : result;
}
// File name: the preferred title followed by the year in parentheses.
String getFileName()
{
    def preferredTitle = titles[0];
    return preferredTitle + " ($y)";
}
// Rewrites a title that starts with an article according to the replacement pattern
// ($1 = article, $2 = rest of the title) and trims the result.
// use /$2, $1/ to re-append the first word
private static String sortTitle(String it, String replace=/$2, $1/)
{
    def rewritten = it.replaceFirst(RE_SORTNAME, replace);
    return rewritten.trim();
}
// Folder name: up to two titles without their leading article, joined by "; ",
// followed by the year and the tag.
@Memoized
String getFolderName()
{
    def names = titles;
    // use /$2, $1/ instead of /$2/ to append the first word
    def parts = [ sortTitle(names[0], /$2/) ];
    if( names.size() > 1 )
        parts << sortTitle(names[1], /$2/);
    return parts.join("; ") + " ($y) [$tag]";
}
// Top-level folder a movie is sorted into: "Adult..." for adult titles, otherwise the
// cleaned-up collection name, otherwise the first letter of the folder name ("0-9" for
// digits, '#' when there is no letter or digit at all).
String getSortFolder()
{
    if( info?.adult )
    {
        return "Adult...";
    }
    /*
    float rating = any{binding.rating}{0.0} as float;
    float gigabytes = any{binding.gigabytes}{0.0} as float;
    if( gigabytes >= 5.0 && rating > 0.0 )
    {
    int score = (11.0 - rating) * gigabytes / 4.4;
    return "== ${String.format( '%02d', score)} =="
    }
    */
    // evaluated lazily by any{} below
    def collectionName = {
        def ascii = binding.collection.transliterate(UNIDECODE)
        // use /$2, $1/ to append the first word
        def withoutArticle = ascii.replaceFirst(RE_SORTNAME, /$2/)
        withoutArticle.removeAll(/(?i)\s?\W?(collection|reihe|sammlung)\W?/).trim()
    }
    // use the first letter or number
    def firstAlnum = folderName =~ /\p{Alnum}/
    def az = '#';
    if( firstAlnum ) az = firstAlnum[0];
    // use "0-9" for all numbers
    if( az =~ /\d/ ) az = "0-9";
    return any{collectionName}{"$az..."}{"..."};
}
}
}