ISO 639-2/T vs ISO 639-2/B Subtitle language

Any questions? Need some help?
Post Reply
chronial
Power User
Posts: 9
Joined: 27 Aug 2014, 01:01

ISO 639-2/T vs ISO 639-2/B Subtitle language

Post by chronial »

The DetectLanguage() API of OpenSubtitles returns ISO 639-2/B language codes rather than ISO 639-2/T ones. At the moment FileBot can’t handle this, because its language list contains only ISO 639-2/T codes.
This often goes unnoticed, because the B and T codes differ for only a few languages (see Wikipedia), but it does cause problems for those languages.

For example, the language of German subtitles is never detected, because OpenSubtitles reports the B code "ger" while FileBot only knows the T code "deu". This patch fixes the issue by adding the B code to every language entry. It also completes the language list.

Code: Select all

Index: source/net/filebot/Language.java
===================================================================
--- source/net/filebot/Language.java	(revision 2524)
+++ source/net/filebot/Language.java	(working copy)
@@ -13,13 +13,22 @@
 
 public class Language implements Serializable {
 
+	// The ISO 639-1 code
 	private final String iso2;
+
+	// The ISO 639-2/T code
 	private final String iso3;
+
+	// The ISO 639-2/B code
+	private final String iso3b;
+
+	// The Language name
 	private final String name;
 
-	public Language(String iso2, String iso3, String name) {
+	public Language(String iso2, String iso3, String iso3b, String name) {
 		this.iso2 = iso2;
 		this.iso3 = iso3;
+		this.iso3b = iso3b;
 		this.name = name;
 	}
 
@@ -35,6 +44,10 @@
 		return iso3;
 	}
 
+	public String getISO3b() {
+		return iso3b;
+	}
+
 	public String getName() {
 		return name;
 	}
@@ -50,7 +63,7 @@
 
 	@Override
 	public Language clone() {
-		return new Language(iso2, iso3, name);
+		return new Language(iso2, iso3, iso3b, name);
 	}
 
 	public static final Comparator<Language> ALPHABETIC_ORDER = new Comparator<Language>() {
@@ -65,8 +78,8 @@
 		ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
 
 		try {
-			String[] values = bundle.getString(code).split("\\t", 2);
-			return new Language(code, values[0], values[1]);
+			String[] values = bundle.getString(code).split("\\t", 3);
+			return new Language(code, values[0], values[1], values[2]);
 		} catch (Exception e) {
 			return null;
 		}
@@ -86,7 +99,7 @@
 		if (locale != null) {
 			String code = locale.getLanguage();
 			for (Language it : availableLanguages()) {
-				if (it.getISO2().equals(code) || it.getISO3().equals(code)) {
+				if (it.getISO2().equals(code) || it.getISO3().equals(code) || it.getISO3b().equals(code)) {
 					return it;
 				}
 			}
@@ -96,7 +109,8 @@
 
 	public static Language findLanguage(String lang) {
 		for (Language it : availableLanguages()) {
-			if (lang.equalsIgnoreCase(it.getISO2()) || lang.equalsIgnoreCase(it.getISO3()) || lang.equalsIgnoreCase(it.getName())) {
+			if (lang.equalsIgnoreCase(it.getISO2()) || lang.equalsIgnoreCase(it.getISO3()) ||
+					lang.equalsIgnoreCase(it.getISO3b()) || lang.equalsIgnoreCase(it.getName())) {
 				return it;
 			}
 		}
Index: source/net/filebot/Language.properties
===================================================================
--- source/net/filebot/Language.properties	(revision 2524)
+++ source/net/filebot/Language.properties	(working copy)
@@ -1,45 +1,188 @@
 # available languages
-languages.all = sq,ar,hy,pb,bg,ca,zh,hr,cs,da,nl,en,et,fi,fr,de,el,he,hi,hu,id,it,ja,ko,lv,lt,mk,ms,no,fa,pl,pt,ro,ru,sr,sk,sl,es,sv,th,tr,vi
+languages.all = ab,aa,af,ak,sq,am,ar,an,hy,as,av,ae,ay,az,bm,ba,eu,be,bn,bh,bi,bs,pb,br,bg,my,ca,ch,ce,ny,zh,cu,cv,kw,co,cr,hr,cs,da,dv,nl,dz,en,eo,et,ee,fo,fj,fi,fr,ff,gd,gl,lg,ka,de,el,kl,gn,gu,ht,ha,he,hz,hi,ho,hu,is,io,ig,id,ia,ie,iu,ik,ga,it,ja,jv,kn,kr,ks,kk,km,ki,rw,rn,kv,kg,ko,ku,kj,ky,lo,la,lv,li,ln,lt,lu,lb,mk,mg,ms,ml,mt,gv,mi,mr,mh,mn,na,nv,ng,ne,nd,se,no,nb,nn,ii,oc,oj,or,om,os,pi,pa,ps,fa,pl,pt,qu,ro,rm,ru,sm,sg,sa,sc,sr,sn,sd,si,sk,sl,so,nr,st,es,su,sw,ss,sv,tl,ty,tg,ta,tt,te,th,bo,ti,to,ts,tn,tr,tk,tw,uk,ur,ug,uz,ve,vi,vo,wa,cy,fy,wo,xh,yi,yo,za,zu
 languages.common = en,de,fr,es,pt,ru,ja,zh
-sq: sqi	Albanian
-ar: ara	Arabic
-hy: hye	Armenian
-pb: pob	Brazilian
-bg: bul	Bulgarian
-ca: cat	Catalan
-zh: zho	Chinese
-hr: hrv	Croatian
-cs: ces	Czech
-da: dan	Danish
-nl: nld	Dutch
-en: eng	English
-et: est	Estonian
-fi: fin	Finnish
-fr: fra	French
-de: deu	German
-el: ell	Greek
-he: heb	Hebrew
-hi: hin	Hindi
-hu: hun	Hungarian
-id: ind	Indonesian
-it: ita	Italian
-ja: jpn	Japanese
-ko: kor	Korean
-lv: lav	Latvian
-lt: lit	Lithuanian
-mk: mkd	Macedonian
-ms: msa	Malay
-no: nor	Norwegian
-fa: fas	Persian
-pl: pol	Polish
-pt: por	Portuguese
-ro: ron	Romanian
-ru: rus	Russian
-sr: srp	Serbian
-sk: slk	Slovak
-sl: slv	Slovenian
-es: spa	Spanish
-sv: swe	Swedish
-th: tha	Thai
-tr: tur	Turkish
-vi: vie	Vietnamese
+ab: abk	abk	Abkhaz
+aa: aar	aar	Afar
+af: afr	afr	Afrikaans
+ak: aka	aka	Akan
+sq: sqi	alb	Albanian
+am: amh	amh	Amharic
+ar: ara	ara	Arabic
+an: arg	arg	Aragonese
+hy: hye	arm	Armenian
+as: asm	asm	Assamese
+av: ava	ava	Avaric
+ae: ave	ave	Avestan
+ay: aym	aym	Aymara
+az: aze	aze	Azerbaijani
+bm: bam	bam	Bambara
+ba: bak	bak	Bashkir
+eu: eus	baq	Basque
+be: bel	bel	Belarusian
+bn: ben	ben	Bengali
+bh: bih	bih	Bihari
+bi: bis	bis	Bislama
+bs: bos	bos	Bosnian
+pb: pob	pob	Brazilian
+br: bre	bre	Breton
+bg: bul	bul	Bulgarian
+my: mya	bur	Burmese
+ca: cat	cat	Catalan
+ch: cha	cha	Chamorro
+ce: che	che	Chechen
+ny: nya	nya	Chichewa
+zh: zho	chi	Chinese
+cu: chu	chu	Church Slavonic
+cv: chv	chv	Chuvash
+kw: cor	cor	Cornish
+co: cos	cos	Corsican
+cr: cre	cre	Cree
+hr: hrv	hrv	Croatian
+cs: ces	cze	Czech
+da: dan	dan	Danish
+dv: div	div	Dhivehi
+nl: nld	dut	Dutch
+dz: dzo	dzo	Dzongkha
+en: eng	eng	English
+eo: epo	epo	Esperanto
+et: est	est	Estonian
+ee: ewe	ewe	Ewe
+fo: fao	fao	Faroese
+fj: fij	fij	Fijian
+fi: fin	fin	Finnish
+fr: fra	fre	French
+ff: ful	ful	Fula
+gd: gla	gla	Gaelic
+gl: glg	glg	Galician
+lg: lug	lug	Ganda
+ka: kat	geo	Georgian
+de: deu	ger	German
+el: ell	gre	Greek
+kl: kal	kal	Greenlandic
+gn: grn	grn	Guaraní
+gu: guj	guj	Gujarati
+ht: hat	hat	Haitian
+ha: hau	hau	Hausa
+he: heb	heb	Hebrew
+hz: her	her	Herero
+hi: hin	hin	Hindi
+ho: hmo	hmo	Hiri Motu
+hu: hun	hun	Hungarian
+is: isl	ice	Icelandic
+io: ido	ido	Ido
+ig: ibo	ibo	Igbo
+id: ind	ind	Indonesian
+ia: ina	ina	Interlingua
+ie: ile	ile	Interlingue
+iu: iku	iku	Inuktitut
+ik: ipk	ipk	Inupiaq
+ga: gle	gle	Irish
+it: ita	ita	Italian
+ja: jpn	jpn	Japanese
+jv: jav	jav	Javanese
+kn: kan	kan	Kannada
+kr: kau	kau	Kanuri
+ks: kas	kas	Kashmiri
+kk: kaz	kaz	Kazakh
+km: khm	khm	Khmer
+ki: kik	kik	Kikuyu
+rw: kin	kin	Kinyarwanda
+rn: run	run	Kirundi
+kv: kom	kom	Komi
+kg: kon	kon	Kongo
+ko: kor	kor	Korean
+ku: kur	kur	Kurdish
+kj: kua	kua	Kwanyama
+ky: kir	kir	Kyrgyz
+lo: lao	lao	Lao
+la: lat	lat	Latin
+lv: lav	lav	Latvian
+li: lim	lim	Limburgish
+ln: lin	lin	Lingala
+lt: lit	lit	Lithuanian
+lu: lub	lub	Luba-Katanga
+lb: ltz	ltz	Luxembourgish
+mk: mkd	mac	Macedonian
+mg: mlg	mlg	Malagasy
+ms: msa	may	Malay
+ml: mal	mal	Malayalam
+mt: mlt	mlt	Maltese
+gv: glv	glv	Manx
+mi: mri	mao	Maori
+mr: mar	mar	Marathi
+mh: mah	mah	Marshallese
+mn: mon	mon	Mongolian
+na: nau	nau	Nauru
+nv: nav	nav	Navajo
+ng: ndo	ndo	Ndonga
+ne: nep	nep	Nepali
+nd: nde	nde	Northern Ndebele
+se: sme	sme	Northern Sami
+no: nor	nor	Norwegian
+nb: nob	nob	Norwegian Bokmål
+nn: nno	nno	Norwegian Nynorsk
+ii: iii	iii	Nuosu
+oc: oci	oci	Occitan
+oj: oji	oji	Ojibwe
+or: ori	ori	Oriya
+om: orm	orm	Oromo
+os: oss	oss	Ossetian
+pi: pli	pli	Pali
+pa: pan	pan	Panjabi
+ps: pus	pus	Pashto
+fa: fas	per	Persian
+pl: pol	pol	Polish
+pt: por	por	Portuguese
+qu: que	que	Quechua
+ro: ron	rum	Romanian
+rm: roh	roh	Romansh
+ru: rus	rus	Russian
+sm: smo	smo	Samoan
+sg: sag	sag	Sango
+sa: san	san	Sanskrit
+sc: srd	srd	Sardinian
+sr: srp	srp	Serbian
+sn: sna	sna	Shona
+sd: snd	snd	Sindhi
+si: sin	sin	Sinhala
+sk: slk	slo	Slovak
+sl: slv	slv	Slovenian
+so: som	som	Somali
+nr: nbl	nbl	Southern Ndebele
+st: sot	sot	Southern Sotho
+es: spa	spa	Spanish
+su: sun	sun	Sundanese
+sw: swa	swa	Swahili
+ss: ssw	ssw	Swati
+sv: swe	swe	Swedish
+tl: tgl	tgl	Tagalog
+ty: tah	tah	Tahitian
+tg: tgk	tgk	Tajik
+ta: tam	tam	Tamil
+tt: tat	tat	Tatar
+te: tel	tel	Telugu
+th: tha	tha	Thai
+bo: bod	tib	Tibetan
+ti: tir	tir	Tigrinya
+to: ton	ton	Tonga
+ts: tso	tso	Tsonga
+tn: tsn	tsn	Tswana
+tr: tur	tur	Turkish
+tk: tuk	tuk	Turkmen
+tw: twi	twi	Twi
+uk: ukr	ukr	Ukrainian
+ur: urd	urd	Urdu
+ug: uig	uig	Uyghur
+uz: uzb	uzb	Uzbek
+ve: ven	ven	Venda
+vi: vie	vie	Vietnamese
+vo: vol	vol	Volapük
+wa: wln	wln	Walloon
+cy: cym	wel	Welsh
+fy: fry	fry	Western Frisian
+wo: wol	wol	Wolof
+xh: xho	xho	Xhosa
+yi: yid	yid	Yiddish
+yo: yor	yor	Yoruba
+za: zha	zha	Zhuang
+zu: zul	zul	Zulu
\ No newline at end of file
Index: source/net/filebot/ui/LanguageComboBoxModel.java
===================================================================
--- source/net/filebot/ui/LanguageComboBoxModel.java	(revision 2524)
+++ source/net/filebot/ui/LanguageComboBoxModel.java	(working copy)
@@ -13,7 +13,7 @@
 
 public class LanguageComboBoxModel extends AbstractListModel implements ComboBoxModel {
 
-	public static final Language ALL_LANGUAGES = new Language("undefined", "undefined", "All Languages");
+	public static final Language ALL_LANGUAGES = new Language("undefined", "undefined", "undefined", "All Languages");
 
 	private Language defaultLanguage;
 	private Language selection;
Index: source/net/filebot/ui/subtitle/SubtitlePackage.java
===================================================================
--- source/net/filebot/ui/subtitle/SubtitlePackage.java	(revision 2524)
+++ source/net/filebot/ui/subtitle/SubtitlePackage.java	(working copy)
@@ -37,7 +37,7 @@
 		this.subtitle = subtitle;
 
 		// resolve language name
-		this.language = new Language(languageCodeByName.get(subtitle.getLanguageName()), Language.getISO3LanguageCodeByName(subtitle.getLanguageName()), subtitle.getLanguageName());
+		this.language = Language.findLanguage(subtitle.getLanguageName());
 
 		// initialize download worker
 		download = new Download(subtitle);
@@ -204,22 +204,4 @@
 			return current;
 		}
 	}
-
-	/**
-	 * Map english language name to language code.
-	 */
-	private static final Map<String, String> languageCodeByName = mapLanguageCodeByName();
-
-	private static Map<String, String> mapLanguageCodeByName() {
-		ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName(), Locale.ENGLISH);
-
-		Map<String, String> map = new HashMap<String, String>();
-
-		for (String code : bundle.keySet()) {
-			map.put(bundle.getString(code), code);
-		}
-
-		return map;
-	}
-
 }
User avatar
rednoah
The Source
Posts: 22999
Joined: 16 Nov 2011, 08:59
Location: Taipei
Contact:

Re: ISO 639-2/T vs ISO 639-2/B Subtitle language

Post by rednoah »

OS X / Eclipse sucks at applying unified diff patches; it doesn't work for some reason. Better to send me the files.
:idea: Please read the FAQ and How to Request Help.
chronial
Power User
Posts: 9
Joined: 27 Aug 2014, 01:01

Re: ISO 639-2/T vs ISO 639-2/B Subtitle language

Post by chronial »

Post Reply