ISO 639-2/T vs ISO 639-2/B Subtitle language
Posted: 27 Aug 2014, 17:12
The DetectLanguage() API of OpenSubtitles returns ISO 639-2/B language codes rather than ISO 639-2/T ones. At the moment FileBot cannot handle this, because it only has a list of ISO 639-2/T language codes.
This rarely gets noticed, because the B and T codes differ for only a few languages (see Wikipedia). But it does cause problems for those languages.
For example, the language of German subtitles is never detected (the T code is "deu", but the B code is "ger"). This patch fixes the issue by adding the B code to each language entry. It also completes the language list.
This rarely gets noticed, because the B and T codes differ for only a few languages (see Wikipedia). But it does cause problems for those languages.
For example, the language of German subtitles is never detected (the T code is "deu", but the B code is "ger"). This patch fixes the issue by adding the B code to each language entry. It also completes the language list.
Code: Select all
Index: source/net/filebot/Language.java
===================================================================
--- source/net/filebot/Language.java (revision 2524)
+++ source/net/filebot/Language.java (working copy)
@@ -13,13 +13,22 @@
public class Language implements Serializable {
+ // The ISO 639-1 code
private final String iso2;
+
+ // The ISO 639-2/T code
private final String iso3;
+
+ // The ISO 639-2/B code
+ private final String iso3b;
+
+ // The Language name
private final String name;
- public Language(String iso2, String iso3, String name) {
+ public Language(String iso2, String iso3, String iso3b, String name) {
this.iso2 = iso2;
this.iso3 = iso3;
+ this.iso3b = iso3b;
this.name = name;
}
@@ -35,6 +44,10 @@
return iso3;
}
+ public String getISO3b() {
+ return iso3b;
+ }
+
public String getName() {
return name;
}
@@ -50,7 +63,7 @@
@Override
public Language clone() {
- return new Language(iso2, iso3, name);
+ return new Language(iso2, iso3, iso3b, name);
}
public static final Comparator<Language> ALPHABETIC_ORDER = new Comparator<Language>() {
@@ -65,8 +78,8 @@
ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
try {
- String[] values = bundle.getString(code).split("\\t", 2);
- return new Language(code, values[0], values[1]);
+ String[] values = bundle.getString(code).split("\\t", 3);
+ return new Language(code, values[0], values[1], values[2]);
} catch (Exception e) {
return null;
}
@@ -86,7 +99,7 @@
if (locale != null) {
String code = locale.getLanguage();
for (Language it : availableLanguages()) {
- if (it.getISO2().equals(code) || it.getISO3().equals(code)) {
+ if (it.getISO2().equals(code) || it.getISO3().equals(code) || it.getISO3b().equals(code)) {
return it;
}
}
@@ -96,7 +109,8 @@
public static Language findLanguage(String lang) {
for (Language it : availableLanguages()) {
- if (lang.equalsIgnoreCase(it.getISO2()) || lang.equalsIgnoreCase(it.getISO3()) || lang.equalsIgnoreCase(it.getName())) {
+ if (lang.equalsIgnoreCase(it.getISO2()) || lang.equalsIgnoreCase(it.getISO3()) ||
+ lang.equalsIgnoreCase(it.getISO3b()) || lang.equalsIgnoreCase(it.getName())) {
return it;
}
}
Index: source/net/filebot/Language.properties
===================================================================
--- source/net/filebot/Language.properties (revision 2524)
+++ source/net/filebot/Language.properties (working copy)
@@ -1,45 +1,188 @@
# available languages
-languages.all = sq,ar,hy,pb,bg,ca,zh,hr,cs,da,nl,en,et,fi,fr,de,el,he,hi,hu,id,it,ja,ko,lv,lt,mk,ms,no,fa,pl,pt,ro,ru,sr,sk,sl,es,sv,th,tr,vi
+languages.all = ab,aa,af,ak,sq,am,ar,an,hy,as,av,ae,ay,az,bm,ba,eu,be,bn,bh,bi,bs,pb,br,bg,my,ca,ch,ce,ny,zh,cu,cv,kw,co,cr,hr,cs,da,dv,nl,dz,en,eo,et,ee,fo,fj,fi,fr,ff,gd,gl,lg,ka,de,el,kl,gn,gu,ht,ha,he,hz,hi,ho,hu,is,io,ig,id,ia,ie,iu,ik,ga,it,ja,jv,kn,kr,ks,kk,km,ki,rw,rn,kv,kg,ko,ku,kj,ky,lo,la,lv,li,ln,lt,lu,lb,mk,mg,ms,ml,mt,gv,mi,mr,mh,mn,na,nv,ng,ne,nd,se,no,nb,nn,ii,oc,oj,or,om,os,pi,pa,ps,fa,pl,pt,qu,ro,rm,ru,sm,sg,sa,sc,sr,sn,sd,si,sk,sl,so,nr,st,es,su,sw,ss,sv,tl,ty,tg,ta,tt,te,th,bo,ti,to,ts,tn,tr,tk,tw,uk,ur,ug,uz,ve,vi,vo,wa,cy,fy,wo,xh,yi,yo,za,zu
languages.common = en,de,fr,es,pt,ru,ja,zh
-sq: sqi Albanian
-ar: ara Arabic
-hy: hye Armenian
-pb: pob Brazilian
-bg: bul Bulgarian
-ca: cat Catalan
-zh: zho Chinese
-hr: hrv Croatian
-cs: ces Czech
-da: dan Danish
-nl: nld Dutch
-en: eng English
-et: est Estonian
-fi: fin Finnish
-fr: fra French
-de: deu German
-el: ell Greek
-he: heb Hebrew
-hi: hin Hindi
-hu: hun Hungarian
-id: ind Indonesian
-it: ita Italian
-ja: jpn Japanese
-ko: kor Korean
-lv: lav Latvian
-lt: lit Lithuanian
-mk: mkd Macedonian
-ms: msa Malay
-no: nor Norwegian
-fa: fas Persian
-pl: pol Polish
-pt: por Portuguese
-ro: ron Romanian
-ru: rus Russian
-sr: srp Serbian
-sk: slk Slovak
-sl: slv Slovenian
-es: spa Spanish
-sv: swe Swedish
-th: tha Thai
-tr: tur Turkish
-vi: vie Vietnamese
+ab: abk abk Abkhaz
+aa: aar aar Afar
+af: afr afr Afrikaans
+ak: aka aka Akan
+sq: sqi alb Albanian
+am: amh amh Amharic
+ar: ara ara Arabic
+an: arg arg Aragonese
+hy: hye arm Armenian
+as: asm asm Assamese
+av: ava ava Avaric
+ae: ave ave Avestan
+ay: aym aym Aymara
+az: aze aze Azerbaijani
+bm: bam bam Bambara
+ba: bak bak Bashkir
+eu: eus baq Basque
+be: bel bel Belarusian
+bn: ben ben Bengali
+bh: bih bih Bihari
+bi: bis bis Bislama
+bs: bos bos Bosnian
+pb: pob pob Brazilian
+br: bre bre Breton
+bg: bul bul Bulgarian
+my: mya bur Burmese
+ca: cat cat Catalan
+ch: cha cha Chamorro
+ce: che che Chechen
+ny: nya nya Chichewa
+zh: zho chi Chinese
+cu: chu chu Church Slavonic
+cv: chv chv Chuvash
+kw: cor cor Cornish
+co: cos cos Corsican
+cr: cre cre Cree
+hr: hrv hrv Croatian
+cs: ces cze Czech
+da: dan dan Danish
+dv: div div Dhivehi
+nl: nld dut Dutch
+dz: dzo dzo Dzongkha
+en: eng eng English
+eo: epo epo Esperanto
+et: est est Estonian
+ee: ewe ewe Ewe
+fo: fao fao Faroese
+fj: fij fij Fijian
+fi: fin fin Finnish
+fr: fra fre French
+ff: ful ful Fula
+gd: gla gla Gaelic
+gl: glg glg Galician
+lg: lug lug Ganda
+ka: kat geo Georgian
+de: deu ger German
+el: ell gre Greek
+kl: kal kal Greenlandic
+gn: grn grn Guaraní
+gu: guj guj Gujarati
+ht: hat hat Haitian
+ha: hau hau Hausa
+he: heb heb Hebrew
+hz: her her Herero
+hi: hin hin Hindi
+ho: hmo hmo Hiri Motu
+hu: hun hun Hungarian
+is: isl ice Icelandic
+io: ido ido Ido
+ig: ibo ibo Igbo
+id: ind ind Indonesian
+ia: ina ina Interlingua
+ie: ile ile Interlingue
+iu: iku iku Inuktitut
+ik: ipk ipk Inupiaq
+ga: gle gle Irish
+it: ita ita Italian
+ja: jpn jpn Japanese
+jv: jav jav Javanese
+kn: kan kan Kannada
+kr: kau kau Kanuri
+ks: kas kas Kashmiri
+kk: kaz kaz Kazakh
+km: khm khm Khmer
+ki: kik kik Kikuyu
+rw: kin kin Kinyarwanda
+rn: run run Kirundi
+kv: kom kom Komi
+kg: kon kon Kongo
+ko: kor kor Korean
+ku: kur kur Kurdish
+kj: kua kua Kwanyama
+ky: kir kir Kyrgyz
+lo: lao lao Lao
+la: lat lat Latin
+lv: lav lav Latvian
+li: lim lim Limburgish
+ln: lin lin Lingala
+lt: lit lit Lithuanian
+lu: lub lub Luba-Katanga
+lb: ltz ltz Luxembourgish
+mk: mkd mac Macedonian
+mg: mlg mlg Malagasy
+ms: msa may Malay
+ml: mal mal Malayalam
+mt: mlt mlt Maltese
+gv: glv glv Manx
+mi: mri mao Maori
+mr: mar mar Marathi
+mh: mah mah Marshallese
+mn: mon mon Mongolian
+na: nau nau Nauru
+nv: nav nav Navajo
+ng: ndo ndo Ndonga
+ne: nep nep Nepali
+nd: nde nde Northern Ndebele
+se: sme sme Northern Sami
+no: nor nor Norwegian
+nb: nob nob Norwegian Bokmål
+nn: nno nno Norwegian Nynorsk
+ii: iii iii Nuosu
+oc: oci oci Occitan
+oj: oji oji Ojibwe
+or: ori ori Oriya
+om: orm orm Oromo
+os: oss oss Ossetian
+pi: pli pli Pali
+pa: pan pan Panjabi
+ps: pus pus Pashto
+fa: fas per Persian
+pl: pol pol Polish
+pt: por por Portuguese
+qu: que que Quechua
+ro: ron rum Romanian
+rm: roh roh Romansh
+ru: rus rus Russian
+sm: smo smo Samoan
+sg: sag sag Sango
+sa: san san Sanskrit
+sc: srd srd Sardinian
+sr: srp srp Serbian
+sn: sna sna Shona
+sd: snd snd Sindhi
+si: sin sin Sinhala
+sk: slk slo Slovak
+sl: slv slv Slovenian
+so: som som Somali
+nr: nbl nbl Southern Ndebele
+st: sot sot Southern Sotho
+es: spa spa Spanish
+su: sun sun Sundanese
+sw: swa swa Swahili
+ss: ssw ssw Swati
+sv: swe swe Swedish
+tl: tgl tgl Tagalog
+ty: tah tah Tahitian
+tg: tgk tgk Tajik
+ta: tam tam Tamil
+tt: tat tat Tatar
+te: tel tel Telugu
+th: tha tha Thai
+bo: bod tib Tibetan
+ti: tir tir Tigrinya
+to: ton ton Tonga
+ts: tso tso Tsonga
+tn: tsn tsn Tswana
+tr: tur tur Turkish
+tk: tuk tuk Turkmen
+tw: twi twi Twi
+uk: ukr ukr Ukrainian
+ur: urd urd Urdu
+ug: uig uig Uyghur
+uz: uzb uzb Uzbek
+ve: ven ven Venda
+vi: vie vie Vietnamese
+vo: vol vol Volapük
+wa: wln wln Walloon
+cy: cym wel Welsh
+fy: fry fry Western Frisian
+wo: wol wol Wolof
+xh: xho xho Xhosa
+yi: yid yid Yiddish
+yo: yor yor Yoruba
+za: zha zha Zhuang
+zu: zul zul Zulu
\ No newline at end of file
Index: source/net/filebot/ui/LanguageComboBoxModel.java
===================================================================
--- source/net/filebot/ui/LanguageComboBoxModel.java (revision 2524)
+++ source/net/filebot/ui/LanguageComboBoxModel.java (working copy)
@@ -13,7 +13,7 @@
public class LanguageComboBoxModel extends AbstractListModel implements ComboBoxModel {
- public static final Language ALL_LANGUAGES = new Language("undefined", "undefined", "All Languages");
+ public static final Language ALL_LANGUAGES = new Language("undefined", "undefined", "undefined", "All Languages");
private Language defaultLanguage;
private Language selection;
Index: source/net/filebot/ui/subtitle/SubtitlePackage.java
===================================================================
--- source/net/filebot/ui/subtitle/SubtitlePackage.java (revision 2524)
+++ source/net/filebot/ui/subtitle/SubtitlePackage.java (working copy)
@@ -37,7 +37,7 @@
this.subtitle = subtitle;
// resolve language name
- this.language = new Language(languageCodeByName.get(subtitle.getLanguageName()), Language.getISO3LanguageCodeByName(subtitle.getLanguageName()), subtitle.getLanguageName());
+ this.language = Language.findLanguage(subtitle.getLanguageName());
// initialize download worker
download = new Download(subtitle);
@@ -204,22 +204,4 @@
return current;
}
}
-
- /**
- * Map english language name to language code.
- */
- private static final Map<String, String> languageCodeByName = mapLanguageCodeByName();
-
- private static Map<String, String> mapLanguageCodeByName() {
- ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName(), Locale.ENGLISH);
-
- Map<String, String> map = new HashMap<String, String>();
-
- for (String code : bundle.keySet()) {
- map.put(bundle.getString(code), code);
- }
-
- return map;
- }
-
}