Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 53 additions & 5 deletions Lib/encodings/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

# big5hkscs codec
'big5_hkscs' : 'big5hkscs',
'csbig5hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',

# bz2_codec codec
Expand Down Expand Up @@ -71,6 +72,7 @@

# cp1140 codec
'1140' : 'cp1140',
'ccsid01140' : 'cp1140',
'cp01140' : 'cp1140',
'csibm01140' : 'cp1140',
'ebcdic_us_37_euro' : 'cp1140',
Expand All @@ -79,38 +81,47 @@

# cp1250 codec
'1250' : 'cp1250',
'cswindows1250' : 'cp1250',
'windows_1250' : 'cp1250',

# cp1251 codec
'1251' : 'cp1251',
'cswindows1251' : 'cp1251',
'windows_1251' : 'cp1251',

# cp1252 codec
'1252' : 'cp1252',
'cswindows1252' : 'cp1252',
'windows_1252' : 'cp1252',

# cp1253 codec
'1253' : 'cp1253',
'cswindows1253' : 'cp1253',
'windows_1253' : 'cp1253',

# cp1254 codec
'1254' : 'cp1254',
'cswindows1254' : 'cp1254',
'windows_1254' : 'cp1254',

# cp1255 codec
'1255' : 'cp1255',
'cswindows1255' : 'cp1255',
'windows_1255' : 'cp1255',

# cp1256 codec
'1256' : 'cp1256',
'cswindows1256' : 'cp1256',
'windows_1256' : 'cp1256',

# cp1257 codec
'1257' : 'cp1257',
'cswindows1257' : 'cp1257',
'windows_1257' : 'cp1257',

# cp1258 codec
'1258' : 'cp1258',
'cswindows1258' : 'cp1258',
'windows_1258' : 'cp1258',

# cp273 codec
Expand Down Expand Up @@ -163,6 +174,7 @@

# cp858 codec
'858' : 'cp858',
'ccsid00858' : 'cp858',
'cp00858' : 'cp858',
'csibm00858' : 'cp858',
'csibm858' : 'cp858',
Expand Down Expand Up @@ -214,11 +226,13 @@

# cp874 codec
'874' : 'cp874',
'cswindows874' : 'cp874',
'ms874' : 'cp874',
'windows_874' : 'cp874',

# cp932 codec
'932' : 'cp932',
'cswindows31j' : 'cp932',
'ms932' : 'cp932',
'mskanji' : 'cp932',
'ms_kanji' : 'cp932',
Expand All @@ -242,47 +256,58 @@
'eucjisx0213' : 'euc_jisx0213',

# euc_jp codec
'cseucpkdfmtjapanese' : 'euc_jp',
'eucjp' : 'euc_jp',
'extended_unix_code_packed_format_for_japanese' : 'euc_jp',
'ujis' : 'euc_jp',
'u_jis' : 'euc_jp',

# euc_kr codec
'cseuckr' : 'euc_kr',
'csksc56011987' : 'euc_kr',
'euckr' : 'euc_kr',
'iso_ir_149' : 'euc_kr',
'korean' : 'euc_kr',
'ks_c_5601_1987' : 'euc_kr',
'ks_c_5601_1989' : 'euc_kr',
'ksc5601' : 'euc_kr',
'ks_c_5601' : 'euc_kr',
'ks_c_5601_1987' : 'euc_kr',
'ksc_5601' : 'euc_kr',
'ksx1001' : 'euc_kr',
'ks_x_1001' : 'euc_kr',
'cseuckr' : 'euc_kr',

# gb18030 codec
'csgb18030' : 'gb18030',
'gb18030_2000' : 'gb18030',

# gb2312 codec
'chinese' : 'gb2312',
'csgb2312' : 'gb2312',
'csiso58gb231280' : 'gb2312',
'euc_cn' : 'gb2312',
'euccn' : 'gb2312',
'eucgb2312_cn' : 'gb2312',
'gb2312_1980' : 'gb2312',
'gb2312_80' : 'gb2312',
'gb_2312_80' : 'gb2312',
'iso_ir_58' : 'gb2312',

# gbk codec
'936' : 'gbk',
'cp936' : 'gbk',
'csgbk' : 'gbk',
'ms936' : 'gbk',
'windows_936' : 'gbk',

# hex_codec codec
'hex' : 'hex_codec',

# hp_roman8 codec
'roman8' : 'hp_roman8',
'r8' : 'hp_roman8',
'csHPRoman8' : 'hp_roman8',
'cp1051' : 'hp_roman8',
'cshproman8' : 'hp_roman8',
'ibm1051' : 'hp_roman8',
'r8' : 'hp_roman8',
'roman8' : 'hp_roman8',

# hz codec
'hzgb' : 'hz',
Expand All @@ -299,6 +324,7 @@
'iso_2022_jp_1' : 'iso2022_jp_1',

# iso2022_jp_2 codec
'csiso2022jp2' : 'iso2022_jp_2',
'iso2022jp_2' : 'iso2022_jp_2',
'iso_2022_jp_2' : 'iso2022_jp_2',

Expand Down Expand Up @@ -334,12 +360,14 @@
'iso_8859_11_2001' : 'iso8859_11',

# iso8859_13 codec
'csiso885913' : 'iso8859_13',
'iso_8859_13' : 'iso8859_13',
'l7' : 'iso8859_13',
'latin7' : 'iso8859_13',
'latin_7' : 'iso8859_13',

# iso8859_14 codec
'csiso885914' : 'iso8859_14',
'iso_8859_14' : 'iso8859_14',
'iso_8859_14_1998' : 'iso8859_14',
'iso_celtic' : 'iso8859_14',
Expand All @@ -349,12 +377,14 @@
'latin_8' : 'iso8859_14',

# iso8859_15 codec
'csiso885915' : 'iso8859_15',
'iso_8859_15' : 'iso8859_15',
'l9' : 'iso8859_15',
'latin9' : 'iso8859_15',
'latin_9' : 'iso8859_15',

# iso8859_16 codec
'csiso885916' : 'iso8859_16',
'iso_8859_16' : 'iso8859_16',
'iso_8859_16_2001' : 'iso8859_16',
'iso_ir_226' : 'iso8859_16',
Expand Down Expand Up @@ -416,6 +446,8 @@
'iso_ir_126' : 'iso8859_7',

# iso8859_8 codec
'csiso88598e' : 'iso8859_8',
'csiso88598i' : 'iso8859_8',
'csisolatinhebrew' : 'iso8859_8',
'hebrew' : 'iso8859_8',
'iso_8859_8' : 'iso8859_8',
Expand All @@ -440,7 +472,11 @@
# koi8_r codec
'cskoi8r' : 'koi8_r',

# koi8_u codec
'cskoi8u' : 'koi8_u',

# kz1048 codec
'cskz1048' : 'kz1048',
'kz_1048' : 'kz1048',
'rk1048' : 'kz1048',
'strk1048_2002' : 'kz1048',
Expand Down Expand Up @@ -480,7 +516,9 @@
'maclatin2' : 'mac_latin2',

# mac_roman codec
'csmacintosh' : 'mac_roman',
'macintosh' : 'mac_roman',
'mac' : 'mac_roman',
'macroman' : 'mac_roman',

# mac_turkish codec
Expand Down Expand Up @@ -521,40 +559,50 @@
's_jisx0213' : 'shift_jisx0213',

# tis_620 codec
'cstis620' : 'tis_620',
'tis620' : 'tis_620',
'tis_620_0' : 'tis_620',
'tis_620_2529_0' : 'tis_620',
'tis_620_2529_1' : 'tis_620',
'iso_ir_166' : 'tis_620',

# utf_16 codec
'csutf16' : 'utf_16',
'u16' : 'utf_16',
'utf16' : 'utf_16',

# utf_16_be codec
'csutf16be' : 'utf_16_be',
'unicodebigunmarked' : 'utf_16_be',
'utf_16be' : 'utf_16_be',

# utf_16_le codec
'csutf16le' : 'utf_16_le',
'unicodelittleunmarked' : 'utf_16_le',
'utf_16le' : 'utf_16_le',

# utf_32 codec
'csutf32' : 'utf_32',
'u32' : 'utf_32',
'utf32' : 'utf_32',

# utf_32_be codec
'csutf32be' : 'utf_32_be',
'utf_32be' : 'utf_32_be',

# utf_32_le codec
'csutf32le' : 'utf_32_le',
'utf_32le' : 'utf_32_le',

# utf_7 codec
'csunicode11utf7' : 'utf_7',
'csutf7' : 'utf_7',
'u7' : 'utf_7',
'utf7' : 'utf_7',
'unicode_1_1_utf_7' : 'utf_7',

# utf_8 codec
'csutf8' : 'utf_8',
'u8' : 'utf_8',
'utf' : 'utf_8',
'utf8' : 'utf_8',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for more encoding aliases `officially registered with IANA <https://www.iana.org/assignments/character-sets/character-sets.xhtml>`__.
Loading