匿名使用者
「模組:Language/data」:修訂間差異
無編輯摘要
imported>Taireich (建立內容為「-- This module converts a number into its written English form. -- For example, "2" becomes "two", and "79" becomes "seventy-nine". local getArgs = require('Module:Arguments').getArgs local p = {} local max = 100 -- The maximum number that can be parsed. local ones = { [0] = 'zero', [1] = 'one', [2] = 'two', [3] = 'three', [4] = 'four', [5] = 'five', [6] = 'six', [7] = 'seven', [8] = 'eight', [9] = 'nine' } local specials = { [10] = 'ten', […」的新頁面) |
imported>Taireich 無編輯摘要 |
||
第1行: | 第1行: | ||
-- Shorthand for building a UTF-8 string from Unicode code points.
local U = mw.ustring.char

-- Diacritics, from the [[Combining Diacritical Marks]] block.
local grave = U(0x300)        -- U+0300 COMBINING GRAVE ACCENT
local acute = U(0x301)        -- U+0301 COMBINING ACUTE ACCENT
local circumflex = U(0x302)   -- U+0302 COMBINING CIRCUMFLEX ACCENT
local tilde = U(0x303)        -- U+0303 COMBINING TILDE
local macron = U(0x304)       -- U+0304 COMBINING MACRON
local breve = U(0x306)        -- U+0306 COMBINING BREVE
local dot = U(0x307)          -- U+0307 COMBINING DOT ABOVE
local diaeresis = U(0x308)    -- U+0308 COMBINING DIAERESIS
local double_acute = U(0x30B) -- U+030B COMBINING DOUBLE ACUTE ACCENT
local caron = U(0x30C)        -- U+030C COMBINING CARON
local double_grave = U(0x30F) -- U+030F COMBINING DOUBLE GRAVE ACCENT
local invbreve = U(0x311)     -- U+0311 COMBINING INVERTED BREVE
local dot_below = U(0x323)    -- U+0323 COMBINING DOT BELOW
local undertie = U(0x35C)     -- U+035C COMBINING DOUBLE BREVE BELOW
--[[
This is a table of Wiktionary language codes with data belonging to them.

Fields that appear in the entries below:
  name            The "canonical name" used on Wiktionary.
  article         The Wikipedia article.
  Wikipedia_name  Name used on Wikipedia where it differs from `name`
                  (presumably; confirm against the consuming module).
  Wikipedia_code  For entries marked "Incorrect tag": the key of the
                  correctly tagged entry in this table.
  type            "reconstructed" or "appendix".
  direction       "rtl" for right-to-left languages.
  replacements    Substitutions for terms in the language. Two shapes occur:
                  a map of pattern -> replacement, or a table holding a
                  `from` list, an optional `to` list and an optional
                  `decompose` flag. NOTE(review): patterns without a
                  matching `to` item appear to be removed; confirm against
                  the consuming module.

Script is the ISO 15924 code (only present in commented-out "scripts"
fields at the moment).
]]

-- Returns a fresh replacement table for the Arabic varieties. A new table is
-- built on every call so that no two language entries share a subtable.
local function arabic_replacements()
	return {
		-- ālif with wasla is replaced by ālif;
		[U(0x0671)] = U(0x0627),
		-- taṭwīl, fatḥatan, ḍammatan, kasratan,
		-- fatḥa, ḍamma, kasra,
		-- shadda, sukūn, and superscript (dagger) ālif are removed.
		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
			.. U(0x064E) .. U(0x064F) .. U(0x0650)
			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
	}
end

local data = {
	["languages"] = {
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["ang"] = {
			["name"] = "Old English",
			-- A plain string, like the article field of every other entry.
			["article"] = "Old English",
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["ara"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["arb"] = {
			["name"] = "Modern Standard Arabic",
			["article"] = "Modern Standard Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["apc"] = {
			["name"] = "North Levantine Arabic",
			["article"] = "North Levantine Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["ajp"] = {
			["name"] = "South Levantine Arabic",
			["article"] = "South Levantine Arabic",
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = arabic_replacements(),
		},
		["av"] = {
			["name"] = "Avar",
		},
		["be"] = {
			["article"] = "Belarusian language",
			["replacements"] = { [acute] = "", },
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cel-pro"] = { -- Incorrect tag
			["name"] = "Proto-Celtic",
			["Wikipedia_code"] = "cel-x-proto",
		},
		["cel-x-proto"] = {
			["name"] = "Proto-Celtic",
		},
		["cel-bry-pro"] = { -- Incorrect tag
			["name"] = "Proto-Brythonic",
			["article"] = "Common Brittonic",
			["type"] = "reconstructed",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["ff"] = {
			["name"] = "Fula",
		},
		["gem-pro"] = { -- Incorrect tag
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["gem-x-proto"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gml"] = {
			["name"] = "Middle Low German",
		},
		["gmw-ecg"] = {
			["name"] = "East Central German",
		},
		["gmw-x-proto"] = {
			["name"] = "Proto-West Germanic",
			["article"] = "Proto-West Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gmq-x-gut"] = {
			["name"] = "Gutnish",
			["article"] = "Gutnish",
		},
		["goh"] = {
			-- Remove macrons, circumflexes, and diaereses
			["replacements"] = {
				decompose = true,
				from = {
					"[" .. macron .. circumflex .. diaeresis .. "]",
				},
			},
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["gsw"] = {
			["name"] = "Alemannic German",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties.
					"[" .. macron .. breve .. undertie .. "]",
				},
				to = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				},
			},
		},
		["grk-pro"] = { -- Incorrect tag
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
			-- Points at the correctly tagged Proto-Hellenic entry below.
			["Wikipedia_code"] = "grk-x-proto",
		},
		["grk-x-proto"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["grt"] = {
			["name"] = "Garo",
		},
		["ha"] = {
			["name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
		},
		["ine-bsl-pro"] = {
			["name"] = "Proto-Balto-Slavic",
			["article"] = "Proto-Balto-Slavic language",
			["type"] = "reconstructed",
		},
		["ine-pro"] = { -- Incorrect tag
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ine-x-proto"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			-- Remove macrons, breves, and diaereses
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
		},
		["nci"] = {
			["name"] = "Classical Nahuatl",
			["article"] = "Classical Nahuatl",
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["odt"] = {
			["name"] = "Old Dutch",
		},
		["oge"] = {
			["name"] = "Old Georgian",
		},
		["oj"] = {
			["name"] = "Ojibwe",
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["osx"] = {
			["name"] = "Old Saxon",
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
		},
		["pis"] = {
			["name"] = "Pijin",
			["article"] = "Pijin language",
		},
		["poz-x-poly-proto"] = {
			["name"] = "Proto-Nuclear Polynesian",
			["article"] = "Proto-Polynesian language",
			["type"] = "reconstructed",
		},
		["rap"] = {
			["name"] = "Rapa Nui",
			["article"] = "Rapa Nui language",
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			["replacements"] = { [acute] = "", },
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				-- A consonant, an apostrophe, then the same consonant again
				-- becomes the doubled consonant without the apostrophe.
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sem-pro"] = {
			["name"] = "Proto-Semitic",
			["article"] = "Proto-Semitic",
			["type"] = "reconstructed",
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			["replacements"] = {
				decompose = true,
				-- Drop an accent mark that follows one of the listed Latin or
				-- Cyrillic letters, keeping the letter itself.
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to = { "%1" },
			},
		},
		["sl"] = {
			["name"] = "Slovene",
			["replacements"] = {
				decompose = true,
				-- remove tonal orthography
				from = {"ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]"},
				to = {"l"},
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			["replacements"] = { [acute] = "", },
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xgf"] = {
			["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
			["article"] = "Tongva language",
			["replacements"] = {
				["['`ʔ]"] = "ʼ",
			},
		},
		["xxt"] = {
			["name"] = "Tambora",
			["article"] = "Tambora language",
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
		},
		["yua"] = {
			["name"] = "Yucatec Maya",
			["article"] = "Yucatec Maya language",
		},
	},
	-- Here, keys (for example, "gem") are Wikipedia language codes used in
	-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
	-- code.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["kjv"] = "sh",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

return data