Modul:zh-translit

Dokumentasi untuk modul ini dapat dibuat di Modul:zh-translit/doc
-- Table of functions made available to callers of this module; returned at the end of the file.
local export = {}

--- Build the result returned when no transliteration can be produced.
-- @param lang     language code of the text that could not be transliterated
-- @param request  if truthy, also produce a "Requests for transliteration"
--                 category named after the language
-- @return nil (no transliteration), true (presumably a failure flag for the
--         caller -- confirm against the calling module), and either nil or a
--         one-element list holding the category name
local function fail(lang, request)
	local cat
	if request then
		-- The language object is only needed for its canonical name, so it is
		-- looked up only when a category was requested.
		local langObj = require("Module:languages").getByCode(lang)
		-- Guard against a failed lookup: omit the category instead of raising
		-- "attempt to index a nil value".
		if langObj then
			cat = {"Requests for transliteration of " .. langObj:getCanonicalName() .. " terms"}
		end
	end
	return nil, true, cat
end

-- Language code -> name of that lect's parameter inside {{zh-pron}}.
-- The values are spliced straight into Lua patterns, so any magic character
-- has to be escaped (for instance "-" is written "%-").
local zhpron_lect_code = {
	cdo = "md", -- Min Dong
	cjy = "j", -- Jin
	cmn = "m", -- Mandarin
	["cmn-sic"] = "m%-s", -- Sichuanese
	gan = "g", -- Gan
	hak = "h", -- Hakka
	hsn = "x", -- Xiang
	ltc = "mc", -- Middle Chinese
	mnp = "mb", -- Min Bei
	nan = "mn", -- Min Nan
	och = "oc", -- Old Chinese
	wuu = "w", -- Wu
	yue = "c", -- Cantonese
	zh = "m", -- Chinese (general); uses Mandarin pinyin
	["zhx-tai"] = "c%-t", -- Taishanese
	["zhx-teo"] = "mn%-t", -- Teochew
}

--- Transliterate Chinese-script text using the pronunciation recorded in the
-- {{zh-pron}} template on the page whose title is `text`.
--
-- For Middle Chinese ("ltc") and Old Chinese ("och") the {{zh-pron}} value is
-- only a list of reading indices; the readings themselves come from
-- per-character data modules under "Module:zh/data/".
--
-- @param text  the text to transliterate; also used as the page title to load
-- @param lang  language code; a code with no entry in `zhpron_lect_code` is
--              first replaced by its non-etymological code
-- @param sc    not used by this function; kept so the signature is unchanged
-- @return the transliteration followed by one space; `text` itself when it is
--         nil or empty; or whatever `fail` returns (nil, true, optional
--         category list) when no unambiguous reading can be determined
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end
	
	if not zhpron_lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getNonEtymologicalCode()
	end
	
	-- These two lects are transliterated from data modules, not from the page text.
	local is_historical = lang == "ltc" or lang == "och"
	
	-- Byte pattern matching exactly one UTF-8 encoded character.
	local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
	local first_char = "^" .. UTF8_char
	local lower = mw.ustring.lower
	
	-- Lower-case the first character of `s`. The whole UTF-8 character is
	-- passed to `lower`, not just its first byte, so capitalised non-ASCII
	-- initials (e.g. "Ā") are handled.
	local function lcfirst(s)
		return (s:gsub(first_char, lower))
	end
	
	-- Normalise raw wikitext so that it can be searched with plain Lua patterns.
	-- Returns the processed string; nil when there is no page but the lect can
	-- still be handled from data modules; false when the missing page is fatal.
	local function process_content(wikitext)
		if not wikitext then
			-- NOTE(review): the original guard read `lang == "ltc" and lang == "och"`,
			-- which can never be true. A missing page is now tolerated for these two
			-- lects (their readings come from data modules); existing pages are still
			-- processed because the matching below relies on the \1/\2 markers.
			if is_historical then
				return nil
			end
			return false
		end
		-- Remove HTML comments, convert template brackets to individual characters and remove any templates nested within {{zh-pron}}.
		wikitext = wikitext
			:gsub("<!%-%-", "\1")
			:gsub("%-%->", "\2")
			:gsub("\1[^\2]*%f[%z\2]\2?", "")
			:gsub("\2", "-->")
			:gsub("{{", "\1")
			:gsub("}}", "\2")
		local subs
		repeat
			wikitext, subs = wikitext:gsub("(\1zh%-pron[^\2]*)\1[^\2]*\2", "%1")
		until subs == 0
		-- Unify the separator between alternative readings to ";". The extra
		-- parentheses drop gsub's second return value (the substitution count).
		if lang == "cmn" or lang == "wuu" or lang == "yue" or lang == "zh" or lang == "zhx-tai" then
			return (wikitext:gsub(",([^ ])", ";%1"))
		else
			return (wikitext:gsub("/([^ ])", ";%1"))
		end
	end
	
	local title = mw.title.new(text)
	local content = process_content(title and title:getContent())
	if content == false then
		return fail(lang)
	end
	
	-- Matches a {{zh-pron}} call up to and including "|<lect parameter>=".
	local pron_param = "\1zh%-pron[^\2]*|%s?" .. zhpron_lect_code[lang] .. "="
	
	-- Extract the single reading shared by every {{zh-pron}} call in `wikitext`.
	-- Returns the reading, nil if none is given, or false if they are ambiguous.
	local function get_prons(wikitext)
		local ret
		for pron_sect in wikitext:gmatch(pron_param .. "([^\2|\n]*[^%s\2|\n][^\2|\n]*)") do
			-- Collect the readings into a fresh array, skipping "key=value"
			-- modifiers. Assigning nil into the split result would leave holes,
			-- making `#prons` unreliable and possibly leaving prons[1] nil.
			local prons = {}
			for _, pron in ipairs(mw.text.split(mw.text.trim(pron_sect), "%s*;%s*")) do
				if not pron:match("=") then
					prons[#prons + 1] = pron
				end
			end
			
			-- A section consisting only of modifiers contributes nothing.
			if #prons > 0 then
				if #prons > 2 then
					return false
				elseif #prons > 1 then
					-- Two readings are acceptable only if they differ just in
					-- the case of the first character.
					if lcfirst(prons[1]) == lcfirst(prons[2]) then
						prons[1] = lcfirst(prons[1])
					else
						return false
					end
				elseif ret and prons[1] ~= ret then
					-- Same tolerance when comparing against an earlier section.
					if lcfirst(prons[1]) == lcfirst(ret) then
						prons[1] = lcfirst(prons[1])
					else
						return false
					end
				end
				
				ret = prons[1]
			end
		end
		return ret
	end
	
	-- Follow {{zh-see}} redirects recursively, collecting in `pages` the processed
	-- text of every target page that has a non-blank value for this lect.
	local pages, seen_pages = {}, {}
	local function get_sees(wikitext)
		for pron_see in wikitext:gmatch("\1zh%-see|[^\2]*\2") do
			local page = pron_see:match("|1=([^|\2]+)[|\2]") or pron_see:match("\1zh%-see|([^|\2]+)[|\2]")
			-- Ignore calls without a target; if we've seen this page before, stop.
			if page and not seen_pages[page] then
				seen_pages[page] = true
				-- Otherwise, get the page content and repeat.
				local see_title = mw.title.new(page)
				local see_content = see_title and process_content(see_title:getContent())
				-- `see_content` is nil or false for an invalid title or missing page.
				if see_content then
					if see_content:match(pron_param .. "[^\2|\n]*[^%s\2|\n][^\2|\n]*") then
						table.insert(pages, see_content)
					else
						get_sees(see_content)
					end
				end
			end
		end
	end
	
	local tr
	if content then
		if is_historical then
			-- Every {{zh-pron}} on the page must give the same index list.
			for pron_sect in content:gmatch(pron_param .. "[^|\2\n]-([^=|\2\n]+)") do
				if tr and tr ~= pron_sect then
					return fail(lang)
				end
				tr = pron_sect
			end
		else
			-- A reading explicitly flagged with "{default}" wins outright.
			tr = content:match(pron_param .. "[^|\2\n]-;*([^;=|\2\n]+){default}")
			
			if not tr then
				tr = get_prons(content)
				if tr == false then
					return fail(lang, true)
				elseif not tr then
					-- Nothing on this page: try the pages it points to.
					get_sees(content)
					tr = get_prons(table.concat(pages))
					if tr == false then
						return fail(lang, true)
					end
				end
				if not tr then
					return fail(lang)
				end
			end
		end
	end
	
	if lang == "cmn" or lang == "zh" then
		local Hani = require("Module:scripts").getByCode("Hani"):getCharacters()
		tr = tr:gsub("#", "")
		-- If the value still contains Han characters, replace each one with the
		-- first entry listed for it in the cmn-tag data (unknown characters are kept).
		if mw.ustring.match(tr, "[" .. Hani .. "]") then
			local tag = mw.loadData("Module:zh/data/cmn-tag").MT
			tr = tr
				:gsub("一", "yī")
				:gsub("不", "bù")
				:gsub(UTF8_char, function(c1)
					return tag[c1] and tag[c1][1] or c1
				end)
		end
	elseif lang == "cmn-sic" then
		-- Separate syllables after the tone digits, then superscript the digits.
		tr = tr
			:gsub("([%d-])(%a)", "%1 %2")
			:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "hak" then
		-- TODO
	elseif is_historical then
		if tr == "n" then
			return fail(lang)
		end
		-- Per-character reading selectors taken from the page ("y" or a number).
		local index = {}
		if tr then
			if lang == "ltc" then
				index = mw.text.split(tr, ",")
			else
				index = mw.text.split(tr, ";")
			end
		end
		
		local module_type = lang .. "-pron"
		if lang == "och" then
			module_type = module_type .. "-ZS"
		end
		
		-- Results are collected separately from `index` so that surplus selectors
		-- (more selectors than characters) cannot leak into the output.
		local readings = {}
		for i = 1, mw.ustring.len(text) do
			local success, data_module = pcall(require, "Module:zh/data/" .. module_type .. "/" .. mw.ustring.sub(text, i, i))
			local choice = index[i]
			
			-- Fail if there is no data for the character, or if it has several
			-- readings and the page does not say which one to use.
			if not success or (((not choice) or choice == "y") and #data_module > 1) then
				return fail(lang)
			end
			
			if choice == "y" then
				choice = 1
			elseif choice then
				choice = tonumber(choice)
			end
			
			-- Fall back to the first reading when the selector is absent or unusable.
			local entry = choice and data_module[choice] or data_module[1]
			
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(entry)
				readings[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
			else
				readings[i] = entry[6]
			end
		end
		tr = table.concat(readings, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "yue" then
		tr = tr:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "zhx-tai" then
		tr = tr:gsub("[%d*]+%-?[%d*]*", "<sup>%0</sup>")
	elseif lang == "zhx-teo" then
		-- TODO
	else
		-- Remaining lects: delegate to the `rom` function of "Module:<lang>-pron".
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end

-- Hand the table of exported functions (currently `tr`) to whoever loads this module.
return export