Jump to content

Module:bcl-pron/sandbox

Hali sa Wiksyunaryo

Documentation for this module may be created at Module:bcl-pron/sandbox/doc

local export = {}
local ustring = require("mw.ustring")

-- Orthography-to-IPA lookup for unaccented letters and digraphs.
-- Accented vowels are deliberately absent: they are resolved through a separate table.
local mappings = {}

-- Letters whose IPA symbol is the letter itself (plain consonants and the five vowels).
for _, letter in ipairs({
	"b", "d", "h", "k", "l", "m", "n", "p", "s", "t", "w",
	"a", "e", "i", "o", "u",
}) do
	mappings[letter] = letter
end

-- Digraphs; the converter looks these up before trying single letters.
mappings["ng"] = "ŋ"
mappings["ch"] = "t͡ʃ"
mappings["rr"] = "ɾ" -- For Spanish loanwords

-- Letters whose IPA symbol differs from the spelling.
mappings["f"] = "p" -- 'f' is often pronounced as /p/ in Bikol
mappings["g"] = "ɡ"
mappings["j"] = "d͡ʒ" -- A common pronunciation for 'j' in Bikol, especially in loanwords
mappings["r"] = "ɾ"
mappings["v"] = "b"
mappings["y"] = "j"
mappings["z"] = "s"
mappings["ñ"] = "ɲ"

-- Accented vowels. Each entry gives the bare IPA vowel plus two flags:
-- whether the accent marks stress and whether it marks a glottal stop.
local accent_mappings = {}
do
	local base_vowels = { "a", "e", "i", "o", "u" }
	-- One row per accent; `letters` is ordered to line up with base_vowels.
	local accent_series = {
		{ letters = { "á", "é", "í", "ó", "ú" }, stress = true, glottal = false },
		{ letters = { "â", "ê", "î", "ô", "û" }, stress = true, glottal = true },
		{ letters = { "à", "è", "ì", "ò", "ù" }, stress = false, glottal = true },
	}
	for _, series in ipairs(accent_series) do
		for idx, letter in ipairs(series.letters) do
			accent_mappings[letter] = {
				ipa = base_vowels[idx],
				stress = series.stress,
				glottal = series.glottal,
			}
		end
	end
end

-- Every letter that counts as a vowel: the five plain vowels plus their
-- acute, circumflex and grave forms.
local VOWELS = "aeiouáâàéêèíîìóôòúûù"

-- Set of the individual characters of VOWELS, keyed by character.
local VOWEL_SET = {}
-- The byte pattern walks VOWELS one UTF-8 sequence at a time
-- (a lead byte followed by any continuation bytes).
for vowel in string.gmatch(VOWELS, "[\1-\127\194-\244][\128-\191]*") do
	VOWEL_SET[vowel] = true
end

--- Report whether `char` is one of the letters in VOWELS.
-- Membership is an exact table lookup rather than a pattern search, so the
-- empty string (e.g. a look-ahead past the end of a word) and pattern-magic
-- characters such as "." or "%" are never reported as vowels.
-- @param char string  a single character
-- @return boolean
local function is_vowel(char)
	return VOWEL_SET[char] == true
end

--- Split a spelling into orthographic syllables.
--
-- Every character accepted by is_vowel is a syllable nucleus. Between two
-- consecutive nuclei the break is placed as follows:
--   * zero or one consonant: right after the first vowel (V.V, V.CV);
--   * two or more consonants: the last consonant opens the next syllable and
--     the rest close the current one (VC.CV). If the last two consonants form
--     a digraph listed in `mappings` ("ng", "ch", "rr") the whole digraph
--     opens the next syllable, so it is never cut in half.
-- Consonants before the first vowel stay in the first syllable; consonants
-- after the last vowel stay in the last one.
--
-- Simplification: onset clusters (e.g. "tr", "pl" in loanwords) are not
-- recognised; only the final consonant or digraph is moved to the onset.
--
-- @param word string  the spelling to split
-- @return table  array of syllables whose concatenation is `word`; it always
--                has at least one entry (a single "" for an empty word)
local function syllabify_orthography(word)
	-- Split into characters once so look-ahead is a plain array access.
	local chars = {}
	local len = ustring.len(word)
	for pos = 1, len do
		chars[pos] = ustring.sub(word, pos, pos)
	end

	-- Bounds-checked vowel test: positions past the end are never vowels,
	-- regardless of how is_vowel treats an empty string.
	local function vowel_at(pos)
		local ch = chars[pos]
		if ch == nil then
			return false
		end
		return is_vowel(ch) and true or false
	end

	local syllables = {}
	local start = 1 -- index of the first character of the syllable being built
	local i = 1
	while i <= len do
		if vowel_at(i) then
			-- Locate the next nucleus.
			local k = i + 1
			while k <= len and not vowel_at(k) do
				k = k + 1
			end
			if k > len then
				-- No further vowel: whatever follows is the coda of this syllable.
				break
			end

			local cut -- index of the last character of the current syllable
			if k - i - 1 <= 1 then
				cut = i
			else
				local onset_len = 1
				if mappings[chars[k - 2] .. chars[k - 1]] then
					onset_len = 2
				end
				cut = k - 1 - onset_len
			end

			syllables[#syllables + 1] = table.concat(chars, "", start, cut)
			start = cut + 1
			i = k
		else
			i = i + 1
		end
	end
	syllables[#syllables + 1] = table.concat(chars, "", start, len)
	return syllables
end

--- Convert one orthographic syllable to IPA.
-- Accented vowels are resolved through accent_mappings, then digraphs and
-- single letters through mappings; any other character is copied unchanged.
-- @param syllable string  one syllable of the pre-processed spelling
-- @return string   IPA for the syllable, without stress mark or added glottal stop
-- @return boolean  true when the syllable contains a stress-marking accent
-- @return boolean  true when the syllable contains a glottal-marking accent
local function transcribe_syllable(syllable)
	local pieces = {}
	local stressed, glottal = false, false
	local len = ustring.len(syllable)
	local j = 1
	while j <= len do
		local char = ustring.sub(syllable, j, j)
		local accent_info = accent_mappings[char]
		-- Only look for a digraph when a second character actually exists.
		local digraph = j < len and mappings[ustring.sub(syllable, j, j + 1)]
		if accent_info then
			pieces[#pieces + 1] = accent_info.ipa
			stressed = stressed or accent_info.stress
			glottal = glottal or accent_info.glottal
			j = j + 1
		elseif digraph then
			pieces[#pieces + 1] = digraph
			j = j + 2
		else
			pieces[#pieces + 1] = mappings[char] or char
			j = j + 1
		end
	end
	return table.concat(pieces), stressed, glottal
end

--- Convert a Bikol spelling to a bracketed IPA transcription.
--
-- Accepts either a frame-like table with an `args` field or a plain string:
--   args[1]  the word to transcribe ("" when absent)
--   args[2]  "yes" to apply the phonetic vowel/diphthong substitutions
--
-- Steps: case-fold; turn "7" into "ʔ"; rewrite Spanish-style qu/cu/gu
-- sequences; syllabify; map each syllable to IPA; add a glottal stop before a
-- word-initial vowel; mark stress (explicit accents, otherwise the
-- penultimate syllable of a multi-syllable word); join syllables with ".".
--
-- @param frame table|string
-- @return string  the transcription wrapped in "[" and "]"
function export.show(frame)
	local word = ""
	local phonetic = false

	if type(frame) == 'table' and frame.args then
		word = frame.args[1] or ""
		phonetic = frame.args[2] == 'yes'
	else
		word = frame or ""
	end

	-- The lookup tables only contain lowercase letters.
	word = ustring.lower(word)
	word = ustring.gsub(word, "7", "ʔ") -- Replace '7' with the glottal stop character

	-- Spanish-style spellings. These run on the whole word, before
	-- syllabification, because the syllabifier would otherwise separate the
	-- "u" from the following vowel and the patterns could never match.
	-- The silent-u cases (que/qui, gue/gui) must come before the general rules.
	word = ustring.gsub(word, "qu([ei])", "k%1")
	word = ustring.gsub(word, "gu([ei])", "ɡ%1")
	word = ustring.gsub(word, "qu([aeiou])", "k%1")
	word = ustring.gsub(word, "cu([aeiou])", "kw%1")
	word = ustring.gsub(word, "gu([aeiou])", "ɡw%1")

	local ipa_syllables = {}
	local stressed = {} -- stressed[n] is true when syllable n gets a stress mark
	local has_explicit_stress = false

	for _, syllable in ipairs(syllabify_orthography(word)) do
		-- Ignore empty syllables so they cannot add a dangling "." or
		-- shift which syllable counts as penultimate.
		if syllable ~= "" then
			local ipa_syllable, stress_here, glottal_here = transcribe_syllable(syllable)
			if glottal_here then
				ipa_syllable = ipa_syllable .. "ʔ"
			end
			local n = #ipa_syllables + 1
			ipa_syllables[n] = ipa_syllable
			if stress_here then
				stressed[n] = true
				has_explicit_stress = true
			end
		end
	end

	if #ipa_syllables == 0 then
		return "[]"
	end

	-- Automatically insert a glottal stop before a word-initial vowel.
	-- Done before stress marks are added so that an explicitly stressed first
	-- syllable is still recognised as vowel-initial.
	if ustring.find(ipa_syllables[1], "^[aeiou]") then
		ipa_syllables[1] = "ʔ" .. ipa_syllables[1]
	end

	-- Default to penultimate stress if no explicit stress was found.
	if not has_explicit_stress and #ipa_syllables > 1 then
		stressed[#ipa_syllables - 1] = true
	end

	for n = 1, #ipa_syllables do
		if stressed[n] then
			ipa_syllables[n] = "ˈ" .. ipa_syllables[n]
		end
	end

	local ipa_word = table.concat(ipa_syllables, ".")

	if phonetic then
		-- Diphthong allophony
		ipa_word = ustring.gsub(ipa_word, "aj", "aɪ̯")
		ipa_word = ustring.gsub(ipa_word, "aw", "aʊ̯")

		-- Vowel allophony
		ipa_word = ustring.gsub(ipa_word, "a", "ɐ")
		ipa_word = ustring.gsub(ipa_word, "i", "ɪ")
		ipa_word = ustring.gsub(ipa_word, "u", "ʊ")
		ipa_word = ustring.gsub(ipa_word, "o", "ɔ")
		ipa_word = ustring.gsub(ipa_word, "e", "ɛ")
	end

	-- Final cleanup: write a glide that directly follows a vowel as a
	-- non-syllabic vowel. The nucleus is kept intact and the diacritic goes on
	-- the glide, so "aj" becomes "ai̯" and "aw" becomes "au̯".
	ipa_word = ustring.gsub(ipa_word, "([aeiou])j", "%1i̯")
	ipa_word = ustring.gsub(ipa_word, "([aeiou])w", "%1u̯")

	return "[" .. ipa_word .. "]"
end

return export