Module:Str find word

require('strict')
local p = {}
local getArgs = require('Module:Arguments').getArgs
local str = require('Module:String')
local yesno = require('Module:Yesno')
-- Separator returned by setSep() when no |sep= value is supplied.
local defaultSep = ','
-- Word limit used by the parsing loop in buildWordTable().
local iMaxWords = 16
-- Warning text set by buildWordTable() when the word limit is hit; nil otherwise.
local warningIMaxWordsReached = nil
-- Number of quoted literals read; only incremented inside the (currently
-- disabled) literal-parsing step of buildWordTable().
local xpLitWordCount = 0
local report -- to be initialised (see initReport) when an explain report is needed

-- Initialise the /report subpage.
-- Loads 'Module:Str find word/report' into the module-level `report` variable.
-- Only invoked when 'explain' is asked for, so the subpage is not loaded
-- on ordinary calls.
local function initReport()
	report = require('Module:Str find word/report')
end

-- Decode HTML entities as early as possible (e.g. "&#x41;" becomes "A")
-- and collapse every run of whitespace into one plain space.
-- (Per the original author's note the whitespace class is expected to
-- cover nbsp as well -- confirm against mw.ustring's %s definition.)
-- @param sText string to normalise
-- @return the normalised string (exactly one return value)
local function decodeUnicode(sText)
	-- gsub returns (string, count); the extra parentheses drop the count so
	-- it cannot leak into a caller that forwards all results.
	return (mw.ustring.gsub(mw.text.decode(sText), '%s+', ' '))
end

-- %-Escape any word (character string) before feeding it into a string pattern function.
-- Delegates to Module:String's _escapePattern.
-- NOTE(review): the original comment states that all punctuation (%p) is
-- %-escaped; the exact character set is decided by Module:String -- confirm there.
local function escape_word(word)
	return str._escapePattern(word)
end

-- Reads and parses a word list and returns a table with words (simple array).
-- The word list can be: source, and-words-to-check, or-words-to-check.
-- step 1: when case-insensitive, turn the string into lowercase (Unicode-aware)
-- step 2: read & remove literals ("..")  -- currently disabled
-- step 3: read separator-delimited words; at most iMaxWords non-empty
--         segments are read, segments beyond that are ignored and
--         warningIMaxWordsReached is set
-- step 4: when tArgs.booleans is set, change boolean words into 'true'/'false'
--         (Module:Yesno rules); other words remain untouched
-- All words returned are trimmed; whitespace-only segments are dropped
-- (they still count towards the iMaxWords limit).
-- @param tArgs     parsed arguments; reads .case, .sep, .explain, .booleans
-- @param sWordlist the word list string
-- @return the table (a straight array of words)
local function buildWordTable(tArgs, sWordlist)
	local wordTable = {}
	if sWordlist == '' then return wordTable end

	-- Step 1: case-sensitivity
	if yesno(tArgs.case, true) == false then
		-- mw.ustring.lower, not string.lower: the latter is byte-based and
		-- leaves non-ASCII letters unfolded.
		sWordlist = mw.ustring.lower(sWordlist)
	end

	-- Step 2: read "literals",
	-- then remove them from the string:
	-- replaced by single comma; idle & keeps word separation
	-- (deliberately disabled; kept so the feature can be re-enabled)
	--- if yesno(tArgs.literals, false) then
	if false then
		local _, sCount
		_, sCount = mw.ustring.gsub(sWordlist, '"', '')
		if sCount > 1 then
			local litWord = ''
			local i, j

			while sCount > 1 do -- could do here: only when even?
				i = string.find(sWordlist, '%"', 1, false)
				j = string.find(sWordlist, '%"', i+1, false)
				litWord = mw.text.trim(string.sub(sWordlist, i+1, j-1))
				if #litWord > 0 then -- not an empty string or spaces only
					xpLitWordCount = xpLitWordCount + 1
					table.insert(wordTable, litWord)
				end
				-- remove from source, and do next gsub search:
				sWordlist = string.gsub(sWordlist, '%"%s*'
												.. escape_word(litWord)
												.. '%s*%"', ',')
				_, sCount = mw.ustring.gsub(sWordlist, '"', '')
			end
		end
	end

	-- Step 3: parse separator-delimited words
	-- Wrapping the list in separators lets the frontier pattern treat the
	-- first and last word like any other.
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep
	local eSep = escape_word(tArgs.sep)
	local patstring = '%f[^' .. eSep .. '][^' .. eSep .. ']+%f[' .. eSep .. ']'
	if yesno(tArgs.explain, false) then
		report.xpMessage('1.eSep: ' .. eSep) -- dev
		report.xpMessage('2.pattern: ' .. patstring) -- dev
	end

	-- str._match returns tArgs.sep as the "no match" sentinel. A real match
	-- never contains a separator character, so comparing the untrimmed
	-- result with the sentinel is unambiguous.
	local matchIndex = 0
	local listExhausted = false
	while matchIndex < iMaxWords do
		matchIndex = matchIndex + 1
		local rawWord = str._match(sWordlist, patstring, 1, matchIndex, false, tArgs.sep)
		if rawWord == tArgs.sep then
			-- no more words found in the string
			listExhausted = true
			break
		end
		local hitWord = mw.text.trim(rawWord)
		if hitWord ~= '' then
			table.insert(wordTable, hitWord)
		end
	end
	-- Warn only when something really was left unread, i.e. when a segment
	-- number iMaxWords + 1 exists.
	if not listExhausted
		and str._match(sWordlist, patstring, 1, iMaxWords + 1, false, tArgs.sep) ~= tArgs.sep then
		warningIMaxWordsReached = 'Max number of words (' .. tostring(iMaxWords) .. ') reached. Extra words are ignored.'
									.. ' (' .. mw.ustring.sub(mw.text.trim(sWordlist), 1, 90) .. '&nbsp;...). '
	end

	-- Step 4: when reading booleans, convert words to true/false
	-- todo: check parameter here not elsewhere
	if tArgs.booleans then -- TODO if Yesno(tArgs.booleans) ...
		for i, v in ipairs(wordTable) do
			local sBool = yesno(v)
			if sBool ~= nil then
				wordTable[i] = tostring(sBool)
			end
		end
	end

	return wordTable
end

-- Look up a single word in a word table (a simple array of words).
-- Comparison is exact (==).
-- @return the word itself when present, nil otherwise
local function findWordInTable(sourceWordTable, word)
	for _, candidate in ipairs(sourceWordTable) do
		if candidate == word then
			return word
		end
	end
	return nil
end

-- AND-logic: every word of andWordTable must occur in sourceWordTable.
-- returns two values: T/F, hit table
--		T when *all* AND words are found
--		hit table holding every AND word that was found
-- note 1: when F, the hit table still lists the words that were found
--		(the loop deliberately does not stop at the first miss)
-- note 2: an empty AND word list yields T (nothing falsifies it)
local function checkANDwords(sourceWordTable, andWordTable)
	local allFound = true
	local foundWords = {}

	for _, wanted in ipairs(andWordTable) do
		local hit = findWordInTable(sourceWordTable, wanted)
		if hit then
			table.insert(foundWords, hit)
		else
			allFound = false -- Falsified! keep going to complete the hit table
		end
	end

	return allFound, foundWords
end

-- OR-logic: at least one word of orWordTable must occur in sourceWordTable.
-- returns two values: T/F, hit table
--		T when at least one OR word is found
--		hit table holding every OR word that was found
-- note 1: an empty OR word list yields T (nothing falsifies it)
-- note 2: one hit suffices for T, but all words are checked so the hit
--		table is complete
local function checkORwords(sourceWordTable, orWordTable)
	local foundWords = {}

	for _, candidate in ipairs(orWordTable) do
		local hit = findWordInTable(sourceWordTable, candidate)
		if hit then
			foundWords[#foundWords + 1] = hit
		end
	end

	local anyFound = (#orWordTable == 0) or (#foundWords > 0)
	return anyFound, foundWords
end

-- Pick the string that is finally returned to the caller.
-- sYeslist is the logical outcome of _main: '' means False, anything else True.
--		False: return tArgs.no, or '' when tArgs.no is not set
--		True:  return tArgs.yes when it is set (it may be '', giving a blank
--		       result); when tArgs.yes is nil, return sYeslist itself
local function yesnoReturnstring(tArgs, sYeslist)
	if sYeslist == '' then
		return tArgs.no or ''
	end
	if tArgs.yes ~= nil then
		return tArgs.yes
	end
	return sYeslist
end

-- True when Module:If preview's _warning({'is_preview'}) returns a
-- non-empty string, false when it returns ''.
local function isPreview()
	local previewText = require('Module:If preview')._warning({'is_preview'})
	return previewText ~= ''
end

-- Explain options (=report info), meant to interpret parameter explain=.
-- Intended design (per the original notes), NOT what the code does today:
--		returns true/false/'testcases'
--		explain=true => show report in Preview
--		explain=testcases => WHEN in ns: template: or user: AND subpage = '/testcases' THEN show permanently
-- Current behaviour: the feature is switched off; tArgs is ignored and
-- false is always returned.
local function checkExplain(tArgs)
	return false  -- never (explain disabled, 22Mar2023)
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main: check for the presence of words in the source string.
-- Returns:
--		when T: the string of all hit words (default), or the |yes=... input
--		when F: empty string '' (default), or the |no=... input
-- Steps:
-- 1. the three input strings (source, AND words, OR words) are parsed into
--    word arrays
-- 2. AND-logic and OR-logic checks are run; with an empty source, or with
--    no words to look for at all, the result is F without checking
-- 3. the overall result is T only when both checks are T
-- 4. optionally the explain report is built (debug, feedback)
-- 5. the return string is chosen from the T/F status
-- note: each return value (yes=.., no=..) can be '' (empty string)
function p._main(tArgs)
	-- 1. parse; the order source -> AND -> OR is kept on purpose
	local sourceWordTable = buildWordTable(tArgs, tArgs.source)
	local andWordTable = buildWordTable(tArgs, tArgs.andString)
	local orWordTable = buildWordTable(tArgs, tArgs.orString)

	-- 2./3. logical findings
	local bANDresult, bORresult = false, false
	local tANDhits, tORhits
	local resultALL = false

	local nothingToCheck = (#sourceWordTable == 0)
		or (#andWordTable + #orWordTable == 0)
	if nothingToCheck then
		if yesno(tArgs.explain, false) then
			report.xpNoWords(tArgs, sourceWordTable, andWordTable, orWordTable)
		end
	else
		bANDresult, tANDhits = checkANDwords(sourceWordTable, andWordTable)
		bORresult, tORhits = checkORwords(sourceWordTable, orWordTable)
		resultALL = bANDresult and bORresult
	end

	-- all hit words in one list: AND hits first, then OR hits
	local sYeslist = ''
	if resultALL then
		local allHits = {}
		for _, hit in ipairs(tANDhits) do
			allHits[#allHits + 1] = hit
		end
		for _, hit in ipairs(tORhits) do
			allHits[#allHits + 1] = hit
		end
		sYeslist = table.concat(allHits, tArgs.sep)
	end

	-- 4. explain report
	if yesno(tArgs.explain, false) then
		if (tArgs.yes == '') and (tArgs.no == '') then
			report.xpYesNoBothBlank()
		end
		if warningIMaxWordsReached ~= nil then
			report.xpMessage(warningIMaxWordsReached)
		end
		report.xpBuildReport(tArgs, sourceWordTable,
						bANDresult, andWordTable, tANDhits,
						bORresult, orWordTable, tORhits,
						sYeslist, xpLitWordCount)
	end

	-- 5. return value
	return yesnoReturnstring(tArgs, sYeslist)
end

-- Set the word separator.
-- Returns a single-character separator derived from |sep=, or defaultSep when
--		* no separator was given (nil),
--		* the given value contains whitespace, a letter or a digit
--		  (such characters can be part of a word, so they cannot separate words),
--		* the given value is empty.
-- When a longer, otherwise regular value is given, only its first character
-- is used.
-- todo what with {{!}}
-- @param sSep raw |sep= input, or nil
-- @return the separator to use (string)
local function setSep(sSep)
	if sSep == nil then return defaultSep end

	sSep = decodeUnicode(sSep)
	if mw.ustring.match(sSep, '[%s%w]') ~= nil then
		-- irregular characters in sep: fall back to the default
		return defaultSep
	end

	-- mw.ustring.sub, not string.sub: a multi-byte separator must not be
	-- cut in the middle of its UTF-8 sequence.
	local newSep = mw.ustring.sub(sSep, 1, 1)
	if newSep == '' then
		return defaultSep
	end
	return newSep
end

-- Join two word-list strings into one, with newSep in between.
-- A nil list is skipped, so no stray separator is produced when only one
-- (or neither) list is given. A nil newSep joins without separator.
local function concatAndLists(s1, s2, newSep)
	local parts = {}
	if s1 ~= nil then
		parts[#parts + 1] = s1
	end
	if s2 ~= nil then
		parts[#parts + 1] = s2
	end
	return table.concat(parts, newSep)
end

-- Translate the raw template arguments into the argument table used by _main.
-- Accepted input names (first one present wins):
--		sep; s / source; w / word; andw / andwords; orw / orwords;
--		case / casesensitive; bool / booleans; literals / lit; yes; no
-- Output keys: sep, source, andString, orString, case, booleans, literals,
--		yes, no, explain
-- @param origArgs table of raw arguments
-- @return new table with normalised arguments
local function parseArgs(origArgs)
	-- First non-nil of a and b, else fallback. Unlike `a or b or fallback`
	-- this keeps an explicit boolean false.
	local function firstSet(a, b, fallback)
		if a ~= nil then return a end
		if b ~= nil then return b end
		return fallback
	end

	local newArgs = {}
	newArgs['sep']		= setSep(origArgs['sep']) -- do first, needed below
	newArgs['source']	= decodeUnicode(origArgs['s'] or origArgs['source'] or '')
	-- |w= and |andw= are both AND lists; they are joined with the word
	-- separator so that the last word of the first list and the first word
	-- of the second list stay separate words.
	newArgs['andString'] = decodeUnicode(concatAndLists(
									origArgs['w'] or origArgs['word'] or nil,
									origArgs['andw'] or origArgs['andwords'] or nil,
									newArgs.sep)
									)
	newArgs['orString']	= decodeUnicode(origArgs['orw'] or origArgs['orwords'] or '')
	-- boolean options: catch both parameters, also handle nil & nonsense input values:
	newArgs['case']		= yesno(firstSet(origArgs['case'], origArgs['casesensitive'], true), true) -- defaults to True
	newArgs['booleans']	= yesno(firstSet(origArgs['bool'], origArgs['booleans'], false), false) -- defaults to False
	newArgs['literals']	= yesno(firstSet(origArgs['literals'], origArgs['lit'], true), true) -- defaults to True
	newArgs['yes']		= origArgs['yes'] or nil -- nil; default so return sYeslist; keep '' as legal input & return value
	newArgs['no']		= origArgs['no'] or ''
	newArgs['explain']	= false -- never (explain disabled, 22Mar2023)

	return newArgs
end

-- Template entry point.
-- Reads the arguments from the frame, normalises them, runs _main and
-- returns its result. When the word limit warning was set while parsing,
-- the warning produced by Module:If preview is appended. When explain is
-- active, the report is initialised before _main runs and its presentation
-- is appended to the result.
function p.main(frame)
	local origArgs = getArgs(frame)
	local tArgs = parseArgs(origArgs)

	if yesno(tArgs.explain, false) then
		initReport()
		report.xpListArguments(origArgs)
	end

	local sReturn = p._main(tArgs)

	if warningIMaxWordsReached ~= nil then
		local previewModule = require('Module:If preview')
		sReturn = sReturn .. previewModule._warning({warningIMaxWordsReached})
	end

	if not yesno(tArgs.explain, false) then
		return sReturn
	end
	return sReturn .. report.xpPresent(tArgs.explain)
end

return p