
package org.ml.bp.misc;

/**
 * Wildcard string matching
 *
 * Most methods assume the common wildcards of asterisk
 * for any number of characters, question mark for 
 * matching a single character, and backslash for 
 * an escape char.  
 *
 * One method lets you specify all special characters so 
 * you could do an SQL type match with percent and underscore
 * as the wildcard characters
 */

import java.util.*;

/**
 * Wildcard string matching with optional capture of the wildcard-matched text.
 *
 * The convenience overloads use an asterisk for "any run of characters" and a
 * question mark for "exactly one character"; the full overload lets the caller
 * choose all three special characters (for example percent and underscore for
 * an SQL-style match).
 *
 * All methods are static and keep no state between calls.
 */
public class Wildcard {

    /**
     * Compare two characters.
     *
     * @param a first character
     * @param b second character
     * @param isCaseSensitive true if the comparison should take case into consideration, false if not
     * @return true if the characters are equal, false if not
     */
    private static boolean charMatch(char a, char b, boolean isCaseSensitive) {
        if (isCaseSensitive) {
            return a == b;
        }
        // Folding through lower case and then upper case is the approach recommended in
        // "The Java Programming Language", section 13.5, to cope with alphabets (Georgian
        // is the usual example) where a single case conversion is not enough.
        return Character.toUpperCase(Character.toLowerCase(a))
                == Character.toUpperCase(Character.toLowerCase(b));
    }

    /**
     * Simple check if the strings match. Case is ignored, '*' and '?' are the
     * wildcards, and there is no usable escape character (it is set to the NUL char).
     *
     * @param s the string to test
     * @param mask the mask, possibly containing wildcards
     * @return true if s matches mask (two nulls count as a match)
     */
    public static boolean match(String s, String mask) {
        return match(s, mask, "y", false, '*', '?', (char) 0) != null;
    }

    /**
     * Check if the strings match and format the captured wildcard text.
     * Case is ignored; '*', '?' and backslash are the special characters.
     *
     * @param s the string to test
     * @param mask the mask, possibly containing wildcards
     * @param resultMask describes how to format the return value
     * @return null if there is no match, otherwise the expanded result mask
     */
    public static String match(String s, String mask, String resultMask) {
        return match(s, mask, resultMask, false, '*', '?', '\\');
    }

    /**
     * Compare a string against a wildcard mask and, on success, expand a result mask
     * with the text the wildcards matched.
     *
     * Matching rules:
     * - multiChar matches any run of characters, including an empty one;
     * - singleChar matches exactly one character;
     * - escapeChar makes the following mask character a literal. An escapeChar that is
     *   the very last character of the mask stands for itself (the result mask is
     *   treated the same way).
     * When several splits are possible, earlier multiChar wildcards are kept as short
     * as possible.
     *
     * Result formatting: each multiChar in resultMask is replaced by the text matched
     * by the next multiChar of the mask, each singleChar by the next character matched
     * by a singleChar of the mask (the two sequences are consumed independently).
     * Wildcards in resultMask that have no corresponding capture produce nothing.
     *
     * Null handling: if s and mask are both null the result is resultMask (even if that
     * is null); otherwise any null argument yields null. An empty mask only matches an
     * empty string.
     *
     * @param s the string to match
     * @param mask the string possibly containing wildcards we want to match
     * @param resultMask a string describing how to format the return value
     * @param isCaseSensitive should the match be case-sensitive
     * @param multiChar character that matches any number of characters (usually an asterisk)
     * @param singleChar character that matches any -one- character (usually a question mark)
     * @param escapeChar character that escapes itself and the single/multi chars (usually a backslash)
     * @return null if no match, or the result mask with its wildcard characters replaced
     *  by the matched sections of the original string
     *
     * for example: match("Abcdef", "a*d?f", "x?x*", false, '*', '?', '\\') returns "xexbc"
     */
    public static String match(String s, String mask, String resultMask, boolean isCaseSensitive,
            char multiChar, char singleChar, char escapeChar) {
        // --- trivial cases -------------------------------------------------------------
        if ((s == null) && (mask == null)) {
            return resultMask;
        }
        if ((s == null) || (mask == null) || (resultMask == null)) {
            return null;
        }
        if (mask.length() == 0) {
            return (s.length() == 0) ? resultMask : null;
        }

        char[] sa = s.toCharArray();
        char[] ma = mask.toCharArray();

        // One checkpoint per multiChar wildcard currently on the match path. A mask cannot
        // contain more wildcards than characters, so ma.length slots always suffice.
        int[] starMaskPos = new int[ma.length]; // index of the wildcard in the mask
        int[] starFrom = new int[ma.length];    // first string index it covers
        int[] starTo = new int[ma.length];      // one past the last string index it covers
        int[] starQmarks = new int[ma.length];  // number of '?' captures made before it
        int depth = 0;

        StringBuffer questionMarks = new StringBuffer();
        int i = 0; // position in mask
        int j = 0; // position in string

        // --- matching ------------------------------------------------------------------
        while (true) {
            boolean misMatch = false;

            if (i == ma.length) {
                if (j == sa.length) {
                    break; // mask and string exhausted together: success
                }
                misMatch = true; // string has characters the mask did not account for
            } else {
                char ch = ma[i];
                if (ch == multiChar) {
                    starMaskPos[depth] = i;
                    starFrom[depth] = j;
                    starQmarks[depth] = questionMarks.length();
                    if (i == ma.length - 1) {
                        // wildcard at the end of the mask swallows the rest of the string
                        j = sa.length;
                    }
                    // otherwise start with an empty match; backtracking grows it as needed
                    starTo[depth] = j;
                    depth++;
                    i++;
                } else if (ch == singleChar) {
                    if (j < sa.length) {
                        questionMarks.append(sa[j]);
                        i++;
                        j++;
                    } else {
                        misMatch = true;
                    }
                } else {
                    int maskStep = 1;
                    if ((ch == escapeChar) && (i + 1 < ma.length)) {
                        // escaped character: compare the next mask character literally
                        ch = ma[i + 1];
                        maskStep = 2;
                    }
                    if ((j < sa.length) && charMatch(ch, sa[j], isCaseSensitive)) {
                        i += maskStep;
                        j++;
                    } else {
                        misMatch = true;
                    }
                }
            }

            // Backtrack: let the most recent wildcard cover one more character and resume
            // right after it. A wildcard that already reaches the end of the string cannot
            // grow, so it is dropped and the one before it is tried instead.
            while (misMatch) {
                if (depth == 0) {
                    return null;
                }
                int top = depth - 1;
                if (starTo[top] < sa.length) {
                    starTo[top]++;
                    i = starMaskPos[top] + 1;
                    j = starTo[top];
                    // discard '?' captures made on the abandoned path
                    questionMarks.setLength(starQmarks[top]);
                    misMatch = false;
                } else {
                    depth--;
                }
            }
        }

        // --- build the result ----------------------------------------------------------
        StringBuffer result = new StringBuffer();
        char[] ra = resultMask.toCharArray();
        int nextStar = 0;
        int nextQmark = 0;
        for (int k = 0; k < ra.length; k++) {
            char ch = ra[k];
            if (ch == singleChar) {
                if (nextQmark < questionMarks.length()) {
                    result.append(questionMarks.charAt(nextQmark++));
                }
            } else if (ch == multiChar) {
                if (nextStar < depth) {
                    result.append(sa, starFrom[nextStar], starTo[nextStar] - starFrom[nextStar]);
                    nextStar++;
                }
            } else {
                if ((ch == escapeChar) && (k < ra.length - 1)) {
                    ch = ra[++k]; // escaped character is copied literally
                }
                result.append(ch);
            }
        }
        return result.toString();
    }

    /**
     * Just a simple true/false check if the strings match, using '*', '?' and
     * backslash as the special characters.
     *
     * @param s the string to test
     * @param mask the mask, possibly containing wildcards
     * @param isCaseSensitive should the match be case-sensitive
     * @return true if s matches mask (two nulls count as a match)
     */
    public static boolean match(String s, String mask, boolean isCaseSensitive) {
        return match(s, mask, "y", isCaseSensitive, '*', '?', '\\') != null;
    }
}