/* -------------------------------------------------------------------------- WILDCARD.C by Alessandro Felice Cantatore - 2003/25/04 Notes: * The code mentioned below is the copyright property of Alessandro Felice Cantatore, with the exception of those algorithms which are, as specified in source comments, property of the respective owners. * You are not allowed to patent or copyright this code as your! * If you work for Microsoft you are NOT allowed to use this code! * If you work for IBM you can use this code only if the target OS is OS/2 or linux. * You are not allowed to use this code for projects or programs owned or used by the army, the department of defense or the government of any country violating human rights and international laws (including those western countries pretending to "export" democracy). If you are unsure just check what Amnesty International reports about your country. * You are not allowed to use this code in programs used in activities causing harm to living beeings or to the environment. * You are free to use the algorithms described below in your programs, or modify them to suit your needs, provided that you include their source, including these notes and my name, (you are not required to include the whole source of your program) in your program package. * You are not allowed to sell this code, but you can include it into a commercial program. * If you have any doubt ask the author. * The previous notes only apply to the algorithms written by Alessandro Felice Cantatore: they do not apply to the other public domain algorithms mentioned in this file (szWildMatch3 and szWildMatch5 ). --------------------------------------------------------------------------- This program shows various methods of matching a string against a pattern using '*' and '?' wildcards. The matching is done accordingly to what specified in the OS/2 "Control Program Programming Guide and Reference": Metacharacters are characters that can be used to represent placeholders in a file name. The asterisk (*) and the question mark (?) are the two metacharacters. The asterisk matches one or more characters, including blanks. The question mark matches exactly one character, unless that character is a period. To match a period, the original name must contain a period. Metacharacters are illegal in all but the last component of a path. The only relevant difference is that the asterisk cam also match no character as in '*.zip' matching ".zip" or "A*.bbb" matching "a.bbb". Note : The program has been compiled with IBM V.Age C++ 3.08, with the following options: icc /B"/EXEPACK:2 /NOLOGO" /Q /Fa /Ss /O+ /G5 /Ol /Oc+ /Rn /Gu /Oi+ -------------------------------------------------------------------------- */ #define INCL_DOS #define INCL_DOSPROFILE #define INCL_DOSERRORS #include #include #include #include // definitions -------------------------------------------------------------- #define ITERATIONS 1000 // iterations for the speed test #define COUNTPROC 8 // count of tested routines typedef BOOL (*PPATMATCH)(PSZ, PSZ); // globals ------------------------------------------------------------------ // char table initialized via DosMapCase() as this routine is for a // subsystem library and toupper() is not available in that context CHAR mapCaseTable[256]; // array of test patterns and string (these are just for speed tests) PSZ apszPatt[] = { "?.zip", "*zip", "*.zip", "*?.zip", "?*?.zip", "*?*?.zip", "*sa?a*a?a*a.zip", "a*a?a*a?a*a.zip", "*zi?", "*.zi?", "*zi*", "*.zi*", "a*?*?", "a*b*c*l*m*n*?*.?*"}; PSZ apszStr[] = { "ZIP", "accbddcrrfddhuulggnffphhqggyyyrnnvhgflllmmnnnnkpi.iuuuiyt", "A.bkdfadfasfa.faskfa.sfaf•kl.A.ZIP", "Asdhgerlthwegjsdklgjsdgjsdkgjsdgjsdg.ZIP", "AAgsdjtweoruterjtertweiutwejtwejtwetwoejtwejtrwleAA.ZIP", "Agjsdgjdsjgsdkjgsdjgsjd•gjsd•gjsd•gj•sdgj.A.ZIdgjsdkjglsdjgPPO"}; PSZ apszPatt2[] = { "?.zip", "*zip", "*.zip", "*?.zip", "?*?.zip", "*?*?.zip", "*?*?*.zip", "a*?*?*.zip", "*zi?", "*.zi?", "*zi*", "*.zi*", "a*?*?", "?*.?*"}; PSZ apszStr2[] = { "ZIP", ".ZIP", "A.A.ZIP", "A.ZIP", "AAAA.ZIP", "A.A.ZIPPO"}; /* -------------------------------------------------------------------------- Print usage notes. --------------------------------------------------------------------------- */ VOID printHelp(VOID) { printf( " WILDCARD.EXE\n" " comparison of wildcard matching algorithms\n" " by Alessandro Felice Cantatore 2003/04/25\n" " Usage:\n" " WILDCARD (without parameters)\n" " to get a speed test of all the available algorithms\n" " WILDCARD \n" " to test the algorithm \n" " where:\n" " is a number ranging from 1 to 8 for verbose output\n" " or algoritm number + 10 (11-18) for minimal output;\n" " is the pattern to test (e.g. \"*A*B*C*.ZIP\");\n" " is the string to test (e.g. \"zAxBwCy.zip\").\n" ); } /* -------------------------------------------------------------------------- Get the high resolution timer count. --------------------------------------------------------------------------- */ long double timeQryTime(VOID) { QWORD qw; long double ldTime; DosTmrQueryTime(&qw); ldTime = qw.ulHi; ldTime *= 1.09951e+12; ldTime += qw.ulLo; return ldTime; } /* -------------------------------------------------------------------------- Print the elapsed time. --------------------------------------------------------------------------- */ VOID printElapsed(long double ldEnd, long double ldStart) { printf(" Elapsed time : %Lf milliSeconds\n", (ldEnd - ldStart) * 0.838105647e-3); } /* -------------------------------------------------------------------------- Init the case mapping table. --------------------------------------------------------------------------- */ BOOL initTable(VOID) { INT i; COUNTRYCODE cc = {0, 0}; for (i = 0; i < 256; ++i) mapCaseTable[i] = i; return !DosMapCase(256, &cc, mapCaseTable); } /* -------------------------------------------------------------------------- START OF PATTERN MATCHING ROUTINES. ALL THE ROUTINES ARE NAMED. szWildMatchX where X is a progressive number starting from 0. --------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- 1st routine. I designed it a few years ago, but the original was buggy and slower than this. The logic is quite obvious. This is quite slow as it uses recursion. --------------------------------------------------------------------------- */ BOOL szWildMatch1(PSZ pat, PSZ str) { while (*str) { switch (*pat) { case '?': if (*str == '.') return FALSE; break; case '*': do { ++pat; } while (*pat == '*'); /* enddo */ if (!*pat) return TRUE; while (*str) if (szWildMatch1(pat, str++)) return TRUE; return FALSE; default: if (mapCaseTable[*str] != mapCaseTable[*pat]) return FALSE; break; } /* endswitch */ ++pat, ++str; } /* endwhile */ while (*pat == '*') ++pat; return !*pat; } /* -------------------------------------------------------------------------- 2nd routine. It is basically as the previous routine. It uses a different flow just to check if that would perform faster (it doesn't). --------------------------------------------------------------------------- */ BOOL szWildMatch2(PSZ pat, PSZ str) { while (*str) { if (mapCaseTable[*str] == mapCaseTable[*pat]) { ++pat, ++str; } else if (*pat == '?') { if (*str == '.') return FALSE; ++pat, ++str; } else if (*pat == '*') { do { ++pat; } while (*pat == '*'); /* enddo */ if (!*pat) return TRUE; while (*str) if (szWildMatch2(pat, str++)) return TRUE; return FALSE; } else { return FALSE; } /* endif */ } /* endwhile */ while (*pat == '*') ++pat; return !*pat; } /* -------------------------------------------------------------------------- 3rd routine. This was suggested by an anonymous on a comp.os.os2.programmer.misc post. It is published by http://www.snippets.org (xstrcmp.c). The original file was : "Derived from code by Arjan Kentner (submitted by Steve Summit), modified by Bob Stout." No use restriction is mentioned in the source file. I modified it to prevent matching between '?' and '.' and to use the mapCaseTable[] array rather than toupper(). This performs around twice slower than the previous routines I designed as it uses more extensively recursion. --------------------------------------------------------------------------- */ BOOL szWildMatch3(PSZ pat, PSZ str) { switch (*pat) { case '\0': return !*str; case '*' : return szWildMatch3(pat+1, str) || *str && szWildMatch3(pat, str+1); case '?' : return *str && (*str != '.') && szWildMatch3(pat+1, str+1); default : return (mapCaseTable[*str] == mapCaseTable[*pat]) && szWildMatch3(pat+1, str+1); } /* endswitch */ } /* -------------------------------------------------------------------------- 4th routine. This was inspired by the previous routine. I modified it trying to reduce recursion as much as possible. While this is faster than the snippets.org routine it is not as fast as the first two ones. --------------------------------------------------------------------------- */ BOOL szWildMatch4(PSZ pat, PSZ str) { while (*str) { switch (*pat) { case '?': if (*str == '.') return FALSE; break; case '*': return !*(pat + 1) || szWildMatch4(pat + 1, str) || szWildMatch4(pat, str + 1); default: if (mapCaseTable[*str] != mapCaseTable[*pat]) return FALSE; break; } /* endswitch */ ++str, ++pat; } /* endwhile */ while (*pat == '*') ++pat; return !*pat; } /* -------------------------------------------------------------------------- 5th routine. This is the first NON RECURSIVE routine. I got it from a Walnut Creek CD (C/C++ user group library). The original code is from "C/C++ Users Journal". The author is Mike Cornelison. No use restriction is mentioned in the source file or other documentation I found in the CD. I modified it to prevent matching between '?' and '.' and to use the mapCaseTable[] array (the original routine didn't perform case insensitive matching) . On my PC this performs almost 100 times faster than the routine from snippets.org . But I was sure it would have been possible to do better... --------------------------------------------------------------------------- */ BOOL szWildMatch5(PSZ pat, PSZ str) { int i, star; new_segment: star = 0; if (*pat == '*') { star = 1; do { pat++; } while (*pat == '*'); /* enddo */ } /* endif */ test_match: for (i = 0; pat[i] && (pat[i] != '*'); i++) { if (mapCaseTable[str[i]] != mapCaseTable[pat[i]]) { if (!str[i]) return 0; if ((pat[i] == '?') && (str[i] != '.')) continue; if (!star) return 0; str++; goto test_match; } } if (pat[i] == '*') { str += i; pat += i; goto new_segment; } if (!str[i]) return 1; if (i && pat[i - 1] == '*') return 1; if (!star) return 0; str++; goto test_match; } /* -------------------------------------------------------------------------- 6th routine. This is based on the logic of the first routine, which is the faster among the recursive ones. I used array indexes as in the previous routine, but the flow is completely different and much simpler and straighter. Do not be scared by the goto (you also find in the previous routine) they are the only way to convert slow recursive procedures to fast non-recursive ones. This routine is in up to 65 % faster than the previous one. --------------------------------------------------------------------------- */ BOOL szWildMatch6(PSZ pat, PSZ str) { int i; BOOL star = FALSE; loopStart: for (i = 0; str[i]; i++) { switch (pat[i]) { case '?': if (str[i] == '.') goto starCheck; break; case '*': star = TRUE; str += i, pat += i; do { ++pat; } while (*pat == '*'); if (!*pat) return TRUE; goto loopStart; default: if (mapCaseTable[str[i]] != mapCaseTable[pat[i]]) goto starCheck; break; } /* endswitch */ } /* endfor */ while (pat[i] == '*') ++i; return (!pat[i]); starCheck: if (!star) return FALSE; str++; goto loopStart; } /* -------------------------------------------------------------------------- 7th routine. This is not different from the previous one... I thought it would have been possible to squeeze some more power by using pointers rather than array indexes and it looks like I was right although the speed gain is really minimum. --------------------------------------------------------------------------- */ BOOL szWildMatch7(PSZ pat, PSZ str) { PSZ s, p; BOOL star = FALSE; loopStart: for (s = str, p = pat; *s; ++s, ++p) { switch (*p) { case '?': if (*s == '.') goto starCheck; break; case '*': star = TRUE; str = s, pat = p; do { ++pat; } while (*pat == '*'); if (!*pat) return TRUE; goto loopStart; default: if (mapCaseTable[*s] != mapCaseTable[*p]) goto starCheck; break; } /* endswitch */ } /* endfor */ while (*p == '*') ++p; return (!*p); starCheck: if (!star) return FALSE; str++; goto loopStart; } /* -------------------------------------------------------------------------- 8th routine. This is the same as the 7th routine, but it assumes that pattern has been preprocessed to remove consecutive asterisks. --------------------------------------------------------------------------- */ BOOL szWildMatch8(PSZ pat, PSZ str) { PSZ s, p; BOOL star = FALSE; loopStart: for (s = str, p = pat; *s; ++s, ++p) { switch (*p) { case '?': if (*s == '.') goto starCheck; break; case '*': star = TRUE; str = s, pat = p; if (!*++pat) return TRUE; goto loopStart; default: if (mapCaseTable[*s] != mapCaseTable[*p]) goto starCheck; break; } /* endswitch */ } /* endfor */ if (*p == '*') ++p; return (!*p); starCheck: if (!star) return FALSE; str++; goto loopStart; } /* -------------------------------------------------------------------------- Speed test. Run in a loop tests against various patterns and matching strings. PPATMATCH pFunc : is a function pointer. PSZ funcName : is the function name. --------------------------------------------------------------------------- */ VOID makeSpeedTest(PPATMATCH pFunc, PSZ funcName) { long double startTime, endTime; int i, j, k, l; printf("\ntesting %s:\n", funcName); startTime = timeQryTime(); for (i = 0, l = 0; i < ITERATIONS; ++i) { for (j = 0; j < 14; ++j) { for (k = 0; k < 6; ++k) { pFunc(apszPatt[j], apszStr[k]); ++l; } /* endfor */ } /* endfor */ } /* endfor */ endTime = timeQryTime(); printElapsed(endTime, startTime); startTime = timeQryTime(); for (i = 0, l = 0; i < ITERATIONS; ++i) { for (j = 0; j < 14; ++j) { for (k = 0; k < 6; ++k) { pFunc(apszPatt[j], apszStr2[k]); ++l; } /* endfor */ } /* endfor */ } /* endfor */ endTime = timeQryTime(); printElapsed(endTime, startTime); startTime = timeQryTime(); for (i = 0, l = 0; i < ITERATIONS; ++i) { for (j = 0; j < 14; ++j) { for (k = 0; k < 6; ++k) { pFunc(apszPatt2[j], apszStr2[k]); ++l; } /* endfor */ } /* endfor */ } /* endfor */ endTime = timeQryTime(); printElapsed(endTime, startTime); startTime = timeQryTime(); for (i = 0, l = 0; i < ITERATIONS; ++i) { for (j = 0; j < 14; ++j) { for (k = 0; k < 6; ++k) { pFunc(apszPatt2[j], apszStr[k]); ++l; } /* endfor */ } /* endfor */ } /* endfor */ endTime = timeQryTime(); printElapsed(endTime, startTime); } /* -------------------------------------------------------------------------- Speed test. Test all the available matching routines. --------------------------------------------------------------------------- */ VOID testSpeed(VOID) { makeSpeedTest(szWildMatch1, "szWildMatch1"); makeSpeedTest(szWildMatch2, "szWildMatch2"); makeSpeedTest(szWildMatch3, "szWildMatch3"); makeSpeedTest(szWildMatch4, "szWildMatch4"); makeSpeedTest(szWildMatch5, "szWildMatch5"); makeSpeedTest(szWildMatch6, "szWildMatch6"); makeSpeedTest(szWildMatch7, "szWildMatch7"); makeSpeedTest(szWildMatch8, "szWildMatch8"); } /* -------------------------------------------------------------------------- Test the correctness of the pattern matching of the various algorithms. PPATMATCH pFunc : is a procedure address PSZ funcName : is the procedure name PSZ pat : is the tested pattern PSZ str : is the tested string BOOL verbose : display verbose or minimal output. Note minimal output is comfortable for comparing the output of the various routines (running tests via a batch file). --------------------------------------------------------------------------- */ VOID makeTest(PPATMATCH pFunc, PSZ funcName, PSZ pat, PSZ str, BOOL verbose) { if (verbose) { printf("testing function : %s\n" "pattern is : %s\n" "tested string is : %s\n" "result : the string %s the pattern\n", funcName, pat, str, (pFunc(pat, str) ? "matches" : "does not match")); } else { printf("%s %20s %20s\n", (pFunc(pat, str)? "==" : "! "), pat, str); } /* endif */ } /* -------------------------------------------------------------------------- According to the input parameters test a pattern matching algorithm. PSZ pat : is the tested pattern PSZ str : is the tested string ULONG ifn : function index BOOL verbose : display verbose or minimal output. --------------------------------------------------------------------------- */ VOID testMatch(PSZ pat, PSZ str, ULONG ifn, BOOL verbose) { PPATMATCH apFunc[] = {szWildMatch1, szWildMatch2, szWildMatch3, szWildMatch4, szWildMatch5, szWildMatch6, szWildMatch7, szWildMatch8}; CHAR buf[32]; sprintf(buf, "szWildMatch%u", ifn + 1); makeTest(apFunc[ifn], buf, pat, str, verbose); } /* -------------------------------------------------------------------------- MAIN. Usage: -1) no parameters The program runs the speed tests on all routines. -2) The program compares pattern to string through the first routine in verbose mode. -3) Where index is an integer ranging from 1 to 7. The program compares pattern to string using the routine indexTH routine in verbose mode. -4) Where index is an integer ranging from 11 to 17. The program compares pattern to string using the routine indexTH - 10 routine displaying minimal output. --------------------------------------------------------------------------- */ int main(int argc, char** argv) { ULONG ul = 0; // set time critical priority to get sure that speed tests are not // affected by other processes running on the system. DosSetPriority(PRTYS_THREAD, PRTYC_TIMECRITICAL, + 31, 0); initTable(); if (argc == 1) { testSpeed(); } else if (argc == 4) { ul = strtoul(argv[1], NULL, 10); if ((ul >= 11) && (ul <= (10 + COUNTPROC))) { testMatch(argv[argc - 2], argv[argc - 1], ul - 11, FALSE); } else if ((ul >= 1) && (ul <= COUNTPROC)) { testMatch(argv[argc - 2], argv[argc - 1], ul - 1, TRUE); } else { printHelp(); } /* endif */ } else { printHelp(); } /* endif */ return 0; }