CUBRID Engine  latest
cas_str_like.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 
20 /*
21  * cas_str_like.c -
22  */
23 
24 #ident "$Id$"
25 
26 /*
27  * str_like
28  *
29  * Arguments:
30  * src: (IN) Source string.
31  * pattern: (IN) Pattern match string.
32  * esc_char: (IN) Escape character, passed by value. Pass 0 when an
33  * escape character is not used.
34  * case_sensitive : (IN) 1 - case sensitive, 0 - case insensitive (NOTE: str_like() takes no such argument; it always lower-cases both strings before matching)
35  *
36  * Returns: int
37  * B_TRUE(match), B_FALSE(not match), B_ERROR(error)
38  *
39  * Errors:
40  *
41  * Description:
42  * Perform a "like" regular expression pattern match between the pattern
43  * string and the source string. The pattern string may contain the
44  * '%' character to match 0 or more characters, or the '_' character
45  * to match exactly one character. These special characters may be
46  * interpreted as normal characters by escaping them. In this case the
47  * escape character is not NULL (i.e. non-zero). It is assumed that all strings are
48  * of the same codeset.
49  *
50  *
51  */
52 
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include "cas_common.h"
57 #include "cas_util.h"
58 #include "cas_str_like.h"
59 
60 #define STK_SIZE 100
61 #define B_ERROR -1
62 #define B_TRUE 1
63 #define B_FALSE 0
64 
65 static int str_eval_like (const unsigned char *tar, const unsigned char *expr, unsigned char escape);
66 static int is_korean (unsigned char ch);
67 #if 0
68 static void str_tolower (char *str);
69 #endif
70 
71 int
72 str_like (char *src, char *pattern, char esc_char)
73 {
74  int result;
75  char *low_src;
76  char *low_pattern;
77 
78  ALLOC_COPY (low_src, src);
79  ALLOC_COPY (low_pattern, pattern);
80 
81  if (low_src == NULL || low_pattern == NULL)
82  {
83  FREE_MEM (low_src);
84  FREE_MEM (low_pattern);
85  return B_FALSE;
86  }
87 
88  ut_tolower (low_src);
89  ut_tolower (low_pattern);
90 
91  result =
92  str_eval_like ((const unsigned char *) low_src, (const unsigned char *) low_pattern, (unsigned char) esc_char);
93 
94  FREE_MEM (low_src);
95  FREE_MEM (low_pattern);
96 
97  return result;
98 }
99 
100 
101 static int
102 str_eval_like (const unsigned char *tar, const unsigned char *expr, unsigned char escape)
103 {
104  const int IN_CHECK = 0;
105  const int IN_PERCENT = 1;
106  const int IN_PERCENT_UNDERSCORE = 2;
107 
108  int status = IN_CHECK;
109  const unsigned char *tarstack[STK_SIZE], *exprstack[STK_SIZE];
110  int stackp = -1;
111  int inescape = 0;
112 
113  if (escape == 0)
114  {
115  escape = 2;
116  }
117  while (1)
118  {
119  if (status == IN_CHECK)
120  {
121  if (*expr == escape)
122  {
123  expr++;
124  if (*expr == '%' || *expr == '_')
125  {
126  inescape = 1;
127  continue;
128  }
129  else if (*tar
130  && ((!is_korean (*tar) && *tar == *expr)
131  || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1))))
132  {
133  if (is_korean (*tar))
134  {
135  tar += 2;
136  }
137  else
138  {
139  tar++;
140  }
141  if (is_korean (*expr))
142  {
143  expr += 2;
144  }
145  else
146  {
147  expr++;
148  }
149  continue;
150  }
151  }
152 
153  if (inescape)
154  {
155  if (*tar == *expr)
156  {
157  tar++;
158  expr++;
159  }
160  else
161  {
162  if (stackp >= 0 && stackp < STK_SIZE)
163  {
164  tar = tarstack[stackp];
165  if (is_korean (*tar))
166  {
167  tar += 2;
168  }
169  else
170  {
171  tar++;
172  }
173  expr = exprstack[stackp--];
174  }
175  else
176  {
177  return B_FALSE;
178  }
179  }
180  inescape = 0;
181  continue;
182  }
183 
184  /* goto check */
185  if (*expr == 0)
186  {
187  while (*tar == ' ')
188  {
189  tar++;
190  }
191 
192  if (*tar == 0)
193  {
194  return B_TRUE;
195  }
196  else
197  {
198  if (stackp >= 0 && stackp < STK_SIZE)
199  {
200  tar = tarstack[stackp];
201  if (is_korean (*tar))
202  {
203  tar += 2;
204  }
205  else
206  {
207  tar++;
208  }
209  expr = exprstack[stackp--];
210  }
211  else
212  {
213  return B_FALSE;
214  }
215  }
216  }
217  else if (*expr == '%')
218  {
219  status = IN_PERCENT;
220  while (*(expr + 1) == '%')
221  {
222  expr++;
223  }
224  }
225  else if ((*expr == '_') || (!is_korean (*tar) && *tar == *expr)
226  || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1)))
227  {
228  if (is_korean (*tar))
229  {
230  tar += 2;
231  }
232  else
233  {
234  tar++;
235  }
236  if (is_korean (*expr))
237  {
238  expr += 2;
239  }
240  else
241  {
242  expr++;
243  }
244  }
245  else if (stackp >= 0 && stackp < STK_SIZE)
246  {
247  tar = tarstack[stackp];
248  if (is_korean (*tar))
249  {
250  tar += 2;
251  }
252  else
253  {
254  tar++;
255  }
256 
257  expr = exprstack[stackp--];
258  }
259  else if (stackp >= STK_SIZE)
260  {
261  return B_ERROR;
262  }
263  else
264  {
265  return B_FALSE;
266  }
267  }
268  else if (status == IN_PERCENT)
269  {
270  if (*(expr + 1) == '_')
271  {
272  if (stackp >= STK_SIZE - 1)
273  {
274  return B_ERROR;
275  }
276  tarstack[++stackp] = tar;
277  exprstack[stackp] = expr;
278  expr++;
279 
280  inescape = 0;
281  status = IN_PERCENT_UNDERSCORE;
282  continue;
283  }
284 
285  if (*(expr + 1) == escape)
286  {
287  expr++;
288  inescape = 1;
289  if (*(expr + 1) != '%' && *(expr + 1) != '_')
290  {
291  return B_ERROR;
292  }
293  }
294 
295  while (*tar && *tar != *(expr + 1))
296  {
297  if (is_korean (*tar))
298  {
299  tar += 2;
300  }
301  else
302  {
303  tar++;
304  }
305  }
306 
307  if (*tar == *(expr + 1))
308  {
309  if (stackp >= STK_SIZE - 1)
310  {
311  return B_ERROR;
312  }
313  tarstack[++stackp] = tar;
314  exprstack[stackp] = expr;
315  if (is_korean (*expr))
316  {
317  expr += 2;
318  }
319  else
320  {
321  expr++;
322  }
323 
324  inescape = 0;
325  status = IN_CHECK;
326  }
327  }
328  if (status == IN_PERCENT_UNDERSCORE)
329  {
330  if (*expr == escape)
331  {
332  expr++;
333  inescape = 1;
334  if (*expr != '%' && *expr != '_')
335  {
336  return B_ERROR;
337  }
338  continue;
339  }
340 
341  if (inescape)
342  {
343  if (*tar == *expr)
344  {
345  tar++;
346  expr++;
347  }
348  else
349  {
350  if (stackp >= 0 && stackp < STK_SIZE)
351  {
352  tar = tarstack[stackp];
353  if (is_korean (*tar))
354  {
355  tar += 2;
356  }
357  else
358  {
359  tar++;
360  }
361  expr = exprstack[stackp--];
362  }
363  else
364  {
365  return B_FALSE;
366  }
367  }
368  inescape = 0;
369  continue;
370  }
371 
372  /* goto check */
373  if (*expr == 0)
374  {
375  while (*tar == ' ')
376  {
377  tar++;
378  }
379 
380  if (*tar == 0)
381  {
382  return B_TRUE;
383  }
384  else
385  {
386  if (stackp >= 0 && stackp < STK_SIZE)
387  {
388  tar = tarstack[stackp];
389  if (is_korean (*tar))
390  {
391  tar += 2;
392  }
393  else
394  {
395  tar++;
396  }
397  expr = exprstack[stackp--];
398  }
399  else
400  {
401  return B_FALSE;
402  }
403  }
404  }
405  else if (*expr == '%')
406  {
407  status = IN_PERCENT;
408  while (*(expr + 1) == '%')
409  {
410  expr++;
411  }
412  }
413  else if ((*expr == '_') || (!is_korean (*tar) && *tar == *expr)
414  || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1)))
415  {
416  if (is_korean (*tar))
417  {
418  tar += 2;
419  }
420  else
421  {
422  tar++;
423  }
424  if (is_korean (*expr))
425  {
426  expr += 2;
427  }
428  else
429  {
430  expr++;
431  }
432  }
433  else if (stackp >= 0 && stackp < STK_SIZE)
434  {
435  tar = tarstack[stackp];
436  if (is_korean (*tar))
437  {
438  tar += 2;
439  }
440  else
441  {
442  tar++;
443  }
444 
445  expr = exprstack[stackp--];
446  }
447  else if (stackp >= STK_SIZE)
448  {
449  return B_ERROR;
450  }
451  else
452  {
453  return B_FALSE;
454  }
455  }
456 
457  if (*tar == 0)
458  {
459  if (*expr)
460  {
461  while (*expr == '%')
462  {
463  expr++;
464  }
465  }
466 
467  if (*expr == 0)
468  {
469  return B_TRUE;
470  }
471  else
472  {
473  return B_FALSE;
474  }
475  }
476  }
477 }
478 
/*
 * is_korean () - test whether a byte lies in the range 0xa1..0xfe
 *   return: 1 when ch is inside the range, 0 otherwise
 *   ch(in): byte to classify
 *
 * Note: str_eval_like () steps over two bytes whenever this returns
 *       non-zero. The narrower 0xb0..0xc8 range is wholly contained in
 *       0xa1..0xfe, so a single range test gives the same answer.
 */
static int
is_korean (unsigned char ch)
{
  if (ch < 0xa1 || ch > 0xfe)
    {
      return 0;
    }
  return 1;
}
484 
485 #if 0
/*
 * str_tolower () - convert the ASCII letters 'A'..'Z' of a string to lower case
 *   return: none
 *   str(in/out): NUL-terminated string, modified in place
 *
 * Note: bytes outside 'A'..'Z' are left unchanged.
 */
static void
str_tolower (char *str)
{
  char *cursor = str;

  while (*cursor != '\0')
    {
      if ('A' <= *cursor && *cursor <= 'Z')
        {
          *cursor += 'a' - 'A';
        }
      cursor++;
    }
}
496 #endif
#define B_TRUE
Definition: cas_str_like.c:62
#define B_FALSE
Definition: cas_str_like.c:63
#define B_ERROR
Definition: cas_str_like.c:61
#define NULL
Definition: freelistheap.h:34
static int is_korean(unsigned char ch)
Definition: cas_str_like.c:480
#define FREE_MEM(PTR)
Definition: cas_common.h:58
static int str_eval_like(const unsigned char *tar, const unsigned char *expr, unsigned char escape)
Definition: cas_str_like.c:102
#define ALLOC_COPY(PTR, STR)
Definition: cas_common.h:66
#define STK_SIZE
Definition: cas_str_like.c:60
void ut_tolower(char *str)
int str_like(char *src, char *pattern, char esc_char)
Definition: cas_str_like.c:72
const char ** p
Definition: dynamic_load.c:945