Skip to content

File cas_str_like.c

File List > broker > cas_str_like.c

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */


/*
 * cas_str_like.c -
 */

#ident "$Id$"

/*
 * str_like
 *
 * Arguments:
 *               src: (IN) Source string.
 *           pattern: (IN) Pattern match string.
 *          esc_char: (IN) Escape character.  This should be 0 when an
 *                         escape character is not used.
 *
 *   There is no case_sensitive argument: both strings are always passed
 *   through ut_tolower() before they are compared.
 *
 * Returns: int
 *   B_TRUE(match), B_FALSE(not match), B_ERROR(error)
 *
 * Errors:
 *
 * Description:
 *     Perform a "like" regular expression pattern match between the pattern
 *     string and the source string.  The pattern string may contain the
 *     '%' character to match 0 or more characters, or the '_' character
 *     to match exactly one character.  These special characters may be
 *     interpreted as normal characters by escaping them.  In this case the
 *     escape character is non-zero.  It is assumed that all strings are
 *     of the same codeset.
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cas_common.h"
#include "cas_util.h"
#include "cas_str_like.h"

#define STK_SIZE 100
#define B_ERROR -1
#define B_TRUE  1
#define B_FALSE 0

static int str_eval_like (const unsigned char *tar, const unsigned char *expr, unsigned char escape);
static int is_korean (unsigned char ch);
#if 0
static void str_tolower (char *str);
#endif

/*
 * str_like - match src against a LIKE-style pattern.
 *
 *   src      : (IN) string to test
 *   pattern  : (IN) pattern that may contain '%' and '_'
 *   esc_char : (IN) escape character, forwarded unchanged to str_eval_like()
 *
 * Private copies of both strings are made and passed through ut_tolower()
 * before matching, so the caller's buffers are never modified.  If either
 * copy cannot be obtained the function reports B_FALSE; otherwise it
 * returns whatever str_eval_like() yields.
 */
int
str_like (char *src, char *pattern, char esc_char)
{
  int match = B_FALSE;
  char *src_copy;
  char *pattern_copy;

  ALLOC_COPY_STRLEN (src_copy, src);
  ALLOC_COPY_STRLEN (pattern_copy, pattern);

  /* Only evaluate when both working copies exist. */
  if (src_copy != NULL && pattern_copy != NULL)
    {
      ut_tolower (src_copy);
      ut_tolower (pattern_copy);

      match = str_eval_like ((const unsigned char *) src_copy,
			     (const unsigned char *) pattern_copy, (unsigned char) esc_char);
    }

  /* Single release point for both the success and the failure path. */
  FREE_MEM (src_copy);
  FREE_MEM (pattern_copy);

  return match;
}


/*
 * str_like_next_char - return the position just after the character at p.
 *
 * A byte for which is_korean() is true is treated as the first byte of a
 * two-byte character and is skipped together with the following byte, but
 * only when that following byte is not the string terminator.  The
 * terminator itself is never stepped over, so the returned pointer always
 * stays inside the NUL-terminated string.
 */
static const unsigned char *
str_like_next_char (const unsigned char *p)
{
  if (*p == 0)
    {
      return p;
    }
  if (is_korean (*p) && *(p + 1) != 0)
    {
      return p + 2;
    }
  return p + 1;
}

/*
 * str_eval_like - evaluate a LIKE-style pattern against a string.
 *
 *   tar    : (IN) NUL-terminated string being tested
 *   expr   : (IN) NUL-terminated pattern; '%' matches any run of characters
 *                 and '_' matches exactly one character
 *   escape : (IN) escape byte; 0 means "none", in which case the byte value
 *                 2 is used internally as the escape marker
 *
 * Returns:
 *   B_TRUE  - the pattern matches
 *   B_FALSE - the pattern does not match
 *   B_ERROR - more than STK_SIZE backtrack points would be needed, or an
 *             escape byte seen while handling '%' is not followed by
 *             '%' or '_'
 *
 * Description:
 *   The matcher is a small state machine.  Every time a '%' finds a
 *   candidate position, the current (tar, expr) pair is pushed on a
 *   fixed-size stack; a later mismatch pops the newest pair and retries
 *   one character further along in tar.  When the pattern is exhausted,
 *   trailing blanks in tar are ignored.
 *
 *   Fixes relative to the previous version: '_' and ordinary characters
 *   are matched only while tar still has a character left, and every
 *   character advance goes through str_like_next_char(), so neither tar
 *   nor expr can be moved beyond its terminator.
 */
static int
str_eval_like (const unsigned char *tar, const unsigned char *expr, unsigned char escape)
{
  const int IN_CHECK = 0;
  const int IN_PERCENT = 1;
  const int IN_PERCENT_UNDERSCORE = 2;

  int status = IN_CHECK;
  const unsigned char *tarstack[STK_SIZE], *exprstack[STK_SIZE];
  int stackp = -1;
  int inescape = 0;

  if (escape == 0)
    {
      /* no escape requested: use byte value 2 as the escape marker */
      escape = 2;
    }
  while (1)
    {
      if (status == IN_CHECK)
	{
	  if (*expr == escape)
	    {
	      expr++;
	      if (*expr == '%' || *expr == '_')
		{
		  /* escaped wildcard: compare it literally on the next pass */
		  inescape = 1;
		  continue;
		}
	      else if (*tar
		       && ((!is_korean (*tar) && *tar == *expr)
			   || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1))))
		{
		  tar = str_like_next_char (tar);
		  expr = str_like_next_char (expr);
		  continue;
		}
	    }

	  if (inescape)
	    {
	      /* *expr is '%' or '_' here, so equality implies *tar != 0 */
	      if (*tar == *expr)
		{
		  tar++;
		  expr++;
		}
	      else
		{
		  if (stackp >= 0 && stackp < STK_SIZE)
		    {
		      /* backtrack: retry one character after the saved point */
		      tar = str_like_next_char (tarstack[stackp]);
		      expr = exprstack[stackp--];
		    }
		  else
		    {
		      return B_FALSE;
		    }
		}
	      inescape = 0;
	      continue;
	    }

	  /* goto check */
	  if (*expr == 0)
	    {
	      /* pattern exhausted: trailing blanks in tar do not count */
	      while (*tar == ' ')
		{
		  tar++;
		}

	      if (*tar == 0)
		{
		  return B_TRUE;
		}
	      else
		{
		  if (stackp >= 0 && stackp < STK_SIZE)
		    {
		      tar = str_like_next_char (tarstack[stackp]);
		      expr = exprstack[stackp--];
		    }
		  else
		    {
		      return B_FALSE;
		    }
		}
	    }
	  else if (*expr == '%')
	    {
	      status = IN_PERCENT;
	      /* collapse a run of '%' into the last one */
	      while (*(expr + 1) == '%')
		{
		  expr++;
		}
	    }
	  else if (*tar != 0
		   && ((*expr == '_') || (!is_korean (*tar) && *tar == *expr)
		       || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1))))
	    {
	      /* one character matched; '_' needs a real character in tar */
	      tar = str_like_next_char (tar);
	      expr = str_like_next_char (expr);
	    }
	  else if (stackp >= 0 && stackp < STK_SIZE)
	    {
	      tar = str_like_next_char (tarstack[stackp]);
	      expr = exprstack[stackp--];
	    }
	  else if (stackp >= STK_SIZE)
	    {
	      return B_ERROR;
	    }
	  else
	    {
	      return B_FALSE;
	    }
	}
      else if (status == IN_PERCENT)
	{
	  if (*(expr + 1) == '_')
	    {
	      /* "%_": remember this point, then match the '_' part */
	      if (stackp >= STK_SIZE - 1)
		{
		  return B_ERROR;
		}
	      tarstack[++stackp] = tar;
	      exprstack[stackp] = expr;
	      expr++;

	      inescape = 0;
	      status = IN_PERCENT_UNDERSCORE;
	      continue;
	    }

	  if (*(expr + 1) == escape)
	    {
	      expr++;
	      inescape = 1;
	      if (*(expr + 1) != '%' && *(expr + 1) != '_')
		{
		  return B_ERROR;
		}
	    }

	  /* scan tar for the first byte equal to the byte that follows '%' */
	  while (*tar && *tar != *(expr + 1))
	    {
	      tar = str_like_next_char (tar);
	    }

	  if (*tar == *(expr + 1))
	    {
	      if (stackp >= STK_SIZE - 1)
		{
		  return B_ERROR;
		}
	      tarstack[++stackp] = tar;
	      exprstack[stackp] = expr;
	      expr = str_like_next_char (expr);

	      inescape = 0;
	      status = IN_CHECK;
	    }
	}
      if (status == IN_PERCENT_UNDERSCORE)
	{
	  if (*expr == escape)
	    {
	      expr++;
	      inescape = 1;
	      if (*expr != '%' && *expr != '_')
		{
		  return B_ERROR;
		}
	      continue;
	    }

	  if (inescape)
	    {
	      /* *expr is '%' or '_' here, so equality implies *tar != 0 */
	      if (*tar == *expr)
		{
		  tar++;
		  expr++;
		}
	      else
		{
		  if (stackp >= 0 && stackp < STK_SIZE)
		    {
		      tar = str_like_next_char (tarstack[stackp]);
		      expr = exprstack[stackp--];
		    }
		  else
		    {
		      return B_FALSE;
		    }
		}
	      inescape = 0;
	      continue;
	    }

	  /* goto check */
	  if (*expr == 0)
	    {
	      while (*tar == ' ')
		{
		  tar++;
		}

	      if (*tar == 0)
		{
		  return B_TRUE;
		}
	      else
		{
		  if (stackp >= 0 && stackp < STK_SIZE)
		    {
		      tar = str_like_next_char (tarstack[stackp]);
		      expr = exprstack[stackp--];
		    }
		  else
		    {
		      return B_FALSE;
		    }
		}
	    }
	  else if (*expr == '%')
	    {
	      status = IN_PERCENT;
	      while (*(expr + 1) == '%')
		{
		  expr++;
		}
	    }
	  else if (*tar != 0
		   && ((*expr == '_') || (!is_korean (*tar) && *tar == *expr)
		       || (is_korean (*tar) && *tar == *expr && *(tar + 1) == *(expr + 1))))
	    {
	      tar = str_like_next_char (tar);
	      expr = str_like_next_char (expr);
	    }
	  else if (stackp >= 0 && stackp < STK_SIZE)
	    {
	      tar = str_like_next_char (tarstack[stackp]);
	      expr = exprstack[stackp--];
	    }
	  else if (stackp >= STK_SIZE)
	    {
	      return B_ERROR;
	    }
	  else
	    {
	      return B_FALSE;
	    }
	}

      /* tar exhausted: only trailing '%' may remain in the pattern */
      if (*tar == 0)
	{
	  if (*expr)
	    {
	      while (*expr == '%')
		{
		  expr++;
		}
	    }

	  if (*expr == 0)
	    {
	      return B_TRUE;
	    }
	  else
	    {
	      return B_FALSE;
	    }
	}
    }
}

/*
 * is_korean - report whether ch lies in the byte range 0xa1..0xfe.
 *
 * Returns 1 for a byte in that range and 0 otherwise.  Callers treat such a
 * byte as the start of a two-byte character.
 * NOTE(review): the range looks like the EUC-KR double-byte range - confirm.
 */
static int
is_korean (unsigned char ch)
{
  /* 0xb0..0xc8 is wholly contained in 0xa1..0xfe, so one range test is enough */
  if (ch < 0xa1 || ch > 0xfe)
    {
      return 0;
    }
  return 1;
}

#if 0
/*
 * str_tolower - convert the ASCII upper-case letters of str to lower case,
 * in place.  All other bytes are left untouched.
 */
static void
str_tolower (char *str)
{
  char *cur = str;

  while (*cur != '\0')
    {
      if ('A' <= *cur && *cur <= 'Z')
	{
	  /* shift by the fixed distance between the two ASCII letter cases */
	  *cur = *cur + ('a' - 'A');
	}
      cur++;
    }
}
#endif