Skip to content

File language_support.c

File List > base > language_support.c

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * language_support.c : Multi-language and character set support
 */

#ident "$Id$"

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#if !defined(WINDOWS)
#include <langinfo.h>
#endif

#include "language_support.h"

#include "chartype.h"
#include "environment_variable.h"
#include "memory_hash.h"
#include "object_primitive.h"
#include "util_func.h"
#if !defined(WINDOWS)
#include <dlfcn.h>
#endif /* !defined (WINDOWS) */
#include "tz_support.h"
#include "db_date.h"
#include "string_opfunc.h"

#if !defined (SERVER_MODE)
#include "authenticate.h"
#include "db.h"
#endif /* !defined (SERVER_MODE) */
#include "dbtype.h"
// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"

#define PAD ' '         /* str_pad_char(INTL_CODESET_ISO88591, pad, &pad_size) */
#define SPACE PAD       /* smallest character in the collation sequence */
#define ZERO '\0'       /* space is treated as zero */

#define EUC_SPACE 0xa1      /* for euckr */
#define ASCII_SPACE 0x20

static INTL_LANG lang_Lang_id = INTL_LANG_ENGLISH;
static char lang_Loc_name[LANG_MAX_LANGNAME] = LANG_NAME_DEFAULT;
static char lang_Msg_loc_name[LANG_MAX_LANGNAME] = LANG_NAME_DEFAULT;
static char lang_Lang_name[LANG_MAX_LANGNAME] = LANG_NAME_DEFAULT;
static DB_CURRENCY lang_Loc_currency = DB_CURRENCY_DOLLAR;
INTL_CODESET lang_Loc_charset = INTL_CODESET_ISO88591;
LANG_COLLATION *lang_Collations[LANG_MAX_COLLATIONS] = { NULL };

/* built-in collations */
/* number of characters in the (extended) alphabet per language */
#define LANG_CHAR_COUNT_EN 256
#define LANG_CHAR_COUNT_TR 352

#define LANG_COLL_GENERIC_SORT_OPT \
  {TAILOR_UNDEFINED, false, false, 1, false, CONTR_IGNORE, false, \
   MATCH_CONTR_BOUND_ALLOW}
#define LANG_COLL_NO_EXP 0, NULL, NULL, NULL
#define LANG_COLL_NO_CONTR NULL, 0, 0, NULL, 0, 0

#define LANG_NO_NORMALIZATION {NULL, 0, NULL, NULL, 0}

static unsigned int lang_Weight_EN_cs[LANG_CHAR_COUNT_EN];
static unsigned int lang_Next_alpha_char_EN_cs[LANG_CHAR_COUNT_EN];

static unsigned int lang_Weight_EN_ci[LANG_CHAR_COUNT_EN];
static unsigned int lang_Next_alpha_char_EN_ci[LANG_CHAR_COUNT_EN];

static unsigned int lang_Weight_EN_cs_ti[LANG_CHAR_COUNT_EN];
static unsigned int lang_Next_alpha_char_EN_cs_ti[LANG_CHAR_COUNT_EN];

static unsigned int lang_Weight_EN_ci_ti[LANG_CHAR_COUNT_EN];
static unsigned int lang_Next_alpha_char_EN_ci_ti[LANG_CHAR_COUNT_EN];

static unsigned int lang_Weight_TR[LANG_CHAR_COUNT_TR];
static unsigned int lang_Next_alpha_char_TR[LANG_CHAR_COUNT_TR];

static unsigned int lang_Weight_TR_ti[LANG_CHAR_COUNT_TR];
static unsigned int lang_Next_alpha_char_TR_ti[LANG_CHAR_COUNT_TR];

#define DEFAULT_COLL_OPTIONS {true, true, true}
#define CI_COLL_OPTIONS {false, false, true}


static bool lang_Builtin_initialized = false;
static bool lang_Initialized = false;
static bool lang_Init_w_error = false;
static bool lang_Charset_initialized = false;
static bool lang_Language_initialized = false;
static bool lang_Msg_env_initialized = false;

typedef struct lang_defaults LANG_DEFAULTS;
struct lang_defaults
{
  const char *lang_name;
  const INTL_LANG lang;
  const INTL_CODESET codeset;
};

/* Order of language/charset pair is important: first encoutered charset is
 * the default for a language */
static const LANG_DEFAULTS builtin_Langs[] = {
  /* English - ISO-8859-1 - default lang and charset */
  {LANG_NAME_ENGLISH, INTL_LANG_ENGLISH, INTL_CODESET_ISO88591},
  /* English - UTF-8 */
  {LANG_NAME_ENGLISH, INTL_LANG_ENGLISH, INTL_CODESET_UTF8},
  /* Korean - UTF-8 */
  {LANG_NAME_KOREAN, INTL_LANG_KOREAN, INTL_CODESET_UTF8},
  /* Korean - EUC-KR */
  {LANG_NAME_KOREAN, INTL_LANG_KOREAN, INTL_CODESET_KSC5601_EUC},
  /* Korean - ISO-8859-1 : contains romanized names for months, days */
  {LANG_NAME_KOREAN, INTL_LANG_KOREAN, INTL_CODESET_ISO88591},
  /* Turkish - UTF-8 */
  {LANG_NAME_TURKISH, INTL_LANG_TURKISH, INTL_CODESET_UTF8},
  /* Turkish - ISO-8859-1 : contains romanized names for months, days */
  {LANG_NAME_TURKISH, INTL_LANG_TURKISH, INTL_CODESET_ISO88591}
};

/* Turkish collation */
static unsigned int lang_upper_TR[LANG_CHAR_COUNT_TR];
static unsigned int lang_lower_TR[LANG_CHAR_COUNT_TR];
static unsigned int lang_upper_i_TR[LANG_CHAR_COUNT_TR];
static unsigned int lang_lower_i_TR[LANG_CHAR_COUNT_TR];

static char lang_time_format_TR[] = "HH24:MI:SS";
static char lang_date_format_TR[] = "DD.MM.YYYY";
static char lang_datetime_format_TR[] = "HH24:MI:SS.FF DD.MM.YYYY";
static char lang_timestamp_format_TR[] = "HH24:MI:SS DD.MM.YYYY";
static char lang_datetimetz_format_TR[] = "HH24:MI:SS.FF DD.MM.YYYY TZR";
static char lang_timestamptz_format_TR[] = "HH24:MI:SS DD.MM.YYYY TZR";

static void **loclib_Handle = NULL;
static int loclib_Handle_size = 0;
static int loclib_Handle_count = 0;

static TEXT_CONVERSION *console_Conv = NULL;
extern TEXT_CONVERSION con_Iso_8859_1_conv;
extern TEXT_CONVERSION con_Iso_8859_9_conv;

/* all loaded locales */
static LANG_LOCALE_DATA *lang_Loaded_locales[LANG_MAX_LOADED_LOCALES] = { NULL };

static int lang_Count_locales = 0;

static int lang_Count_collations = 0;

/* normalization data */
static UNICODE_NORMALIZATION *generic_Unicode_norm = NULL;

static const DB_CHARSET lang_Db_charsets[] = {
  {"ascii", "US English charset - ASCII encoding", " ", "",
   "", INTL_CODESET_ASCII, 1},
  {"raw-bits", "Uninterpreted bits - Raw encoding", "", "",
   "", INTL_CODESET_RAW_BITS, 1},
  {"raw-bytes", "Uninterpreted bytes - Raw encoding", "", "_binary",
   "binary", INTL_CODESET_BINARY, 1},
  {"iso8859-1", "Latin 1 charset - ISO 8859 encoding", " ", "_iso88591",
   "iso88591", INTL_CODESET_ISO88591, 1},
  {"ksc-euc", "KSC 5601 1990 charset - EUC encoding", "\241\241", "_euckr",
   "euckr", INTL_CODESET_KSC5601_EUC, 2},
  {"utf-8", "UNICODE charset - UTF-8 encoding", " ", "_utf8",
   "utf8", INTL_CODESET_UTF8, 1},
  {"", "", "", "", "", INTL_CODESET_NONE, 0}
};


/*
 * Locales data
 */

#define LOCALE_DUMMY_ALPHABET(codeset)  \
  {ALPHABET_TAILORED, (codeset), 0, 0, NULL, 0, NULL, false}

#define LOCALE_NULL_DATE_FORMATS NULL, NULL, NULL, NULL, NULL, NULL

/* Calendar names and parsing order of these names */
#define LOCALE_NULL_CALENDAR_NAMES  \
  {NULL}, {NULL}, {NULL}, {NULL}, {NULL}, \
  NULL, NULL, NULL, NULL, NULL

static int set_current_locale (void);
static int set_msg_lang_from_env (void);
static int check_env_lang_val (char *env_val, char *lang_name, char **charset_ptr, INTL_CODESET * codeset);
static void set_default_lang (void);
static void lang_unload_libraries (void);
static void destroy_user_locales (void);
static int init_user_locales (void);
static LANG_LOCALE_DATA *find_lang_locale_data (const char *name, const INTL_CODESET codeset,
                        LANG_LOCALE_DATA ** last_lang_locale);
static int register_lang_locale_data (LANG_LOCALE_DATA * lld);
static void free_lang_locale_data (LANG_LOCALE_DATA * lld);
static int register_collation (LANG_COLLATION * coll);

static bool lang_is_codeset_allowed (const INTL_LANG intl_id, const INTL_CODESET codeset);
static int lang_get_builtin_lang_id_from_name (const char *lang_name, INTL_LANG * lang_id);
static INTL_CODESET lang_get_default_codeset (const INTL_LANG intl_id);

static int lang_strmatch_byte (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
                   int size1, const unsigned char *str2, int size2, const unsigned char *escape,
                   const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
static int lang_fastcmp_byte (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
                  const unsigned char *string2, const int size2, bool ignore_trailing_space);
static int lang_fastcmp_binary (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
                const unsigned char *string2, const int size2, bool ignore_trailing_space);
static int lang_strmatch_binary (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
                 const unsigned char *str2, int size2, const unsigned char *escape,
                 const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
static int lang_next_alpha_char_iso88591 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                      unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
static int lang_next_coll_byte (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
static int lang_strcmp_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
                 const unsigned char *str2, const int size2, bool ignore_trailing_space);
static int lang_strmatch_utf8 (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
                   const unsigned char *str2, int size2, const unsigned char *escape,
                   const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
static int lang_strcmp_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
                     const unsigned char *str2, const int size2, bool ignore_trailing_space);
static unsigned int lang_get_w_first_el (const COLL_DATA * coll, const unsigned char *str, const int str_size,
                     unsigned char **next_char, bool ignore_trailing_space);
static int lang_strmatch_utf8_w_contr (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
                       int size1, const unsigned char *str2, int size2, const unsigned char *escape,
                       const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
static COLL_CONTRACTION *lang_get_contr_for_string (const COLL_DATA * coll_data, const unsigned char *str,
                            const int str_size, unsigned int cp);
static void lang_get_uca_w_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str,
                const int size, UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_next,
                unsigned int *cp_out);
static void lang_get_uca_back_weight_l13 (const COLL_DATA * coll_data, const bool use_contractions,
                      const unsigned char *str_start, const unsigned char *str_last,
                      UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_prev,
                      unsigned int *cp_out);
static void lang_get_uca_w_l4 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str,
                   const int size, UCA_L4_W ** uca_w_l4, int *num_ce, unsigned char **str_next,
                   unsigned int *cp_out);
static int lang_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, const int level, bool is_match,
                       const unsigned char *str1, const int size1, const unsigned char *str2,
                       const int size2, const unsigned char *escape, const bool has_last_escape,
                       int *offset_next_level, int *str1_match_size, bool ignore_trailing_space);
static int lang_back_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
                        const int size1, const unsigned char *str2, const int size2,
                        const unsigned char *escape, const bool has_last_escape,
                        int *offset_next_level, int *str1_match_size,
                        bool ignore_trailing_space);
static int lang_strcmp_utf8_uca (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
                 const unsigned char *str2, const int size2, bool ignore_trailing_space);
static int lang_strmatch_utf8_uca (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
                   const int size1, const unsigned char *str2, const int size2,
                   const unsigned char *escape, const bool has_last_escape, int *str1_match_size,
                   bool ignore_trailing_space);
static int lang_str_utf8_trail_zero_weights (const LANG_COLLATION * lang_coll, const unsigned char *str, int size);
static int lang_str_utf8_trail_zero_weights_w_exp (const COLL_DATA * coll_data, const int level,
                           const unsigned char *str, int size);
static int lang_next_coll_char_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                     unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
static int lang_next_coll_seq_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                        unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
static int lang_split_key_iso (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                   const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
                   int *byte_size, bool ignore_trailing_space);
static int lang_split_key_byte (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
                int *byte_size, bool ignore_trailing_space);
static int lang_split_key_binary (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                  const int size1, const unsigned char *str2, const int size2,
                  const unsigned char **key, int *byte_size, bool ignore_trailing_space);
static int lang_split_key_utf8 (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
                int *byte_size, bool ignore_trailing_space);
static int lang_split_key_w_exp (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                 const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
                 int *byte_size, bool ignore_trailing_space);
static int lang_split_key_euckr (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
                 const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
                 int *byte_size, bool ignore_trailing_space);
static unsigned int lang_mht2str_byte (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
static unsigned int lang_mht2str_default (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
static unsigned int lang_mht2str_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
static unsigned int lang_mht2str_utf8_exp (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
static unsigned int lang_mht2str_ko (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
static void lang_init_coll_en_ci (LANG_COLLATION * lang_coll);
static void lang_init_coll_en_cs (LANG_COLLATION * lang_coll);
static void lang_init_coll_Utf8_tr_cs (LANG_COLLATION * lang_coll);
static int lang_fastcmp_ko (const LANG_COLLATION * lang_coll, const unsigned char *string1, int size1,
                const unsigned char *string2, int size2, bool ignore_trailing_space);
static int lang_strmatch_ko (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
                 const unsigned char *str2, int size2, const unsigned char *escape,
                 const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
static int lang_next_alpha_char_ko (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                    unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
static int lang_locale_load_alpha_from_lib (ALPHABET_DATA * a, bool load_w_identifier_name, const char *alpha_suffix,
                        void *lib_handle, const LOCALE_FILE * lf);
static int lang_locale_load_normalization_from_lib (UNICODE_NORMALIZATION * norm, void *lib_handle,
                            const LOCALE_FILE * lf);
static void lang_free_collations (void);

/* English collation */
static unsigned int lang_upper_EN[LANG_CHAR_COUNT_EN];
static unsigned int lang_lower_EN[LANG_CHAR_COUNT_EN];

#if !defined(LANG_W_MAP_COUNT_EN)
#define LANG_W_MAP_COUNT_EN 256
#endif
static int lang_w_map_EN[LANG_W_MAP_COUNT_EN];


static void lang_initloc_en_iso88591 (LANG_LOCALE_DATA * ld);

static void lang_initloc_en_binary (LANG_LOCALE_DATA * ld);

static LANG_COLLATION coll_Utf8_en_cs = {
  INTL_CODESET_UTF8, 1, 1, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_UTF8_EN_CS, "utf8_en_cs",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "1bdb1b1f630edc508be37f66dfdce7b0"},
  lang_fastcmp_byte,
  lang_strmatch_utf8,
  lang_next_coll_char_utf8,
  lang_split_key_utf8,
  lang_mht2str_byte,
  lang_init_coll_en_cs
};

static void lang_initloc_en_utf8 (LANG_LOCALE_DATA * ld);

static void lang_initloc_tr_iso (LANG_LOCALE_DATA * ld);

static void lang_initloc_ko_iso (LANG_LOCALE_DATA * ld);

static void lang_initloc_ko_utf8 (LANG_LOCALE_DATA * ld);

static void lang_initloc_ko_euc (LANG_LOCALE_DATA * ld);

static void lang_initloc_tr_utf8 (LANG_LOCALE_DATA * ld);

static LANG_COLLATION coll_Iso88591_en_cs = {
  INTL_CODESET_ISO88591, 1, 0, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_ISO_EN_CS, "iso88591_en_cs",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "707cef004e58be204d999d8a2abb4cc3"},
  lang_fastcmp_byte,
  lang_strmatch_byte,
  lang_next_alpha_char_iso88591,
  lang_split_key_iso,
  lang_mht2str_default,
  NULL
};

/* locale data */
static LANG_LOCALE_DATA lc_English_iso88591 = {
  NULL,
  LANG_NAME_ENGLISH,
  INTL_LANG_ENGLISH,
  INTL_CODESET_ISO88591,
  /* alphabet for user strings */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  /* alphabet for identifiers strings */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  &coll_Iso88591_en_cs,
  NULL,             /* console text conversion */
  false,
  NULL,             /* time, date, date-time, timestamp */
  NULL,             /* datetimetz, timestamptz format */
  NULL,
  NULL,
  NULL,
  NULL,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  '.',
  ',',
  DB_CURRENCY_DOLLAR,
  LANG_NO_NORMALIZATION,
  (char *) "6ae1bf7f15e6f132c4361761d203c1b4",
  lang_initloc_en_iso88591,
  false
};

/* locale data */
static LANG_LOCALE_DATA lc_English_utf8 = {
  NULL,
  LANG_NAME_ENGLISH,
  INTL_LANG_ENGLISH,
  INTL_CODESET_UTF8,
  {ALPHABET_ASCII, INTL_CODESET_UTF8, LANG_CHAR_COUNT_EN, 1, lang_lower_EN, 1,
   lang_upper_EN,
   false},
  {ALPHABET_ASCII, INTL_CODESET_UTF8, LANG_CHAR_COUNT_EN, 1, lang_lower_EN, 1,
   lang_upper_EN,
   false},
  &coll_Utf8_en_cs,
  &con_Iso_8859_1_conv,     /* text conversion */
  false,
  NULL,             /* time, date, date-time, timestamp */
  NULL,             /* datetimetz, timestamptz format */
  NULL,
  NULL,
  NULL,
  NULL,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  '.',
  ',',
  DB_CURRENCY_DOLLAR,
  LANG_NO_NORMALIZATION,
  (char *) "945bead220ece6f4d020403835308785",
  lang_initloc_en_utf8,
  false
};

/* Turkish in ISO-8859-1 charset : limited support (only date - formats) */
static LANG_LOCALE_DATA lc_Turkish_iso88591 = {
  NULL,
  LANG_NAME_TURKISH,
  INTL_LANG_TURKISH,
  INTL_CODESET_ISO88591,
  /* user alphabet : same as English ISO */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  /* identifiers alphabet : same as English ISO */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  &coll_Iso88591_en_cs,     /* collation : same as English ISO */
  NULL,             /* console text conversion */
  false,
  lang_time_format_TR,
  lang_date_format_TR,
  lang_datetime_format_TR,
  lang_timestamp_format_TR,
  lang_datetimetz_format_TR,
  lang_timestamptz_format_TR,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  ',',
  '.',
  DB_CURRENCY_TL,
  LANG_NO_NORMALIZATION,
  (char *) "b9ac135bdf8100b205ebb6b7e0e9c3df",
  lang_initloc_tr_iso,
  false
};


static LANG_LOCALE_DATA lc_Korean_iso88591 = {
  NULL,
  LANG_NAME_KOREAN,
  INTL_LANG_KOREAN,
  INTL_CODESET_ISO88591,
  /* alphabet : same as English ISO */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  /* identifiers alphabet : same as English ISO */
  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
  &coll_Iso88591_en_cs,     /* collation : same as English ISO */
  NULL,             /* console text conversion */
  false,
  NULL,             /* time, date, date-time, timestamp */
  NULL,             /* datetimetz, timestamptz format */
  NULL,
  NULL,
  NULL,
  NULL,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  '.',
  ',',
  DB_CURRENCY_WON,
  LANG_NO_NORMALIZATION,
  (char *) "8710ffb79b191c2158d4c498e8bc7dea",
  lang_initloc_ko_iso,
  false
};

static LANG_COLLATION coll_Utf8_ko_cs = {
  INTL_CODESET_UTF8, 1, 1, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data - same as en_US.utf8 */
  {LANG_COLL_UTF8_KO_CS, "utf8_ko_cs",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "422c85ede1e265a761078763d2240c81"},
  lang_strcmp_utf8,
  lang_strmatch_utf8,
  lang_next_coll_char_utf8,
  lang_split_key_utf8,
  lang_mht2str_utf8,
  lang_init_coll_en_cs
};

/* built-in support of Korean in UTF-8 : date-time conversions as in English
 * collation : by codepoints
 * this needs to be overriden by user defined locale */
static LANG_LOCALE_DATA lc_Korean_utf8 = {
  NULL,
  LANG_NAME_KOREAN,
  INTL_LANG_KOREAN,
  INTL_CODESET_UTF8,
  {ALPHABET_ASCII, INTL_CODESET_UTF8, LANG_CHAR_COUNT_EN, 1, lang_lower_EN, 1,
   lang_upper_EN, false},
  {ALPHABET_ASCII, INTL_CODESET_UTF8, LANG_CHAR_COUNT_EN, 1, lang_lower_EN, 1,
   lang_upper_EN, false},
  &coll_Utf8_ko_cs,     /* collation */
  NULL,             /* console text conversion */
  false,
  NULL,             /* time, date, date-time, timestamp */
  NULL,             /* datetimetz, timestamptz format */
  NULL,
  NULL,
  NULL,
  NULL,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  '.',
  ',',
  DB_CURRENCY_WON,
  LANG_NO_NORMALIZATION,
  (char *) "802cff8e10d857952241d19b50a13a27",
  lang_initloc_ko_utf8,
  false
};


static LANG_COLLATION coll_Euckr_bin = {
  INTL_CODESET_KSC5601_EUC, 1, 0, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_EUCKR_BINARY, "euckr_bin",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "18fb633e87f0a3a785ef38cf2a6a7789"},
  lang_fastcmp_ko,
  lang_strmatch_ko,
  lang_next_alpha_char_ko,
  lang_split_key_euckr,
  lang_mht2str_ko,
  lang_init_coll_en_cs
};

/* built-in support of Korean in EUC-KR : date-time conversions as in English
 * collation : binary */
static LANG_LOCALE_DATA lc_Korean_euckr = {
  NULL,
  LANG_NAME_KOREAN,
  INTL_LANG_KOREAN,
  INTL_CODESET_KSC5601_EUC,
  /* alphabet */
  {ALPHABET_TAILORED, INTL_CODESET_KSC5601_EUC, 0, 0, NULL, 0, NULL, false},
  /* identifiers alphabet */
  {ALPHABET_TAILORED, INTL_CODESET_KSC5601_EUC, 0, 0, NULL, 0, NULL, false},
  &coll_Euckr_bin,      /* collation */
  NULL,             /* console text conversion */
  false,
  NULL,             /* time, date, date-time, timestamp */
  NULL,             /* datetimetz, timestamptz */
  NULL,
  NULL,
  NULL,
  NULL,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  '.',
  ',',
  DB_CURRENCY_WON,
  LANG_NO_NORMALIZATION,
  (char *) "c46ff948b4147323edfba0c51f96fe47",
  lang_initloc_ko_euc,
  false
};

static LANG_COLLATION coll_Binary = {
  INTL_CODESET_BINARY, 1, 0, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_BINARY, "binary",
   LANG_COLL_GENERIC_SORT_OPT,
   NULL, NULL,
   NULL, NULL,
   0,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "93fbdcc87193d2783b2396c6bec068bb"},
  lang_fastcmp_binary,
  lang_strmatch_binary,
  lang_next_alpha_char_iso88591,
  lang_split_key_binary,
  lang_mht2str_default,
  NULL
};

static LANG_LOCALE_DATA lc_English_binary = {
  NULL,
  LANG_NAME_ENGLISH,
  INTL_LANG_ENGLISH,
  INTL_CODESET_BINARY,
  LOCALE_DUMMY_ALPHABET (INTL_CODESET_BINARY),
  LOCALE_DUMMY_ALPHABET (INTL_CODESET_BINARY),
  &coll_Binary,         /* collation */
  NULL,             /* console text conversion */
  false,
  LOCALE_NULL_DATE_FORMATS, /* time, date, date-time, timestamp format */
  LOCALE_NULL_CALENDAR_NAMES,
  '.',
  ',',
  DB_CURRENCY_DOLLAR,
  LANG_NO_NORMALIZATION,
  (char *) "390462b716493cbd74c77f545a77a2bf",
  lang_initloc_en_binary,
  false
};

static LANG_LOCALE_DATA *lang_Loc_data = &lc_English_iso88591;

static LANG_COLLATION coll_Iso_binary = {
  INTL_CODESET_ISO88591, 1, 0, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_ISO_BINARY, "iso88591_bin",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "54735f231842c3a673161fc90670989b"},
  lang_fastcmp_byte,
  lang_strmatch_byte,
  lang_next_alpha_char_iso88591,
  lang_split_key_iso,
  lang_mht2str_default,
  NULL
};

static LANG_COLLATION coll_Utf8_binary = {
  INTL_CODESET_UTF8, 1, 0, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_UTF8_BINARY, "utf8_bin",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_cs, lang_Next_alpha_char_EN_cs,
   lang_Weight_EN_cs_ti, lang_Next_alpha_char_EN_cs_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "d16a9a3825e263f76028c1e8c3cd043d"},
  /* compare functions handles bytes, no need to handle UTF-8 chars */
  lang_fastcmp_byte,
  lang_strmatch_utf8,
  /* 'next' and 'split_point' functions must handle UTF-8 chars */
  lang_next_coll_char_utf8,
  lang_split_key_utf8,
  lang_mht2str_byte,
  NULL
};

static LANG_COLLATION coll_Iso88591_en_ci = {
  INTL_CODESET_ISO88591, 1, 0, CI_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_ISO_EN_CI, "iso88591_en_ci",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_ci, lang_Next_alpha_char_EN_ci,
   lang_Weight_EN_ci_ti, lang_Next_alpha_char_EN_ci_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "b3fb4c073fbc76c5ec302da9128d9542"},
  lang_fastcmp_byte,
  lang_strmatch_byte,
  lang_next_coll_byte,
  lang_split_key_byte,
  lang_mht2str_byte,
  lang_init_coll_en_ci
};

static LANG_COLLATION coll_Utf8_en_ci = {
  INTL_CODESET_UTF8, 1, 1, CI_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_UTF8_EN_CI, "utf8_en_ci",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_EN_ci, lang_Next_alpha_char_EN_ci,
   lang_Weight_EN_ci_ti, lang_Next_alpha_char_EN_ci_ti,
   LANG_CHAR_COUNT_EN,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "3050bc8e9814b196f4bbb84759aab77c"},
  lang_fastcmp_byte,
  lang_strmatch_utf8,
  lang_next_coll_char_utf8,
  lang_split_key_utf8,
  lang_mht2str_byte,
  lang_init_coll_en_ci
};

static LANG_COLLATION coll_Utf8_tr_cs = {
  INTL_CODESET_UTF8, 1, 1, DEFAULT_COLL_OPTIONS, NULL,
  /* collation data */
  {LANG_COLL_UTF8_TR_CS, "utf8_tr_cs",
   LANG_COLL_GENERIC_SORT_OPT,
   lang_Weight_TR, lang_Next_alpha_char_TR,
   lang_Weight_TR_ti, lang_Next_alpha_char_TR_ti,
   LANG_CHAR_COUNT_TR,
   LANG_COLL_NO_EXP,
   LANG_COLL_NO_CONTR,
   "52f12f045d2fc90c3a818d0b334485d7"},
  lang_strcmp_utf8,
  lang_strmatch_utf8,
  lang_next_coll_char_utf8,
  lang_split_key_utf8,
  lang_mht2str_utf8,
  lang_init_coll_Utf8_tr_cs
};

static LANG_LOCALE_DATA lc_Turkish_utf8 = {
  NULL,
  LANG_NAME_TURKISH,
  INTL_LANG_TURKISH,
  INTL_CODESET_UTF8,
  {ALPHABET_ASCII, INTL_CODESET_UTF8, LANG_CHAR_COUNT_TR, 1, lang_lower_TR, 1,
   lang_upper_TR, false},
  {ALPHABET_TAILORED, INTL_CODESET_UTF8, LANG_CHAR_COUNT_TR, 1,
   lang_lower_i_TR, 1, lang_upper_i_TR, false},
  &coll_Utf8_tr_cs,
  &con_Iso_8859_9_conv,     /* console text conversion */
  false,
  lang_time_format_TR,
  lang_date_format_TR,
  lang_datetime_format_TR,
  lang_timestamp_format_TR,
  lang_datetimetz_format_TR,
  lang_timestamptz_format_TR,
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  {NULL},
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  ',',
  '.',
  DB_CURRENCY_TL,
  LANG_NO_NORMALIZATION,
  (char *) "a6c90a844ad44f78d0b1a3a9a87ddb2f",
  lang_initloc_tr_utf8,
  false
};

static LANG_COLLATION *built_In_collations[] = {
  &coll_Iso_binary,
  &coll_Utf8_binary,
  &coll_Iso88591_en_cs,
  &coll_Iso88591_en_ci,
  &coll_Utf8_en_cs,
  &coll_Utf8_en_ci,
  &coll_Utf8_tr_cs,
  &coll_Utf8_ko_cs,
  &coll_Euckr_bin,
  &coll_Binary
};

/*
 * lang_init_builtin - Initializes the built-in available languages and sets
 *             message catalog language according to env
 *
 *   return: error code
 *
 */
void
lang_init_builtin (void)
{
  int i;

  if (lang_Builtin_initialized)
    {
      return;
    }

  (void) set_msg_lang_from_env ();

  /* init all collation placeholders with ISO binary collation */
  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
    {
      lang_Collations[i] = &coll_Iso_binary;
    }

  /* built-in collations : order of registration should match colation ID */
  for (i = 0; i < (int) (sizeof (built_In_collations) / sizeof (built_In_collations[0])); i++)
    {
      (void) register_collation (built_In_collations[i]);
    }

  /* register all built-in locales allowed in current charset Support for multiple locales is required for switching
   * function context string - data/time , string - number conversions */

  /* built-in locales with ISO codeset */
  (void) register_lang_locale_data (&lc_English_iso88591);
  (void) register_lang_locale_data (&lc_Korean_iso88591);
  (void) register_lang_locale_data (&lc_Turkish_iso88591);

  (void) register_lang_locale_data (&lc_Korean_euckr);

  /* built-in locales with UTF-8 codeset : should be loaded last */
  (void) register_lang_locale_data (&lc_English_utf8);
  (void) register_lang_locale_data (&lc_Korean_utf8);
  (void) register_lang_locale_data (&lc_Turkish_utf8);
  (void) register_lang_locale_data (&lc_English_binary);

  lang_Builtin_initialized = true;
}

/*
 * lang_init - Initializes the multi-language module
 *
 *   return: error code
 *
 *  Note : Initializes available built-in and LDML locales.
 *     System charset and language information is not available and is not
 *     set here.
 */
int
lang_init (void)
{
  int error = NO_ERROR;

  if (lang_Initialized)
    {
      return (lang_Init_w_error) ? ER_LOC_INIT : NO_ERROR;
    }

  lang_init_builtin ();

  assert (!lang_Charset_initialized && !lang_Language_initialized);

  /* load & register user locales (no matter the default DB codeset) */
  error = init_user_locales ();
  if (error != NO_ERROR)
    {
      lang_Init_w_error = true;
    }

  lang_Initialized = true;

  return error;
}

/*
 * lang_init_console_txt_conv - Initializes console text conversion
 *
 */
void
lang_init_console_txt_conv (void)
{
  char *sys_id = NULL;
  char *conv_sys_ids = NULL;
#if defined(WINDOWS)
  UINT cp;
  char win_codepage_str[32];
#endif

  assert (lang_Initialized);
  assert (lang_Loc_data != NULL);

  if (lang_Loc_data == NULL || lang_Loc_data->txt_conv == NULL)
    {
#if !defined(WINDOWS)
      (void) setlocale (LC_CTYPE, "");
#endif
      return;
    }

#if defined(WINDOWS)
  cp = GetConsoleCP ();
  snprintf (win_codepage_str, sizeof (win_codepage_str) - 1, "%d", cp);

  sys_id = win_codepage_str;
  conv_sys_ids = lang_Loc_data->txt_conv->win_codepages;
#else
  /* setlocale with empty string forces the current locale : this is required to retrieve codepage id, but as a
   * side-effect modifies the behavior of string utility functions such as 'snprintf' to support current locale charset
   */
  if (setlocale (LC_CTYPE, "") != NULL)
    {
      sys_id = nl_langinfo (CODESET);
      conv_sys_ids = lang_Loc_data->txt_conv->nl_lang_str;
    }
#endif

  if (sys_id != NULL && conv_sys_ids != NULL)
    {
      char *conv_sys_end = conv_sys_ids + strlen (conv_sys_ids);
      char *found_token;

      /* supported system identifiers for conversion are separated by comma */
      do
    {
      found_token = strstr (conv_sys_ids, sys_id);
      if (found_token == NULL)
        {
          break;
        }

      if (found_token + strlen (sys_id) >= conv_sys_end || *(found_token + strlen (sys_id)) == ','
          || *(found_token + strlen (sys_id)) == ' ')
        {
          if (lang_Loc_data->txt_conv->init_conv_func != NULL)
        {
          lang_Loc_data->txt_conv->init_conv_func ();
        }
          console_Conv = lang_Loc_data->txt_conv;
          break;
        }
      else
        {
          conv_sys_ids = conv_sys_ids + strlen (sys_id);
        }
    }
      while (conv_sys_ids < conv_sys_end);
    }
}

/*
 * set_current_locale - Initializes current locale from global variables
 *          'lang_Lang_name' and 'lang_Loc_charset';
 *          if these are invalid current locale is initialized
 *          with default locale (en_US.iso88591), and error is
 *          returned.
 *
 *  return : error code
 */
static int
set_current_locale (void)
{
  bool found = false;

  lang_get_lang_id_from_name (lang_Lang_name, &lang_Lang_id);

  for (lang_Loc_data = lang_Loaded_locales[lang_Lang_id]; lang_Loc_data != NULL;
       lang_Loc_data = lang_Loc_data->next_lld)
    {
      assert (lang_Loc_data != NULL);

      if (lang_Loc_data->codeset == lang_Loc_charset && strcasecmp (lang_Lang_name, lang_Loc_data->lang_name) == 0)
    {
      found = true;
      break;
    }
    }

  if (!found)
    {
      char err_msg[ERR_MSG_SIZE];

      lang_Init_w_error = true;
      snprintf_dots_truncate (err_msg, sizeof (err_msg) - 1, "Locale %s.%s was not loaded.\n"
                  " %s not found in cubrid_locales.txt", lang_Lang_name,
                  lang_get_codeset_name (lang_Loc_charset), lang_Lang_name);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      set_default_lang ();
    }

  /* at this point we have locale : either the user selected or default one */
  assert (lang_Loc_data != NULL);
  lang_Loc_currency = lang_Loc_data->default_currency_code;

  /* static globals in db_date.c should also be initialized with the current locale (for parsing local am/pm strings
   * for times) */
  db_date_locale_init ();

  return lang_Init_w_error ? ER_LOC_INIT : NO_ERROR;
}

/*
 * set_msg_lang_from_env - Initializes language for catalog messages from
 *             environment
 *
 *   return: NO_ERROR if success
 *
 *  Note : This function sets the following global variables according to
 *      - lang_Msg_loc_name : <lang>.<charset>; en_US.utf8;
 *        if $CUBRID_MSG_LANG is not set, then en_US is used
 */
static int
set_msg_lang_from_env (void)
{
  const char *env;
  char *charset = NULL;
  char err_msg[ERR_MSG_SIZE];
  int status = NO_ERROR;

  if (lang_Msg_env_initialized)
    {
      return status;
    }

  /* set flag as set; this function will set the messages language either to environment or leave it default value */
  lang_Msg_env_initialized = true;

  /*
   * Determines the messages language by examining environment variables.
   * We check the optional variable CUBRID_MSG_LANG, which decides the
   * locale for catalog messages; if not set, en_US is used for catalog
   * messages
   */

  env = envvar_get ("MSG_LANG");
  if (env != NULL)
    {
      INTL_CODESET dummy_cs;
      char msg_lang[LANG_MAX_LANGNAME];

      strncpy_bufsize (lang_Msg_loc_name, env);

      status = check_env_lang_val (lang_Msg_loc_name, msg_lang, &charset, &dummy_cs);
      if (status != NO_ERROR)
    {
      sprintf (err_msg, "invalid value '%s' for CUBRID_MSG_LANG", lang_Msg_loc_name);
      strcpy (lang_Msg_loc_name, LANG_NAME_ENGLISH "." LANG_CHARSET_UTF8);
      return ER_LOC_INIT;
    }
      else
    {
      if (charset == NULL && strcasecmp (msg_lang, "en_US") != 0)
        {
          /* by default all catalog message folders are in .utf8, unless otherwise specified */
          assert (strlen (lang_Msg_loc_name) == 5);
          strcat (lang_Msg_loc_name, ".utf8");
        }
    }
    }

  lang_Msg_env_initialized = true;

  return NO_ERROR;
}

/*
 * lang_set_charset_lang - Initializes language and charset from a locale
 *             string
 *
 *   return: NO_ERROR if success
 *
 *  Note : This function sets the following global variables according to
 *     input:
 *      - lang_Loc_name : resolved locale string: <lang>.<charset>
 *      - lang_Lang_name : <lang> string part (without <charset>)
 *      - lang_Lang_id: id of language
 *      - lang_Loc_charset : charset id : ISO-8859-1, UTF-8 or EUC-KR
 *      - lang_Loc_data: pointer to locale (struct) used by sistem
 */
int
lang_set_charset_lang (const char *lang_charset)
{
  char *charset = NULL;
  char err_msg[ERR_MSG_SIZE];
  int status = NO_ERROR;

  assert (lang_Initialized);
  assert (!lang_Init_w_error);

  lang_Charset_initialized = true;
  lang_Language_initialized = true;

  if (lang_charset != NULL)
    {
      strncpy_bufsize (lang_Loc_name, lang_charset);
    }
  else
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, "Invalid language initialization string");
      return ER_LOC_INIT;
    }

  lang_Loc_charset = INTL_CODESET_NONE;
  status = check_env_lang_val (lang_Loc_name, lang_Lang_name, &charset, &lang_Loc_charset);
  if (status != NO_ERROR)
    {
      sprintf (err_msg, "invalid value %s for charset", lang_Loc_name);
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, err_msg);
      return ER_LOC_INIT;
    }

  if (lang_Loc_charset == INTL_CODESET_NONE)
    {
      /* no charset provided in $CUBRID_MSG_LANG */
      (void) lang_get_builtin_lang_id_from_name (lang_Lang_name, &lang_Lang_id);
      lang_Loc_charset = lang_get_default_codeset (lang_Lang_id);
      if (!lang_is_codeset_allowed (lang_Lang_id, lang_Loc_charset))
    {
      set_default_lang ();
      goto error_codeset;
    }
    }
  else if (lang_Loc_charset != INTL_CODESET_UTF8)
    {
      /* not UTF-8 charset, it has to be a built-in language */
      (void) lang_get_builtin_lang_id_from_name (lang_Loc_name, &lang_Lang_id);
      if (!lang_is_codeset_allowed (lang_Lang_id, lang_Loc_charset))
    {
      goto error_codeset;
    }
    }

  status = set_current_locale ();
  tp_apply_sys_charset ();

  return status;

error_codeset:
  sprintf (err_msg, "codeset %s for language %s is not supported", charset, lang_Lang_name);
  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, err_msg);

  return ER_LOC_INIT;
}

/*
 * lang_set_charset - Set system charset
 *
 *  return : error code
 *
 */
int
lang_set_charset (const INTL_CODESET codeset)
{
  if (codeset < INTL_CODESET_ISO88591 || codeset > INTL_CODESET_LAST)
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, "Codeset is not valid");
      return ER_LOC_INIT;
    }

  lang_Loc_charset = codeset;
  lang_Charset_initialized = true;

  tp_apply_sys_charset ();
  return NO_ERROR;
}

/*
 * lang_set_language - Set system language, and system locale
 *             The system charset must be set prior to this.
 *
 *  return : error code
 *
 */
int
lang_set_language (const char *lang_str)
{
  char full_locale_name[LANG_MAX_LANGNAME];

  assert (lang_str != NULL);

  if (!lang_Charset_initialized)
    {
      assert (false);
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1,
          "Internal error: must set charset first before setting " "language");
      return ER_LOC_INIT;
    }

  (void) lang_get_charset_env_string (full_locale_name, sizeof (full_locale_name), lang_str, lang_charset ());

  return lang_set_charset_lang (full_locale_name);
}

/*
 * check_env_lang_val - check and normalizes the environment variable value;
 *          gets the language and charset parts
 *
 *   return: NO_ERROR if success
 *
 *   env_val(in/out): value; Example : "En_US.UTF8" -> en_US.utf8
 *   lang_name(out): language part : en_US
 *   charset_ptr(out): pointer in env_val to charset part : utf8
 *   codeset(out): codeset value, according to charset part or
 *         INTL_CODESET_NODE, if charset part is empty
 *
 */
static int
check_env_lang_val (char *env_val, char *lang_name, char **charset_ptr, INTL_CODESET * codeset)
{
  char *charset;

  assert (env_val != NULL);
  assert (lang_name != NULL);
  assert (charset_ptr != NULL);

  /* strip quotas : */
  envvar_trim_char (env_val, (int) '\"');

  /* Locale should be formated like xx_XX.charset or xx_XX */
  charset = strchr (env_val, '.');
  *charset_ptr = charset;
  if (charset != NULL)
    {
      strncpy (lang_name, env_val, charset - env_val);
      lang_name[charset - env_val] = '\0';

      charset++;
      if (strcasecmp (charset, LANG_CHARSET_EUCKR) == 0 || strcasecmp (charset, LANG_CHARSET_EUCKR_ALIAS1) == 0)
    {
      *codeset = INTL_CODESET_KSC5601_EUC;
      strcpy (charset, LANG_CHARSET_EUCKR);
    }
      else if (strcasecmp (charset, LANG_CHARSET_UTF8) == 0 || strcasecmp (charset, LANG_CHARSET_UTF8_ALIAS1) == 0)
    {
      *codeset = INTL_CODESET_UTF8;
      strcpy (charset, LANG_CHARSET_UTF8);
    }
      else if (strcasecmp (charset, LANG_CHARSET_ISO88591) == 0
           || strcasecmp (charset, LANG_CHARSET_ISO88591_ALIAS1) == 0
           || strcasecmp (charset, LANG_CHARSET_ISO88591_ALIAS2) == 0)
    {
      *codeset = INTL_CODESET_ISO88591;
      strcpy (charset, LANG_CHARSET_ISO88591);
    }
      else
    {
      return ER_FAILED;
    }
    }
  else
    {
      strcpy (lang_name, env_val);
    }

  if (strlen (lang_name) == 5)
    {
      intl_toupper_iso8859 ((unsigned char *) lang_name + 3, 2);
      intl_tolower_iso8859 ((unsigned char *) lang_name, 2);
    }
  else
    {
      return ER_FAILED;
    }

  memcpy (env_val, lang_name, strlen (lang_name));

  return NO_ERROR;
}

/*
 * set_default_lang -
 *   return:
 *
 */
static void
set_default_lang (void)
{
  lang_Lang_id = INTL_LANG_ENGLISH;
  strncpy (lang_Loc_name, LANG_NAME_DEFAULT, sizeof (lang_Loc_name));
  strncpy (lang_Lang_name, LANG_NAME_DEFAULT, sizeof (lang_Lang_name));
  lang_Loc_data = &lc_English_iso88591;
  lang_Loc_charset = lang_Loc_data->codeset;
  lang_Loc_currency = lang_Loc_data->default_currency_code;
}

/*
 * lang_locales_count -
 *   return: number of locales in the system
 */
int
lang_locales_count (bool check_codeset)
{
  int i;
  int count;

  if (!check_codeset)
    {
      return lang_Count_locales;
    }

  count = 0;
  for (i = 0; i < lang_Count_locales; i++)
    {
      LANG_LOCALE_DATA *lld = lang_Loaded_locales[i];
      do
    {
      count++;
      lld = lld->next_lld;
    }
      while (lld != NULL);
    }

  return count;
}

/*
 * init_user_locales -
 *   return: error code
 *
 */
static int
init_user_locales (void)
{
  LOCALE_FILE *user_lf = NULL;
  int num_user_loc = 0, i;
  int er_status = NO_ERROR;

  er_status = locale_get_cfg_locales (&user_lf, &num_user_loc, true);
  if (er_status != NO_ERROR)
    {
      goto error;
    }

  loclib_Handle_size = num_user_loc;
  loclib_Handle_count = 0;

  if (num_user_loc == 0)
    {
      /* no extra locales : nothing to do */
      er_status = NO_ERROR;
      goto exit;
    }
  assert (num_user_loc > 0);

  loclib_Handle = (void **) malloc (loclib_Handle_size * sizeof (void *));
  if (loclib_Handle == NULL)
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_OUT_OF_VIRTUAL_MEMORY, 1, loclib_Handle_size * sizeof (void *));
      er_status = ER_OUT_OF_VIRTUAL_MEMORY;
      goto error;
    }

  for (i = 0; i < num_user_loc; i++)
    {
      /* load user locale */
      LANG_LOCALE_DATA *lld = NULL;
      LANG_LOCALE_DATA *last_lang_locale = NULL;
      INTL_LANG l_id;
      bool is_new_locale = false;

      er_status = locale_check_and_set_default_files (&(user_lf[i]), true);
      if (er_status != NO_ERROR)
    {
      goto error;
    }

      loclib_Handle[loclib_Handle_count] = NULL;
      er_status = lang_load_library (user_lf[i].lib_file, &(loclib_Handle[loclib_Handle_count]));
      if (er_status != NO_ERROR)
    {
      goto error;
    }
      loclib_Handle_count++;

      lld = find_lang_locale_data (user_lf[i].locale_name, INTL_CODESET_UTF8, &last_lang_locale);

      if (lld != NULL)
    {
      /* user customization : overwrite built-in locale */
      if (lld->is_user_data)
        {
          char err_msg[ERR_MSG_SIZE];

          snprintf (err_msg, sizeof (err_msg) - 1, "Duplicate user locale : %s", lld->lang_name);
          er_status = ER_LOC_INIT;
          LOG_LOCALE_ERROR (err_msg, er_status, false);
          goto error;
        }
      l_id = lld->lang_id;
    }
      else
    {
      /* locale not found */
      if (last_lang_locale != NULL)
        {
          /* existing language, but new locale (another charset) */
          l_id = last_lang_locale->lang_id;
        }
      else
        {
          /* new language */
          l_id = lang_Count_locales;

          assert (l_id >= INTL_LANG_USER_DEF_START);

          if (l_id >= LANG_MAX_LOADED_LOCALES)
        {
          er_status = ER_LOC_INIT;
          LOG_LOCALE_ERROR ("too many locales", er_status, false);
          goto error;
        }
        }

      lld = (LANG_LOCALE_DATA *) malloc (sizeof (LANG_LOCALE_DATA));
      if (lld == NULL)
        {
          er_status = ER_LOC_INIT;
          LOG_LOCALE_ERROR ("memory allocation failed", er_status, false);
          goto error;
        }

      memset (lld, 0, sizeof (LANG_LOCALE_DATA));
      lld->codeset = INTL_CODESET_UTF8;
      lld->lang_id = l_id;

      is_new_locale = true;
    }

      assert (lld->codeset == INTL_CODESET_UTF8);
      assert (lld->lang_id == l_id);

      lld->is_user_data = true;

      er_status = lang_locale_data_load_from_lib (lld, loclib_Handle[loclib_Handle_count - 1], &(user_lf[i]), false);
      if (er_status != NO_ERROR)
    {
      goto error;
    }

      assert (strcmp (lld->lang_name, user_lf[i].locale_name) == 0);

      /* initialization alphabet */
      lld->alphabet.codeset = INTL_CODESET_UTF8;
      lld->ident_alphabet.codeset = INTL_CODESET_UTF8;

      /* initialize text conversion */
      if (lld->txt_conv != NULL)
    {
      if (lld->txt_conv->conv_type == TEXT_CONV_GENERIC_2BYTE)
        {
          lld->txt_conv->init_conv_func = NULL;
          lld->txt_conv->text_to_utf8_func = intl_text_dbcs_to_utf8;
          lld->txt_conv->utf8_to_text_func = intl_text_utf8_to_dbcs;
        }
      else if (lld->txt_conv->conv_type == TEXT_CONV_GENERIC_1BYTE)
        {
          lld->txt_conv->init_conv_func = NULL;
          lld->txt_conv->text_to_utf8_func = intl_text_single_byte_to_utf8;
          lld->txt_conv->utf8_to_text_func = intl_text_utf8_to_single_byte;
        }
      else
        {
          assert (lld->txt_conv->conv_type == TEXT_CONV_ISO_88591_BUILTIN
              || lld->txt_conv->conv_type == TEXT_CONV_ISO_88599_BUILTIN);
        }
    }

      if (lang_get_generic_unicode_norm () == NULL)
    {
      lang_set_generic_unicode_norm (&(lld->unicode_norm));
    }

      if (is_new_locale)
    {
      er_status = register_lang_locale_data (lld);

      if (er_status != NO_ERROR)
        {
          goto error;
        }
    }

      lld->is_initialized = true;
    }

exit:
  /* free user defined locale files struct */
  for (i = 0; i < num_user_loc; i++)
    {
      free_and_init (user_lf[i].locale_name);
      free_and_init (user_lf[i].ldml_file);
      free_and_init (user_lf[i].lib_file);
    }

  if (user_lf != NULL)
    {
      free (user_lf);
    }

  return er_status;

error:
  destroy_user_locales ();
  lang_free_collations ();
  lang_unload_libraries ();

  goto exit;
}

/*
 * register_collation - registers a collation
 *   return: error code
 *   coll(in): collation structure
 */
static int
register_collation (LANG_COLLATION * coll)
{
  int id;
  assert (coll != NULL);
  assert (lang_Count_collations < LANG_MAX_COLLATIONS);

  id = coll->coll.coll_id;

  if (id < ((coll->built_in) ? 0 : LANG_MAX_BUILTIN_COLLATIONS) || id >= LANG_MAX_COLLATIONS)
    {
      char err_msg[ERR_MSG_SIZE];
      snprintf (err_msg, sizeof (err_msg) - 1,
        "Invalid collation numeric identifier : %d" " for collation '%s'. Expecting greater than %d and lower "
        "than %d.", id, coll->coll.coll_name, ((coll->built_in) ? 0 : LANG_MAX_BUILTIN_COLLATIONS),
        LANG_MAX_COLLATIONS);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      return ER_LOC_INIT;
    }

  assert (lang_Collations[id] != NULL);

  if (lang_Collations[id]->coll.coll_id != LANG_COLL_DEFAULT)
    {
      char err_msg[ERR_MSG_SIZE];
      snprintf (err_msg, sizeof (err_msg) - 1,
        "Invalid collation numeric identifier : %d for collation '%s'"
        ". This id is already used by collation '%s'", id, coll->coll.coll_name,
        lang_Collations[id]->coll.coll_name);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      return ER_LOC_INIT;
    }

  lang_Collations[id] = coll;

  lang_Count_collations++;

  if (coll->init_coll != NULL)
    {
      coll->init_coll (coll);
    }

  return NO_ERROR;
}

/*
 * lang_is_coll_name_allowed - checks if collation name is allowed
 *   return: true if allowed
 *   name(in): collation name
 */
bool
lang_is_coll_name_allowed (const char *name)
{
  int i;

  if (name == NULL || *name == '\0')
    {
      return false;
    }

  if (strchr (name, (int) ' ') || strchr (name, (int) '\t'))
    {
      return false;
    }

  for (i = 0; i < (int) (sizeof (built_In_collations) / sizeof (built_In_collations[0])); i++)
    {
      if (strcasecmp (built_In_collations[i]->coll.coll_name, name) == 0)
    {
      return false;
    }
    }

  return true;
}

/*
 * lang_get_collation - access a collation by id
 *   return: pointer to collation data or NULL
 *   coll_id(in): collation identifier
 */
LANG_COLLATION *
lang_get_collation (const int coll_id)
{
  assert (coll_id >= 0 && coll_id < LANG_MAX_COLLATIONS);

  return lang_Collations[coll_id];
}


/*
 * lang_get_collation_name - return collation name
 *   return: collation name
 *   coll_id(in): collation identifier
 */
const char *
lang_get_collation_name (const int coll_id)
{
  if (coll_id < 0 || coll_id >= LANG_MAX_COLLATIONS)
    {
      return NULL;
    }

  return lang_Collations[coll_id]->coll.coll_name;
}

/*
 * lang_get_collation_by_name - access a collation by name
 *   return: pointer to collation data or NULL
 *   coll_name(in): collation name
 */
LANG_COLLATION *
lang_get_collation_by_name (const char *coll_name)
{
  int i;
  assert (coll_name != NULL);

  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
    {
      if (strcmp (coll_name, lang_Collations[i]->coll.coll_name) == 0)
    {
      return lang_Collations[i];
    }
    }

  return NULL;
}

/*
 * lang_collation_count -
 *   return: number of collations in the system
 */
int
lang_collation_count (void)
{
  return lang_Count_collations;
}

/*
 * lang_get_codeset_name - get charset string equivalent
 *   return: charset string or empty string
 *   codeset_id(in): charset/codeset id
 */
const char *
lang_get_codeset_name (int codeset_id)
{
  switch (codeset_id)
    {
    case INTL_CODESET_UTF8:
      return "utf8";
    case INTL_CODESET_ISO88591:
      return "iso88591";
    case INTL_CODESET_KSC5601_EUC:
      return "euckr";
    case INTL_CODESET_BINARY:
      return "binary";
    }

  /* codeset_id is propagated downwards from the grammar, so it is either INTL_CODESET_UTF8, INTL_CODESET_KSC5601_EUC
   * or INTL_CODESET_ISO88591 */
  assert (false);

  return "";
}

/*
 * lang_user_alphabet_w_coll -
 *   return: id of default collation
 */
const ALPHABET_DATA *
lang_user_alphabet_w_coll (const int collation_id)
{
  LANG_COLLATION *lang_coll;

  lang_coll = lang_get_collation (collation_id);

  assert (lang_coll->default_lang != NULL);

  return &(lang_coll->default_lang->alphabet);
}

/*
 * find_lang_locale_data - searches a locale with a given name and codeset
 *   return: locale or NULL if the name+codeset combination was not found
 *   name(in): name of locale
 *   codeset(in): codeset to search
 *   last_locale(out): last locale whith this name or NULL if no locale was
 *             found
 */
static LANG_LOCALE_DATA *
find_lang_locale_data (const char *name, const INTL_CODESET codeset, LANG_LOCALE_DATA ** last_lang_locale)
{
  LANG_LOCALE_DATA *first_lang_locale = NULL;
  LANG_LOCALE_DATA *curr_lang_locale;
  LANG_LOCALE_DATA *found_lang_locale = NULL;
  int i;

  assert (last_lang_locale != NULL);

  for (i = 0; i < lang_Count_locales; i++)
    {
      if (strcasecmp (lang_Loaded_locales[i]->lang_name, name) == 0)
    {
      first_lang_locale = lang_Loaded_locales[i];
      break;
    }
    }

  for (curr_lang_locale = first_lang_locale; curr_lang_locale != NULL; curr_lang_locale = curr_lang_locale->next_lld)
    {
      if (codeset == curr_lang_locale->codeset)
    {
      found_lang_locale = curr_lang_locale;
    }

      if (curr_lang_locale->next_lld == NULL)
    {
      *last_lang_locale = curr_lang_locale;
      break;
    }
    }

  return found_lang_locale;
}

/*
 * register_lang_locale_data - registers a language locale data in the system
 *   return: error status
 *   lld(in): language locale data
 */
static int
register_lang_locale_data (LANG_LOCALE_DATA * lld)
{
  LANG_LOCALE_DATA *last_lang_locale = NULL;
  LANG_LOCALE_DATA *found_lang_locale = NULL;

  assert (lld != NULL);

  found_lang_locale = find_lang_locale_data (lld->lang_name, lld->codeset, &last_lang_locale);

  assert (found_lang_locale == NULL);

  if (!lld->is_user_data)
    {
      /* make a copy of built-in */
      LANG_LOCALE_DATA *new_lld = (LANG_LOCALE_DATA *) malloc (sizeof (LANG_LOCALE_DATA));
      if (new_lld == NULL)
    {
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_INIT, false);
      return ER_LOC_INIT;
    }

      memcpy (new_lld, lld, sizeof (LANG_LOCALE_DATA));
      lld = new_lld;
    }

  if (last_lang_locale == NULL)
    {
      /* no other locales exists with the same name */
      assert (lang_Count_locales < LANG_MAX_LOADED_LOCALES);
      lang_Loaded_locales[lang_Count_locales++] = lld;
    }
  else
    {
      last_lang_locale->next_lld = lld;
    }

  if (!(lld->is_initialized) && lld->initloc != NULL)
    {
      assert (lld->lang_id < (INTL_LANG) INTL_LANG_USER_DEF_START);
      init_builtin_calendar_names (lld);
      lld->initloc (lld);

      /* init default collation */
      if (lld->default_lang_coll != NULL && lld->default_lang_coll->init_coll != NULL)
    {
      lld->default_lang_coll->init_coll (lld->default_lang_coll);
    }
    }

  return NO_ERROR;
}

/*
 * free_lang_locale_data - Releases any resources held by a language locale
 *             data
 *   return: none
 */
static void
free_lang_locale_data (LANG_LOCALE_DATA * lld)
{
  assert (lld != NULL);

  if (lld->next_lld != NULL)
    {
      free_lang_locale_data (lld->next_lld);
      lld->next_lld = NULL;
    }

  if (lld->is_user_data)
    {
      /* Text conversions having init_conv_func not NULL are built-in. They can't be deallocated. */
      if (lld->txt_conv != NULL && lld->txt_conv->init_conv_func == NULL)
    {
      free (lld->txt_conv);
      lld->txt_conv = NULL;
    }
    }

  free (lld);
}

/*
 * lang_get_msg_Loc_name - returns the language name for the message files,
 *             according to environment
 *   return: language name string
 */
const char *
lang_get_msg_Loc_name (void)
{
  if (!lang_Msg_env_initialized)
    {
      /* ignore any errors, we just need a locale for messages */
      (void) set_msg_lang_from_env ();
    }

  return lang_Msg_loc_name;
}

/*
 * lang_get_Lang_name - returns the language name according to environment
 *   return: language name string
 */
const char *
lang_get_Lang_name (void)
{
  if (!lang_Language_initialized)
    {
      assert (false);
      return NULL;
    }
  return lang_Lang_name;
}

/*
 * lang_id - Returns language id per env settings
 *   return: language identifier
 */
INTL_LANG
lang_id (void)
{
  if (!lang_Language_initialized)
    {
      assert (false);
      return -1;
    }
  return lang_Lang_id;
}

/*
 * lang_currency - Returns language currency per env settings
 *   return: language currency identifier
 */
DB_CURRENCY
lang_currency ()
{
  if (!lang_Language_initialized)
    {
      assert (false);
      return DB_CURRENCY_NULL;
    }
  return lang_Loc_currency;
}

/*
 * lang_locale_currency - Returns language currency for a language
 *   return: language currency identifier
 */
DB_CURRENCY
lang_locale_currency (const char *locale_str)
{
  int i;

  if (!lang_Language_initialized)
    {
      assert (false);
      return DB_CURRENCY_NULL;
    }

  for (i = 0; i < lang_Count_locales; i++)
    {
      if (strcasecmp (lang_Loaded_locales[i]->lang_name, locale_str) == 0)
    {
      return lang_Loaded_locales[i]->default_currency_code;
    }
    }

  return lang_currency ();
}

/*
 * lang_charset - Returns language charset per env settings
 *   return: language charset
 */
INTL_CODESET
lang_charset (void)
{
  if (!lang_Charset_initialized)
    {
      assert (false);
      return INTL_CODESET_NONE;
    }
  return lang_Loc_charset;
}

/*
 * lang_final - Releases any resources held by this module
 *   return: none
 */
void
lang_final (void)
{
  destroy_user_locales ();

  lang_free_collations ();

  lang_set_generic_unicode_norm (NULL);

  lang_unload_libraries ();

  lang_Builtin_initialized = false;
  lang_Initialized = false;
  lang_Init_w_error = false;
  lang_Language_initialized = false;
  lang_Charset_initialized = false;
  lang_Msg_env_initialized = false;
}

/*
 * lang_is_all_initialized - Checks if the all language support modules are initialized
 *   return: true if the module has been initialized, false otherwise
 */
bool
lang_is_all_initialized (void)
{
  return (lang_Initialized && lang_Builtin_initialized && lang_Charset_initialized && lang_Language_initialized
      && lang_Msg_env_initialized);
}

/*
 * lang_currency_symbol - Computes an appropriate printed representation for
 *                        a currency identifier
 *   return: currency string
 *   curr(in): currency constant
 */
const char *
lang_currency_symbol (DB_CURRENCY curr)
{
  return intl_get_money_symbol_console (curr);
}

#if defined (ENABLE_UNUSED_FUNCTION)
/*
 * lang_char_mem_size - Returns the character memory size for the given
 *                      pointer to a character
 *   return: memory size for the first character
 *   p(in)
 */
int
lang_char_mem_size (const char *p)
{
  if (LANG_VARIABLE_CHARSET (lang_charset ()))
    {
      if (0x80 & (p[0]))
    {
      return 2;
    }
    }
  return 1;
}

/*
 * lang_char_screen_size - Returns the screen size for the given pointer
 *                         to a character
 *   return: screen size for the first character
 *   p(in)
 */
int
lang_char_screen_size (const char *p)
{
  if (LANG_VARIABLE_CHARSET (lang_charset ()))
    {
      return (0x80 & (p[0]) ? 2 : 1);
    }
  return 1;
}

/*
 * lang_wchar_mem_size - Returns the memory size for the given pointer
 *                       to a wide character
 *   return: memory size for the first character
 *   p(in)
 */
int
lang_wchar_mem_size (const wchar_t * p)
{
  if (LANG_VARIABLE_CHARSET (lang_charset ()))
    {
      if (0x8000 & (p[0]))
    {
      return 2;
    }
    }
  return 1;
}

/*
 * lang_wchar_screen_size - Returns the screen size for the given pointer
 *                          to a wide character
 *   return: screen size for the first character
 *   p(in)
 */
int
lang_wchar_screen_size (const wchar_t * p)
{
  if (LANG_VARIABLE_CHARSET (lang_charset ()))
    {
      return (0x8000 & (p[0]) ? 2 : 1);
    }
  return 1;
}
#endif

/*
 * lang_check_identifier - Tests an identifier for possibility
 *   return: true if the name is suitable for identifier,
 *           false otherwise.
 *   name(in): identifier name
 *   length(in): identifier name length
 */
bool
lang_check_identifier (const char *name, int length)
{
  bool ok = false;
  int i;

  if (name == NULL)
    {
      return false;
    }

  if (char_isalpha (name[0]))
    {
      ok = true;
      for (i = 0; i < length && ok; i++)
    {
      if (!char_isalnum (name[i]) && name[i] != '_')
        {
          ok = false;
        }
    }
    }

  return (ok);
}

/*
 * lang_locale - returns language locale per env settings.
 *   return: language locale data
 */
const LANG_LOCALE_DATA *
lang_locale (void)
{
  if (!lang_Charset_initialized || !lang_Language_initialized)
    {
      assert (false);
      return NULL;
    }
  return lang_Loc_data;
}

/*
 * lang_get_specific_locale - returns language locale of a specific language
 *                and codeset
 *
 *  return: language locale data
 *  lang(in):
 *  codeset(in):
 *
 *  Note : if codeset is INTL_CODESET_NONE, returns the first locale it
 *     founds with requested language id, not matter the codeset.
 */
const LANG_LOCALE_DATA *
lang_get_specific_locale (const INTL_LANG lang, const INTL_CODESET codeset)
{
  if (!lang_Charset_initialized || !lang_Language_initialized)
    {
      assert (false);
      return NULL;
    }

  if ((int) lang < lang_Count_locales)
    {
      LANG_LOCALE_DATA *first_lang_locale = lang_Loaded_locales[lang];
      LANG_LOCALE_DATA *curr_lang_locale;

      for (curr_lang_locale = first_lang_locale; curr_lang_locale != NULL;
       curr_lang_locale = curr_lang_locale->next_lld)
    {
      if (curr_lang_locale->codeset == codeset || codeset == INTL_CODESET_NONE)
        {
          return curr_lang_locale;
        }
    }
    }

  return NULL;
}


/*
 * lang_get_first_locale_for_lang - returns first locale for language
 *  return: language locale data or NULL if language id is not valid
 *  lang(in):
 */
const LANG_LOCALE_DATA *
lang_get_first_locale_for_lang (const INTL_LANG lang)
{
  if (!lang_Charset_initialized || !lang_Language_initialized)
    {
      assert (false);
      return NULL;
    }

  if ((int) lang < lang_Count_locales)
    {
      return lang_Loaded_locales[lang];
    }

  return NULL;
}

/*
 * lang_get_builtin_lang_id_from_name - returns the builtin language id from a
 *                  language name
 *
 *   return: 0, if language name is accepted, non-zero otherwise
 *   lang_name(in):
 *   lang_id(out): language identifier
 *
 *  Note : INTL_LANG_ENGLISH is returned if name is not a valid language name
 */
static int
lang_get_builtin_lang_id_from_name (const char *lang_name, INTL_LANG * lang_id)
{
  int i;

  assert (lang_id != NULL);

  *lang_id = INTL_LANG_ENGLISH;

  for (i = 0; i < (int) (sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS)); i++)
    {
      if (strncasecmp (lang_name, builtin_Langs[i].lang_name, strlen (builtin_Langs[i].lang_name)) == 0)
    {
      *lang_id = builtin_Langs[i].lang;
      return 0;
    }
    }

  assert (*lang_id < INTL_LANG_USER_DEF_START);

  return 1;
}

/*
 * lang_get_lang_id_from_name - returns the language id from a language name
 *
 *   return: 0, if language name is accepted, non-zero otherwise
 *   lang_name(in):
 *   lang_id(out): language identifier
 *
 *  Note : INTL_LANG_ENGLISH is returned if name is not a valid language name
 */
int
lang_get_lang_id_from_name (const char *lang_name, INTL_LANG * lang_id)
{
  int i;

  assert (lang_id != NULL);

  *lang_id = INTL_LANG_ENGLISH;

  for (i = 0; i < lang_Count_locales; i++)
    {
      assert (lang_Loaded_locales[i] != NULL);

      if (strcasecmp (lang_name, lang_Loaded_locales[i]->lang_name) == 0)
    {
      assert (i == (int) lang_Loaded_locales[i]->lang_id);
      *lang_id = lang_Loaded_locales[i]->lang_id;
      return 0;
    }
    }

  return 1;
}

/*
 * lang_get_lang_name_from_id - returns the language name from a language id
 *
 *   return: language name (NULL if lang_id is not valid)
 *   lang_id(in):
 *
 */
const char *
lang_get_lang_name_from_id (const INTL_LANG lang_id)
{
  if ((int) lang_id < lang_Count_locales)
    {
      assert (lang_Loaded_locales[lang_id] != NULL);
      return lang_Loaded_locales[lang_id]->lang_name;
    }

  return NULL;
}

/*
 * lang_set_flag_from_lang - set a flag according to language string
 *
 *   return: 0 if language string OK and flag was set, non-zero otherwise
 *   lang_str(in): language string identier
 *   has_user_format(in): true if user has given a format, false otherwise
 *   has_user_lang(in): true if user has given a language, false otherwise
 *   flag(out): bit flag : bit 0 is the user flag, bits 1 - 31 are for
 *      language identification
 *      Bit 0 : if set, the language was given by user
 *      Bit 1 - 31 : INTL_LANG
 *
 *  Note : function is used in context of some date-string functions.
 *     If lang_str cannot be solved, the language is assumed English.
 */
int
lang_set_flag_from_lang (const char *lang_str, bool has_user_format, bool has_user_lang, int *flag)
{
  INTL_LANG lang = INTL_LANG_ENGLISH;
  int status = 0;

  if (lang_str != NULL)
    {
      status = lang_get_lang_id_from_name (lang_str, &lang);
    }

  int lang_val = (int) lang;

  *flag = 0;

  *flag |= (has_user_format) ? 1 : 0;
  *flag |= (has_user_lang) ? 2 : 0;

  if (lang_val >= lang_Count_locales)
    {
      lang_val = (int) INTL_LANG_ENGLISH;
      status = 1;
    }

  *flag |= (lang_val << 2);

  assert (((*flag) & LANG_LOADED_LOCALES_PARITY_MASK) == 0);
  *flag |= LANG_LOADED_LOCALES_PARITY;

  return status;
}

/*
 * lang_get_lang_id_from_flag - get lang id from flag
 *
 *   return: id of language, current language is returned when flag value is
 *       invalid
 *   flag(in): bit flag : bit 0 and 1 are user flags, bits 2 - 31 are for
 *         language identification
 *
 *  Note : function is used in context of some date-string functions.
 */
INTL_LANG
lang_get_lang_id_from_flag (const int flag, bool * has_user_format, bool * has_user_lang)
{
  int lang_val;

  *has_user_format = ((flag & 0x1) == 0x1) ? true : false;
  *has_user_lang = ((flag & 0x2) == 0x2) ? true : false;

  assert ((flag & LANG_LOADED_LOCALES_PARITY_MASK) == LANG_LOADED_LOCALES_PARITY);
  lang_val = (flag & ~LANG_LOADED_LOCALES_PARITY_MASK) >> 2;

  if (lang_val >= 0 && lang_val < lang_Count_locales)
    {
      return (INTL_LANG) lang_val;
    }

  return lang_id ();
}

/*
 * lang_date_format_parse - Returns the default format of parsing date for the
 *            required language or NULL if a the default format is not
 *            available
 *   lang_id (in):
 *   codeset (in):
 *   type (in): DB type for format
 *   format_codeset (in): codeset of the format found
 *
 * Note:  If a format for combination (lang_id, codeset) is not found, then
 *    the first valid (non-NULL) format for lang_id and the codeset
 *    are returned.
 *
 */
const char *
lang_date_format_parse (const INTL_LANG lang_id, const INTL_CODESET codeset, const DB_TYPE type,
            INTL_CODESET * format_codeset)
{
  const LANG_LOCALE_DATA *lld;
  const char *format = NULL;
  const char *first_valid_format = NULL;

  assert (format_codeset != NULL);

  assert (lang_Charset_initialized && lang_Language_initialized);

  lld = lang_get_first_locale_for_lang (lang_id);

  if (lld == NULL)
    {
      return NULL;
    }

  do
    {
      switch (type)
    {
    case DB_TYPE_TIME:
      format = lld->time_format;
      break;
    case DB_TYPE_DATE:
      format = lld->date_format;
      break;
    case DB_TYPE_DATETIME:
      format = lld->datetime_format;
      break;
    case DB_TYPE_TIMESTAMP:
      format = lld->timestamp_format;
      break;
    case DB_TYPE_DATETIMETZ:
      format = lld->datetimetz_format;
      break;
    case DB_TYPE_TIMESTAMPTZ:
      format = lld->timestamptz_format;
      break;
    default:
      break;
    }

      if (lld->codeset == codeset)
    {
      *format_codeset = codeset;
      first_valid_format = format;
      break;
    }

      if (first_valid_format == NULL)
    {
      *format_codeset = lld->codeset;
      first_valid_format = format;
    }

      lld = lld->next_lld;
    }
  while (lld != NULL);

  return first_valid_format;
}

/*
 * lang_get_default_codeset - returns the default codeset to be used for a
 *                given language identifier
 *   return: codeset
 *   intl_id(in):
 */
static INTL_CODESET
lang_get_default_codeset (const INTL_LANG intl_id)
{
  unsigned int i;
  INTL_CODESET codeset = INTL_CODESET_NONE;

  for (i = 0; i < sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS); i++)
    {
      if (intl_id == builtin_Langs[i].lang)
    {
      codeset = builtin_Langs[i].codeset;
      break;
    }
    }
  return codeset;
}

/*
 * lang_is_codeset_allowed - checks if a combination of language and codeset
 *               is allowed
 *   return: true if combination is allowed, false otherwise
 *   intl_id(in):
 *   codeset(in):
 */
static bool
lang_is_codeset_allowed (const INTL_LANG intl_id, const INTL_CODESET codeset)
{
  unsigned int i;

  for (i = 0; i < sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS); i++)
    {
      if (intl_id == builtin_Langs[i].lang && codeset == builtin_Langs[i].codeset)
    {
      return true;
    }
    }
  return false;
}

/*
 * lang_digit_grouping_symbol - Returns symbol used for grouping digits in
 *              numbers
 *   lang_id (in):
 */
char
lang_digit_grouping_symbol (const INTL_LANG lang_id)
{
  const LANG_LOCALE_DATA *lld = lang_get_specific_locale (lang_id, INTL_CODESET_NONE);

  assert (lld != NULL);

  return lld->number_group_sym;
}

/*
 * lang_digit_fractional_symbol - Returns symbol used for fractional part of
 *                numbers
 *   lang_id (in):
 */
char
lang_digit_fractional_symbol (const INTL_LANG lang_id)
{
  const LANG_LOCALE_DATA *lld = lang_get_specific_locale (lang_id, INTL_CODESET_NONE);

  assert (lld != NULL);

  return lld->number_decimal_sym;
}

/*
 * lang_get_txt_conv - Returns the information required for console text
 *             conversion
 */
TEXT_CONVERSION *
lang_get_txt_conv (void)
{
  return console_Conv;
}

/*
 * lang_charset_name() - returns charset name
 *
 *   return:
 *   codeset(in):
 */
const char *
lang_charset_name (const INTL_CODESET codeset)
{
  int i;

  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);

  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
    {
      if (codeset == lang_Db_charsets[i].charset_id)
    {
      return lang_Db_charsets[i].charset_name;
    }
    }

  return NULL;
}

/*
 * lang_charset_cubrid_name() - returns charset name
 *
 *   return:
 *   codeset(in):
 */
const char *
lang_charset_cubrid_name (const INTL_CODESET codeset)
{
  int i;

  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);

  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
    {
      if (codeset == lang_Db_charsets[i].charset_id)
    {
      return lang_Db_charsets[i].charset_cubrid_name;
    }
    }

  return NULL;
}

/*
 * lang_get_charset_env_string -
 * buf(out):
 * buf_size(in):
 * lang_name(in):
 * codeset(in):
 * return:
 */
int
lang_get_charset_env_string (char *buf, int buf_size, const char *lang_name, const INTL_CODESET codeset)
{
  if (buf == NULL)
    {
      assert_release (0);
      return ER_FAILED;
    }

  if (!strcasecmp (lang_name, "en_US") && codeset == INTL_CODESET_ISO88591)
    {
      snprintf (buf, buf_size, "%s", lang_name);
    }
  else
    {
      snprintf (buf, buf_size, "%s.%s", lang_name, lang_charset_cubrid_name (codeset));
    }

  return NO_ERROR;
}

#if !defined (SERVER_MODE)
/* client side charset and collation */
static bool lang_Parser_use_client_charset = true;

/*
 * lang_db_put_charset - Saves the charset and language information into DB
 *   return: error code
 *
 * Note: This is called during database creation; charset and language are
 *   initialized with DB creation parameters.
 */
int
lang_db_put_charset (void)
{
  INTL_CODESET server_codeset;
  INTL_LANG server_lang;
  DB_VALUE value;
  int au_save;

  server_codeset = lang_charset ();

  server_lang = lang_id ();

  AU_DISABLE (au_save);
  db_make_string (&value, lang_get_lang_name_from_id (server_lang));
  if (db_put_internal (Au_root, "lang", &value) != NO_ERROR)
    {
      /* Error Setting the language */
      assert (false);
    }

  pr_clear_value (&value);

  db_make_int (&value, (int) server_codeset);
  if (db_put_internal (Au_root, "charset", &value) != NO_ERROR)
    {
      /* Error Setting the nchar codeset */
      assert (false);
    }
  AU_ENABLE (au_save);

  return NO_ERROR;
}

/*
 * lang_charset_name_to_id - Returns the INTL_CODESET of the specified charset
 *   return: NO_ERROR or error code if the specified name can't be found in
 *           the lang_Db_charsets array
 *   name(in): the name of the desired charset
 *   codeset(out): INTL_CODESET of the desired charset
 */
int
lang_charset_name_to_id (const char *name, INTL_CODESET * codeset)
{
  int i;

  /* Find the charset in the lang_Db_charsets array */
  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
    {
      if (strcmp (lang_Db_charsets[i].charset_name, name) == 0)
    {
      *codeset = lang_Db_charsets[i].charset_id;
      return NO_ERROR;
    }
    }

  return ER_FAILED;
}

/*
 * lang_get_client_charset - Gets Client's charset
 *   return: codeset
 */
INTL_CODESET
lang_get_client_charset (void)
{
  INTL_CODESET charset = LANG_SYS_CODESET;
  char *coll_name = prm_get_string_value (PRM_ID_INTL_COLLATION);

  if (coll_name != NULL)
    {
      LANG_COLLATION *lc = lang_get_collation_by_name (coll_name);
      if (lc != NULL)
    {
      charset = lc->codeset;
    }
    }

  return charset;
}

/*
 * lang_get_client_collation - Gets Client's charset
 *   return: codeset
 */
int
lang_get_client_collation (void)
{
  int coll_id = LANG_SYS_COLLATION;
  char *coll_name = prm_get_string_value (PRM_ID_INTL_COLLATION);

  if (coll_name != NULL)
    {
      LANG_COLLATION *lc = lang_get_collation_by_name (coll_name);
      if (lc != NULL)
    {
      coll_id = lc->coll.coll_id;
    }
    }

  return coll_id;
}

/*
 * lang_set_parser_use_client_charset - set if next parsing operation should
 *                      use client's setting of charset and
 *                  collation
 */
void
lang_set_parser_use_client_charset (bool use)
{
  lang_Parser_use_client_charset = use;
}

/*
 * lang_get_parser_use_client_charset - checks if parser should use client's
 *                  charset and collation
 *   return:
 */
bool
lang_get_parser_use_client_charset (void)
{
  return lang_Parser_use_client_charset;
}

#endif /* !SERVER_MODE */

/*
 * lang_charset_cubrid_name_to_id - Returns the INTL_CODESET of the charset
 *                  with CUBRID name
 *   return: codeset id, INTL_CODESET_NONE if not found
 *   name(in): the name of the desired charset
 */
INTL_CODESET
lang_charset_cubrid_name_to_id (const char *name)
{
  int current_codeset = INTL_CODESET_BINARY;

  while (current_codeset <= INTL_CODESET_LAST)
    {
      if (strcasecmp (name, lang_Db_charsets[current_codeset].charset_cubrid_name) == 0)
    {
      return (INTL_CODESET) current_codeset;
    }
      current_codeset++;
    }

  return INTL_CODESET_NONE;
}

/*
 * lang_charset_introducer() - returns introducer text to print for a charset
 *
 *   return: charset introducer or NULL if not found
 *   codeset(in):
 */
const char *
lang_charset_introducer (const INTL_CODESET codeset)
{
  int i;

  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);

  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
    {
      if (codeset == lang_Db_charsets[i].charset_id)
    {
      return lang_Db_charsets[i].introducer;
    }
    }

  return NULL;
}


/* Collation functions */

/*
 * lang_strcmp_utf8() - string compare for UTF8
 *   return:
 *   lang_coll(in) : collation data
 *   string1(in):
 *   size1(in):
 *   string2(in):
 *   size2(in):
 */
static int
lang_strcmp_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
          const unsigned char *str2, const int size2, bool ignore_trailing_space)
{
  return lang_strmatch_utf8 (lang_coll, false, str1, size1, str2, size2, NULL, false, NULL, ignore_trailing_space);
}

/*
 * lang_strmatch_utf8() - string match and compare for UTF8 collations
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   lang_coll(in) : collation data
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_strmatch_utf8 (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
            const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
            int *str1_match_size, bool ignore_trailing_space)
{
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_begin;
  unsigned char *str1_next, *str2_next;
  unsigned int cp1, cp2, w_cp1, w_cp2;
  const int alpha_cnt = lang_coll->coll.w_count;
  const unsigned int *weight_ptr = lang_coll->coll.weights;

  if (lang_coll->built_in && ignore_trailing_space)
    {
      weight_ptr = lang_coll->coll.weights_ti;
    }

  str1_begin = str1;
  str1_end = str1 + size1;
  str2_end = str2 + size2;

  for (; str1 < str1_end && str2 < str2_end;)
    {
      assert (str1_end - str1 > 0);
      assert (str2_end - str2 > 0);

      cp1 = intl_utf8_to_cp (str1, CAST_BUFLEN (str1_end - str1), &str1_next);
      cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);

      if (is_match && escape != NULL && memcmp (str2, escape, str2_next - str2) == 0)
    {
      if (!(has_last_escape && str2_next >= str2_end))
        {
          str2 = str2_next;
          cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
        }
    }

      if (cp1 < (unsigned int) alpha_cnt)
    {
      if (cp1 == SPACE)
        {
          w_cp1 = ZERO;
        }
      else
        {
          w_cp1 = weight_ptr[cp1];
        }
    }
      else
    {
      w_cp1 = cp1;
    }

      if (cp2 < (unsigned int) alpha_cnt)
    {
      if (cp2 == SPACE)
        {
          w_cp2 = ZERO;
        }
      else
        {
          w_cp2 = weight_ptr[cp2];
        }
    }
      else
    {
      w_cp2 = cp2;
    }

      if (w_cp1 != w_cp2)
    {
      return (w_cp1 < w_cp2) ? (-1) : 1;
    }

      str1 = str1_next;
      str2 = str2_next;
    }

  size1 = CAST_BUFLEN (str1_end - str1);
  size2 = CAST_BUFLEN (str2_end - str2);

  assert (size1 == 0 || size2 == 0);

  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  if (size1 == size2)
    {
      return 0;
    }
  else if (size2 > 0)
    {
      if (is_match || !ignore_trailing_space)
    {
      return -1;
    }

      if (lang_str_utf8_trail_zero_weights (lang_coll, str2, CAST_BUFLEN (str2_end - str2)) != 0)
    {
      return -1;
    }
    }
  else
    {
      assert (size1 > 0);

      if (is_match)
    {
      return 0;
    }

      if (!ignore_trailing_space)
    {
      return 1;
    }

      if (lang_str_utf8_trail_zero_weights (lang_coll, str1, CAST_BUFLEN (str1_end - str1)) != 0)
    {
      return 1;
    }
    }

  return 0;
}

/*
 * lang_strcmp_utf8_w_contr() - string compare for UTF8 for a collation
 *              having UCA contractions
 *   return:
 *   lang_coll(in) : collation data
 *   string1(in):
 *   size1(in):
 *   string2(in):
 *   size2(in):
 */
static int
lang_strcmp_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
              const unsigned char *str2, const int size2, bool ignore_trailing_space)
{
  return lang_strmatch_utf8_w_contr (lang_coll, false, str1, size1, str2, size2, NULL, false, NULL,
                     ignore_trailing_space);
}

/*
 * lang_strmatch_utf8_w_contr() - string match or compare for UTF8 for a
 *                collation having UCA contractions
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   lang_coll(in) : collation data
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_strmatch_utf8_w_contr (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
                const unsigned char *str2, int size2, const unsigned char *escape,
                const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
{
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_begin;
  unsigned char *str1_next, *str2_next;
  unsigned int cp1, cp2, w_cp1, w_cp2;
  const COLL_DATA *coll = &(lang_coll->coll);
  const int alpha_cnt = coll->w_count;
  const unsigned int *weight_ptr = lang_coll->coll.weights;

  bool is_str1_contr = false;
  bool is_str2_contr = false;

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;

  for (; str1 < str1_end && str2 < str2_end;)
    {
      assert (str1_end - str1 > 0);
      assert (str2_end - str2 > 0);

      cp1 = intl_utf8_to_cp (str1, CAST_BUFLEN (str1_end - str1), &str1_next);
      cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);

      if (is_match && escape != NULL && memcmp (str2, escape, str2_next - str2) == 0)
    {
      if (!(has_last_escape && str2_next >= str2_end))
        {
          str2 = str2_next;
          cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
        }
    }

      is_str1_contr = is_str2_contr = false;

      if (cp1 < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (str1_end - str1 >= coll->contr_min_size && cp1 >= coll->cp_first_contr_offset
          && cp1 < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
          && ((contr = lang_get_contr_for_string (coll, str1, CAST_BUFLEN (str1_end - str1), cp1)) != NULL))
        {
          assert (contr != NULL);

          w_cp1 = contr->wv;
          str1_next = (unsigned char *) str1 + contr->size;
          is_str1_contr = true;
        }
      else
        {
          w_cp1 = weight_ptr[cp1];
        }
    }
      else
    {
      w_cp1 = cp1;
    }

      if (cp2 < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (str2_end - str2 >= coll->contr_min_size && cp2 >= coll->cp_first_contr_offset
          && cp2 < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
          && ((contr = lang_get_contr_for_string (coll, str2, CAST_BUFLEN (str2_end - str2), cp2)) != NULL))
        {
          assert (contr != NULL);

          w_cp2 = contr->wv;
          str2_next = (unsigned char *) str2 + contr->size;
          is_str2_contr = true;
        }
      else
        {
          w_cp2 = weight_ptr[cp2];
        }
    }
      else
    {
      w_cp2 = cp2;
    }

      if (is_match && coll->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW && !is_str2_contr && is_str1_contr
      && cp1 == cp2)
    {
      /* re-read weight for str1 ignoring contractions */
      if (cp1 < (unsigned int) alpha_cnt)
        {
          w_cp1 = weight_ptr[cp1];
        }
      else
        {
          w_cp1 = cp1;
        }
      str1_next = (unsigned char *) str1 + intl_Len_utf8_char[*str1];
    }

      if (w_cp1 != w_cp2)
    {
      return (w_cp1 < w_cp2) ? (-1) : 1;
    }

      str1 = str1_next;
      str2 = str2_next;
    }

  size1 = CAST_BUFLEN (str1_end - str1);
  size2 = CAST_BUFLEN (str2_end - str2);

  assert (size1 == 0 || size2 == 0);

  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  if (size1 == size2)
    {
      return 0;
    }
  else if (size2 > 0)
    {
      if (is_match || !ignore_trailing_space)
    {
      return -1;
    }

      /* use same function as for collation without contractions : we suppose that there are no contractions with zero
       * weights or having starting codepoints with zero weight */
      if (lang_str_utf8_trail_zero_weights (lang_coll, str2, CAST_BUFLEN (str2_end - str2)) != 0)
    {
      return -1;
    }
    }
  else
    {
      assert (size1 > 0);
      if (is_match)
    {
      return 0;
    }

      if (!ignore_trailing_space)
    {
      return 1;
    }

      /* same function as for collation without contractions */
      if (lang_str_utf8_trail_zero_weights (lang_coll, str1, CAST_BUFLEN (str1_end - str1)) != 0)
    {
      return 1;
    }
    }

  return 0;
}

#define ADD_TO_HASH(pseudo_key, w)      \
  do {                      \
    unsigned int i;             \
    pseudo_key = (pseudo_key << 4) + w;     \
    i = pseudo_key & 0xf0000000;        \
    if (i != 0)                 \
      {                     \
    pseudo_key ^= i >> 24;          \
    pseudo_key ^= i;            \
      }                     \
  } while (0)

/*
 * lang_mht2str_utf8() - computes hash 2 style for a UTF-8 string having
 *           collation without expansions
 *
 *   return: hash value
 *   lang_coll(in) : collation data
 *   str(in):
 *   size(in):
 */
static unsigned int
lang_mht2str_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
{
  const unsigned char *str_end;
  unsigned char *str_next;
  unsigned int cp, w;
  const COLL_DATA *coll = &(lang_coll->coll);
  const int alpha_cnt = coll->w_count;
  const unsigned int *weight_ptr = lang_coll->coll.weights;
  unsigned int pseudo_key = 0;

  str_end = str + size;

  for (; str < str_end;)
    {
      assert (str_end - str > 0);

      cp = intl_utf8_to_cp (str, CAST_BUFLEN (str_end - str), &str_next);

      if (cp < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (coll->count_contr > 0 && str_end - str >= coll->contr_min_size && cp >= coll->cp_first_contr_offset
          && cp < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
          && ((contr = lang_get_contr_for_string (coll, str, CAST_BUFLEN (str_end - str), cp)) != NULL))
        {
          assert (contr != NULL);

          w = contr->wv;
          str_next = (unsigned char *) str + contr->size;
        }
      else
        {
          w = weight_ptr[cp];
        }
    }
      else
    {
      w = cp;
    }

      ADD_TO_HASH (pseudo_key, w);

      str = str_next;
    }

  return pseudo_key;
}

/*
 * lang_get_w_first_el() - get the weight of the first element (character or
 *             contraction) encountered in the string
 *
 *   return: weight value
 *   coll_data(in): collation data
 *   str(in): buffer to check for contractions
 *   str_size(in): size of buffer (bytes)
 *   next_char(out): pointer to the end of element (next character)
 *
 *   Note : This function works only on UTF-8 collations without expansions.
 *
 */
static unsigned int
lang_get_w_first_el (const COLL_DATA * coll, const unsigned char *str, const int str_size, unsigned char **next_char,
             bool ignore_trailing_space)
{
  unsigned int cp, w;
  const int alpha_cnt = coll->w_count;
  const unsigned int *weight_ptr = coll->weights;

  assert (coll->uca_exp_num == 0);
  assert (str_size > 0);
  assert (next_char != NULL);

  cp = intl_utf8_to_cp (str, str_size, next_char);
  if (cp < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (coll->count_contr > 0 && str_size >= coll->contr_min_size && cp >= coll->cp_first_contr_offset
      && cp < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
      && ((contr = lang_get_contr_for_string (coll, str, str_size, cp)) != NULL))
    {
      assert (contr != NULL);

      w = contr->wv;
      *next_char = (unsigned char *) str + contr->size;
    }
      else
    {
      if (cp == ASCII_SPACE && ignore_trailing_space)
        {
          return 0;
        }
      w = weight_ptr[cp];
    }
    }
  else
    {
      w = cp;
    }

  return w;
}

/*
 * lang_get_contr_for_string() - checks if the string starts with a
 *               contraction
 *
 *   return: contraction pointer or NULL if no contraction is found
 *   coll_data(in): collation data
 *   str(in): buffer to check for contractions
 *   str_size(in): size of buffer (bytes)
 *   cp(in): codepoint of first character in 'str'
 *
 */
static COLL_CONTRACTION *
lang_get_contr_for_string (const COLL_DATA * coll_data, const unsigned char *str, const int str_size, unsigned int cp)
{
  const int *first_contr;
  int contr_id;
  COLL_CONTRACTION *contr;
  int cmp;

  assert (coll_data != NULL);
  assert (coll_data->count_contr > 0);

  assert (str != NULL);
  assert (str_size >= coll_data->contr_min_size);

  first_contr = coll_data->cp_first_contr_array;
  assert (first_contr != NULL);
  contr_id = first_contr[cp - coll_data->cp_first_contr_offset];

  if (contr_id == -1)
    {
      return NULL;
    }

  assert (contr_id >= 0 && contr_id < coll_data->count_contr);
  contr = &(coll_data->contr_list[contr_id]);

  do
    {
      if ((int) contr->size > str_size)
    {
      cmp = memcmp (contr->c_buf, str, str_size);
      if (cmp == 0)
        {
          cmp = 1;
        }
    }
      else
    {
      cmp = memcmp (contr->c_buf, str, contr->size);
    }

      if (cmp >= 0)
    {
      break;
    }

      assert (cmp < 0);

      contr++;
      contr_id++;

    }
  while (contr_id < coll_data->count_contr);

  if (cmp != 0)
    {
      contr = NULL;
    }

  return contr;
}

static UCA_L13_W uca_l13_max_weight = 0xffffffff;
static UCA_L4_W uca_l4_max_weight = 0xffff;

/*
 * lang_get_uca_w_l13() - returns pointer to array of CEs of first collatable
 *            element in string (codepoint or contraction) and
 *            number of CEs in this array
 *   return:
 *   coll_data(in): collation data
 *   use_contractions(in):
 *   str(in): string to get weights for
 *   size(in): size of string (bytes)
 *   uca_w_l13(out): pointer to weight array
 *   num_ce(out): number of Collation Elements
 *   str_next(out): pointer to next collatable element in string
 *   cp_out(out): bit field value : codepoint value, and if contraction is
 *        found than INTL_MASK_CONTR mask is set (MSB)
 */
static void
lang_get_uca_w_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str, const int size,
            UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
{
  unsigned int cp;
  const int alpha_cnt = coll_data->w_count;
  const int exp_num = coll_data->uca_exp_num;

  assert (size > 0);

  cp = intl_utf8_to_cp (str, size, str_next);

  *cp_out = cp;

  if (cp < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (use_contractions && coll_data->count_contr > 0 && size >= coll_data->contr_min_size
      && cp >= coll_data->cp_first_contr_offset
      && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
      && ((contr = lang_get_contr_for_string (coll_data, str, size, cp)) != NULL))
    {
      assert (contr != NULL);
      *uca_w_l13 = contr->uca_w_l13;
      *num_ce = contr->uca_num;
      *str_next = (unsigned char *) str + contr->size;
      *cp_out = INTL_MASK_CONTR | cp;
    }
      else
    {
      *uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
      *num_ce = coll_data->uca_num[cp];
      /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
    }
    }
  else
    {
      *uca_w_l13 = &uca_l13_max_weight;
      *num_ce = 1;
      /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
    }
}


/*
 * lang_get_uca_back_weight_l13() - returns pointer to array of CEs of
 *                  previous collatable element in string and
 *                  number of CEs in this array
 *
 *   return:
 *   coll_data(in): collation data
 *   use_contractions(in):
 *   str(in): string to get weights for
 *   size(in): size of string (bytes)
 *   uca_w_l13(out): pointer to weight array
 *   num_ce(out): number of Collation Elements
 *   str_next(out): pointer to next collatable element in string
 *   cp_out(out): bit field value : codepoint value, and if contraction is
 *        found than INTL_MASK_CONTR mask is set (MSB)
 */
static void
lang_get_uca_back_weight_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str_start,
                  const unsigned char *str_last, UCA_L13_W ** uca_w_l13, int *num_ce,
                  unsigned char **str_prev, unsigned int *cp_out)
{
  unsigned int cp;
  const int alpha_cnt = coll_data->w_count;
  const int exp_num = coll_data->uca_exp_num;

  assert (str_prev != NULL);
  assert (cp_out != NULL);
  assert (str_start <= str_last);

  cp = intl_back_utf8_to_cp (str_start, str_last, str_prev);
  *cp_out = cp;

  if (cp < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;
      unsigned int cp_prev;
      unsigned char *str_prev_prev = NULL;

      if (*str_prev >= str_start)
    {
      cp_prev = intl_back_utf8_to_cp (str_start, *str_prev, &str_prev_prev);

      if (use_contractions && coll_data->count_contr > 0 && cp_prev < (unsigned int) alpha_cnt
          && str_last - *str_prev >= coll_data->contr_min_size && cp >= coll_data->cp_first_contr_offset
          && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
          && ((contr = lang_get_contr_for_string (coll_data, str_prev_prev + 1,
                              CAST_BUFLEN (str_last - str_prev_prev), cp_prev)) != NULL))
        {
          assert (contr != NULL);
          *uca_w_l13 = contr->uca_w_l13;
          *num_ce = contr->uca_num;
          *str_prev = str_prev_prev;
          *cp_out = INTL_MASK_CONTR | cp_prev;
          return;
        }
    }

      *uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
      *num_ce = coll_data->uca_num[cp];
      /* leave str_prev pointer to the one returned by intl_back_utf8_to_cp */
    }
  else
    {
      *uca_w_l13 = &uca_l13_max_weight;
      *num_ce = 1;
      /* leave str_prev pointer to the one returned by 'intl_back_utf8_to_cp' */
    }
}

/*
 * lang_get_uca_w_l4() - returns pointer to array of CEs of first collatable
 *           element in string (codepoint or contraction) and
 *           number of CEs in this array
 *   return:
 *   coll_data(in): collation data
 *   use_contractions(in):
 *   str(in): string to get weights for
 *   size(in): size of string (bytes)
 *   uca_w_l13(out): pointer to weight array
 *   num_ce(out): number of Collation Elements
 *   str_next(out): pointer to next collatable element in string
 *   cp_out(out): bit field value : codepoint value, and if contraction is
 *        found than INTL_MASK_CONTR mask is set (MSB)
 *
 */
static void
lang_get_uca_w_l4 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str, const int size,
           UCA_L4_W ** uca_w_l4, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
{
  unsigned int cp;
  const int alpha_cnt = coll_data->w_count;
  const int exp_num = coll_data->uca_exp_num;

  assert (size > 0);

  cp = intl_utf8_to_cp (str, size, str_next);

  if (cp < (unsigned int) alpha_cnt)
    {
      COLL_CONTRACTION *contr = NULL;

      if (use_contractions && coll_data->count_contr > 0 && size >= coll_data->contr_min_size
      && cp >= coll_data->cp_first_contr_offset
      && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
      && ((contr = lang_get_contr_for_string (coll_data, str, size, cp)) != NULL))
    {
      assert (contr != NULL);
      *uca_w_l4 = contr->uca_w_l4;
      *num_ce = contr->uca_num;
      *str_next = (unsigned char *) str + contr->size;
      *cp_out = INTL_MASK_CONTR | cp;
    }
      else
    {
      *uca_w_l4 = &(coll_data->uca_w_l4[cp * exp_num]);
      *num_ce = coll_data->uca_num[cp];
      /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
    }
    }
  else
    {
      *uca_w_l4 = &uca_l4_max_weight;
      *num_ce = 1;
      /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
    }
}

/* retrieve UCA weight level:
 * l = level
 * i = position weight array
 * l13w = array of compressed weight for levels 1,2,3
 * l4w = array of weight level 4
 */
#define GET_UCA_WEIGHT(l, i, l13w, l4w)     \
  ((l == 0) ? (UCA_GET_L1_W (l13w[i])) :    \
   (l == 1) ? (UCA_GET_L2_W (l13w[i])) :    \
   (l == 2) ? (UCA_GET_L3_W (l13w[i])) : (l4w[i]))

#define INTL_CONTR_FOUND(v) (((v) & INTL_MASK_CONTR) == INTL_MASK_CONTR)
/*
 * lang_strmatch_utf8_uca_w_level() - string match or compare for UTF8
 *  collation employing full UCA weights (expansions and contractions)
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   coll_data(in) : collation data
 *   level(in) : current UCA level to compare
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   offset_next_level(in/out) : offset in bytes from which to start the
 *               compare; used to avoid compare between
 *               binary identical part in consecutive compare
 *               levels
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, const int level, bool is_match, const unsigned char *str1,
                const int size1, const unsigned char *str2, const int size2,
                const unsigned char *escape, const bool has_last_escape, int *offset_next_level,
                int *str1_match_size, bool ignore_trailing_space)
{
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_begin;
  unsigned char *str1_next, *str2_next;
  UCA_L13_W *uca_w_l13_1 = NULL;
  UCA_L13_W *uca_w_l13_2 = NULL;
  UCA_L4_W *uca_w_l4_1 = NULL;
  UCA_L4_W *uca_w_l4_2 = NULL;
  int num_ce1 = 0, num_ce2 = 0;
  int ce_index1 = 0, ce_index2 = 0;
  unsigned int w1 = 0, w2 = 0;

  bool compute_offset = false;
  unsigned int str1_cp_contr = 0, str2_cp_contr = 0;
  int cmp_offset = 0;

  int result = 0;

  assert (offset_next_level != NULL && *offset_next_level > -1);
  assert (level >= 0 && level <= 4);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;

  if (level == 0)
    {
      assert (*offset_next_level == 0);
      compute_offset = true;
    }
  else
    {
      cmp_offset = *offset_next_level;
      if (cmp_offset > 0)
    {
      assert (cmp_offset <= size1);
      assert (cmp_offset <= size2);
      str1 += cmp_offset;
      str2 += cmp_offset;

    }
      compute_offset = false;
    }

  str1_next = (unsigned char *) str1;
  str2_next = (unsigned char *) str2;

  for (;;)
    {
    read_weights1:
      if (num_ce1 == 0)
    {
      str1 = str1_next;
      if (str1 >= str1_end)
        {
          goto read_weights2;
        }

      if (level == 3)
        {
          lang_get_uca_w_l4 (coll_data, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l4_1, &num_ce1,
                 &str1_next, &str1_cp_contr);
        }
      else
        {
          lang_get_uca_w_l13 (coll_data, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1,
                  &str1_next, &str1_cp_contr);
        }
      assert (num_ce1 > 0);

      ce_index1 = 0;
    }

    read_weights2:
      if (num_ce2 == 0)
    {
      int c_size;

      str2 = str2_next;
      if (str2 >= str2_end)
        {
          goto compare;
        }

      if (is_match && escape != NULL && intl_cmp_char (str2, escape, INTL_CODESET_UTF8, &c_size) == 0)
        {
          if (!(has_last_escape && str2 + c_size >= str2_end))
        {
          str2 += c_size;
        }
        }

      if (level == 3)
        {
          lang_get_uca_w_l4 (coll_data, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l4_2, &num_ce2,
                 &str2_next, &str1_cp_contr);
        }
      else
        {
          lang_get_uca_w_l13 (coll_data, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2,
                  &str2_next, &str2_cp_contr);
        }

      if (is_match && coll_data->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW
          && !INTL_CONTR_FOUND (str2_cp_contr) && INTL_CONTR_FOUND (str1_cp_contr) && ce_index1 == 0
          && str2_cp_contr == (str1_cp_contr & (~INTL_MASK_CONTR)))
        {
          /* re-compute weight of str1 without considering contractions */
          if (level == 3)
        {
          lang_get_uca_w_l4 (coll_data, false, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l4_1, &num_ce1,
                     &str1_next, &str1_cp_contr);
        }
          else
        {
          lang_get_uca_w_l13 (coll_data, false, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1,
                      &str1_next, &str1_cp_contr);
        }
          assert (num_ce1 > 0);
        }

      assert (num_ce2 > 0);

      ce_index2 = 0;
    }

      if (compute_offset)
    {
      if (ce_index1 == 0 && ce_index2 == 0)
        {
          if (!INTL_CONTR_FOUND (str1_cp_contr) && str1_cp_contr == str2_cp_contr)
        {
          assert (!INTL_CONTR_FOUND (str2_cp_contr));
          cmp_offset += CAST_BUFLEN (str1_next - str1);
        }
          else
        {
          compute_offset = false;
        }
        }
      else if (ce_index1 != ce_index2)
        {
          compute_offset = false;
        }
    }

    compare:
      if (num_ce1 == 0 && str1 >= str1_end)
    {
      /* str1 was consumed */
      if (num_ce2 == 0)
        {
          if (str2 >= str2_end)
        {
          /* both strings consumed and equal */
          assert (result == 0);
          goto exit;
        }
          else
        {
          if (is_match || !ignore_trailing_space)
            {
              result = -1;
              goto exit;
            }
          goto read_weights2;
        }
        }

      assert (num_ce2 > 0);
      if (is_match && *str2 == ASCII_SPACE)
        {
          /* trailing spaces are not matched */
          result = -1;
          goto exit;
        }

      if (!ignore_trailing_space)
        {
          result = -1;
          goto exit;
        }

      /* consume any remaining zero-weight values (skip them) from str2 */
      do
        {
          w2 = GET_UCA_WEIGHT (level, ce_index2, uca_w_l13_2, uca_w_l4_2);
          if (w2 != 0)
        {
          /* non-zero weight : strings are not equal */
          result = -1;
          goto exit;
        }
          ce_index2++;
          num_ce2--;
        }
      while (num_ce2 > 0);

      goto read_weights2;
    }

      if (num_ce2 == 0 && str2 >= str2_end)
    {
      if (is_match)
        {
          assert (result == 0);
          goto exit;
        }

      if (!ignore_trailing_space)
        {
          result = 1;
          goto exit;
        }

      /* consume any remaining zero-weight values (skip them) from str1 */
      while (num_ce1 > 0)
        {
          w1 = GET_UCA_WEIGHT (level, ce_index1, uca_w_l13_1, uca_w_l4_1);
          if (w1 != 0)
        {
          /* non-zero weight : strings are not equal */
          result = 1;
          goto exit;
        }
          ce_index1++;
          num_ce1--;
        }

      goto read_weights1;
    }

      w1 = GET_UCA_WEIGHT (level, ce_index1, uca_w_l13_1, uca_w_l4_1);
      w2 = GET_UCA_WEIGHT (level, ce_index2, uca_w_l13_2, uca_w_l4_2);

      /* ignore zero weights (unless character is space) */
      if (w1 == 0 && *str1 != ASCII_SPACE)
    {
      ce_index1++;
      num_ce1--;

      if (w2 == 0 && *str2 != ASCII_SPACE)
        {
          ce_index2++;
          num_ce2--;
        }

      goto read_weights1;
    }
      else if (w2 == 0 && *str2 != ASCII_SPACE)
    {
      ce_index2++;
      num_ce2--;

      goto read_weights1;
    }
      else if (w1 > w2)
    {
      result = 1;
      goto exit;
    }
      else if (w1 < w2)
    {
      result = -1;
      goto exit;
    }

      ce_index1++;
      ce_index2++;

      num_ce1--;
      num_ce2--;
    }

  if (str2 < str2_end)
    {
      assert (str1 == str1_end);
      if (ignore_trailing_space)
    {
      if (lang_str_utf8_trail_zero_weights_w_exp (coll_data, level, str2, CAST_BUFLEN (str2_end - str2)) != 0)
        {
          result = -1;
        }
    }
      else
    {
      result = -1;
    }
    }
  else if (str1 < str1_end)
    {
      assert (str2 == str2_end);
      if (ignore_trailing_space)
    {
      if (lang_str_utf8_trail_zero_weights_w_exp (coll_data, level, str1, CAST_BUFLEN (str1_end - str1)) != 0)
        {
          result = 1;
        }
    }
      else
    {
      result = 1;
    }
    }
  else
    {
      assert (str2 == str2_end && str1 == str1_end);

      if (num_ce1 > num_ce2)
    {
      result = 1;
    }
      else if (num_ce1 < num_ce2)
    {
      result = -1;
    }
    }

exit:
  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  if (level == 0)
    {
      *offset_next_level = cmp_offset;
    }
  return result;
}

/*
 * lang_mht2str_utf8_exp() -
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   coll_data(in) : collation data
 *   level(in) : current UCA level to compare
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   offset_next_level(in/out) : offset in bytes from which to start the
 *               compare; used to avoid compare between
 *               binary identical part in consecutive compare
 *               levels
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static unsigned int
lang_mht2str_utf8_exp (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
{
  const unsigned char *str_end;
  unsigned char *str_next;
  const COLL_DATA *coll_data = &(lang_coll->coll);
  UCA_L13_W *uca_w_l13 = NULL;
  UCA_L4_W *uca_w_l4 = NULL;
  int num_ce = 0;
  int ce_index = 0;
  unsigned int w, cp;
  const int alpha_cnt = coll_data->w_count;
  const int exp_num = coll_data->uca_exp_num;
  unsigned int pseudo_key = 0;
  unsigned int level;
  int str_size;

  str_end = str + size;

  str_next = (unsigned char *) str;

  for (;;)
    {
      if (num_ce == 0)
    {
      str = str_next;
      if (str >= str_end)
        {
          break;
        }

      str_size = CAST_BUFLEN (str_end - str);
      cp = intl_utf8_to_cp (str, str_size, &str_next);

      if (cp < (unsigned int) alpha_cnt)
        {
          COLL_CONTRACTION *contr = NULL;

          if (coll_data->count_contr > 0 && str_size >= coll_data->contr_min_size
          && cp >= coll_data->cp_first_contr_offset
          && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
          && ((contr = lang_get_contr_for_string (coll_data, str, str_size, cp)) != NULL))
        {
          assert (contr != NULL);
          uca_w_l13 = contr->uca_w_l13;
          if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
            {
              uca_w_l4 = contr->uca_w_l4;
            }
          num_ce = contr->uca_num;
          str_next = (unsigned char *) str + contr->size;
        }
          else
        {
          uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
          if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
            {
              uca_w_l4 = &(coll_data->uca_w_l4[cp * exp_num]);
            }
          num_ce = coll_data->uca_num[cp];
          /* leave next pointer to the value returned by 'intl_utf8_to_cp' */
        }
        }
      else
        {
          uca_w_l13 = &uca_l13_max_weight;
          if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
        {
          uca_w_l4 = &uca_l4_max_weight;
        }
          num_ce = 1;
          /* leave next pointer to the value returned by 'intl_utf8_to_cp' */
        }

      assert (num_ce > 0);

      ce_index = 0;
    }

      if (num_ce == 0 && str >= str_end)
    {
      break;
    }

      for (level = 0; level < (unsigned int) coll_data->uca_opt.sett_strength; level++)
    {
      w = GET_UCA_WEIGHT (level, ce_index, uca_w_l13, uca_w_l4);
      ADD_TO_HASH (pseudo_key, w);
    }

      ce_index++;
      num_ce--;
    }

  return pseudo_key;
}


/*
 * lang_back_strmatch_utf8_uca_w_level() - string match or compare for UTF8
 *  collation employing full UCA weights (expansions and contractions)
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   coll_data(in) : collation data
 *   level(in) : current UCA level to compare
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   offset_next_level(in/out) : offset in bytes from which to start the
 *               compare; used to avoid compare between
 *               binary identical part in consecutive compare
 *               levels
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_back_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
                     const int size1, const unsigned char *str2, const int size2,
                     const unsigned char *escape, const bool has_last_escape, int *offset_next_level,
                     int *str1_match_size, bool ignore_trailing_space)
{
  const unsigned char *str1_start;
  const unsigned char *str2_start;
  const unsigned char *str1_last;
  const unsigned char *str2_last;
  unsigned char *str1_prev, *str2_prev;
  UCA_L13_W *uca_w_l13_1 = NULL;
  UCA_L13_W *uca_w_l13_2 = NULL;
  int num_ce1 = 0, num_ce2 = 0;
  int ce_index1 = -1, ce_index2 = -1;
  unsigned int w1 = 0, w2 = 0;
  unsigned int str1_cp_contr = 0, str2_cp_contr = 0;
  int result = 0;

  assert (offset_next_level != NULL && *offset_next_level > -1);

  str1_last = str1 + size1 - 1;
  str2_last = str2 + size2 - 1;
  str1_start = str1;
  str2_start = str2;

  while (*str1_last == ASCII_SPACE)
    {
      str1_last--;
    }

  while (*str2_last == ASCII_SPACE)
    {
      str2_last--;
    }

  str1_prev = (unsigned char *) str1_last;
  str2_prev = (unsigned char *) str2_last;

  for (;;)
    {
    read_weights1:
      if (ce_index1 < 0)
    {
      str1 = str1_prev;
      if (str1 < str1_start)
        {
          goto read_weights2;
        }

      lang_get_uca_back_weight_l13 (coll_data, true, str1_start, str1, &uca_w_l13_1, &num_ce1, &str1_prev,
                    &str1_cp_contr);

      assert (num_ce1 > 0);

      ce_index1 = num_ce1 - 1;
    }

    read_weights2:
      if (ce_index2 < 0)
    {
      int c_size;

      str2 = str2_prev;
      if (str2 < str2_start)
        {
          goto compare;
        }

      if (is_match && escape != NULL && !(has_last_escape && str2 == str2_last))
        {
          unsigned char *str2_prev_prev;

          (void) intl_back_utf8_to_cp (str2, str2_start, &str2_prev_prev);

          if (intl_cmp_char (str2_prev_prev + 1, escape, INTL_CODESET_UTF8, &c_size) == 0)
        {
          str2 = str2_prev_prev;
        }
        }

      lang_get_uca_back_weight_l13 (coll_data, true, str2_start, str2, &uca_w_l13_2, &num_ce2, &str2_prev,
                    &str2_cp_contr);

      assert (num_ce2 > 0);

      ce_index2 = num_ce2 - 1;

      if (is_match && coll_data->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW
          && !INTL_CONTR_FOUND (str2_cp_contr) && INTL_CONTR_FOUND (str1_cp_contr) && ce_index1 == num_ce1 - 1
          && str2_cp_contr == (str1_cp_contr & (~INTL_MASK_CONTR)))
        {
          /* re-compute weight of str1 without considering contractions */
          lang_get_uca_back_weight_l13 (coll_data, false, str1_start, str1, &uca_w_l13_1, &num_ce1, &str1_prev,
                        &str1_cp_contr);

          assert (num_ce1 > 0);
          ce_index1 = num_ce1 - 1;
        }
    }

    compare:
      if (ce_index1 < 0 && str1 < str1_start)
    {
      /* str1 was consumed */
      if (ce_index2 < 0)
        {
          if (str2 < str2_start)
        {
          /* both strings consumed and equal */
          assert (result == 0);
          goto exit;
        }
          else
        {
          if (is_match || !ignore_trailing_space)
            {
              result = -1;
              goto exit;
            }
          goto read_weights2;
        }
        }

      assert (ce_index2 >= 0);
      if (is_match || !ignore_trailing_space)
        {
          /* trailing spaces are not matched */
          result = -1;
          goto exit;
        }

      /* consume any remaining zero-weight values (skip them) from str2 */
      do
        {
          w2 = UCA_GET_L2_W (uca_w_l13_2[ce_index2]);
          if (w2 != 0)
        {
          /* non-zero weight : strings are not equal */
          result = -1;
          goto exit;
        }
          ce_index2--;
        }
      while (ce_index2 > 0);

      goto read_weights2;
    }

      if (ce_index2 < 0 && str2 < str2_start)
    {
      if (is_match)
        {
          assert (result == 0);
          goto exit;
        }

      if (!ignore_trailing_space)
        {
          result = 1;
          goto exit;
        }
      /* consume any remaining zero-weight values (skip them) from str1 */
      while (ce_index1 >= 0)
        {
          w1 = UCA_GET_L2_W (uca_w_l13_1[ce_index1]);
          if (w1 != 0)
        {
          /* non-zero weight : strings are not equal */
          result = 1;
          goto exit;
        }
          ce_index1--;
        }

      goto read_weights1;
    }

      assert (ce_index1 >= 0 && ce_index2 >= 0);

      w1 = UCA_GET_L2_W (uca_w_l13_1[ce_index1]);
      w2 = UCA_GET_L2_W (uca_w_l13_2[ce_index2]);

      /* ignore zero weights (unless character is space) */
      if (w1 == 0 && *str1 != ASCII_SPACE)
    {
      ce_index1--;

      if (w2 == 0 && *str2 != ASCII_SPACE)
        {
          ce_index2--;
        }

      goto read_weights1;
    }
      else if (w2 == 0 && *str2 != ASCII_SPACE)
    {
      ce_index2--;
      goto read_weights1;
    }
      else if (w1 > w2)
    {
      result = 1;
      goto exit;
    }
      else if (w1 < w2)
    {
      result = -1;
      goto exit;
    }

      ce_index1--;
      ce_index2--;
    }

  if (str1 > str1_start)
    {
      assert (str2 <= str2_start);
      result = 1;
    }
  else if (str2 > str2_start)
    {
      assert (str1 <= str1_start);
      result = -1;
    }
  else
    {
      if (ce_index1 > ce_index2)
    {
      result = 1;
      goto exit;
    }
      else if (ce_index1 < ce_index2)
    {
      result = -1;
      goto exit;
    }
    }

exit:
  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1_last - str1_start) + 1;
    }

  return result;
}

/*
 * lang_strcmp_utf8_uca() - string compare for UTF8 for a collation using
 *              full UCA weights (expansions and contractions)
 *   return:
 *   lang_coll(in):
 *   string1(in):
 *   size1(in):
 *   string2(in):
 *   size2(in):
 */
static int
lang_strcmp_utf8_uca (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
              const unsigned char *str2, const int size2, bool ignore_trailing_space)
{
  return lang_strmatch_utf8_uca_w_coll_data (&(lang_coll->coll), false, str1, size1, str2, size2, NULL, false, NULL,
                         ignore_trailing_space);
}

/*
 * lang_strmatch_utf8_uca() - string match for UTF8 for a collation using
 *                full UCA weights (expansions and contractions)
 *   return:
 *   lang_coll(in):
 *   is_match(in):
 *   string1(in):
 *   size1(in):
 *   string2(in):
 *   size2(in):
 *   escape(in):
 *   has_last_escape(in):
 *   str1_match_size(out):
 */
static int
lang_strmatch_utf8_uca (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, const int size1,
            const unsigned char *str2, const int size2, const unsigned char *escape,
            const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
{
  return lang_strmatch_utf8_uca_w_coll_data (&(lang_coll->coll), is_match, str1, size1, str2, size2, escape,
                         has_last_escape, str1_match_size, ignore_trailing_space);
}

/*
 * lang_strmatch_utf8_uca_w_coll_data() - string match/compare for UTF8 for a
 *   collation using full UCA weights (+ expansions and contractions)
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   coll_data(in):
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 */
int
lang_strmatch_utf8_uca_w_coll_data (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
                    const int size1, const unsigned char *str2, const int size2,
                    const unsigned char *escape, const bool has_last_escape, int *str1_match_size,
                    bool ignore_trailing_space)
{
  int res;
  int cmp_offset = 0;

  /* compare level 1 */
  res =
    lang_strmatch_utf8_uca_w_level (coll_data, 0, is_match, str1, size1, str2, size2, escape, has_last_escape,
                    &cmp_offset, str1_match_size, ignore_trailing_space);
  if (res != 0)
    {
      return res;
    }

  if (coll_data->uca_opt.sett_strength == TAILOR_PRIMARY)
    {
      if (coll_data->uca_opt.sett_caseLevel)
    {
      /* compare level 3 (casing) */
      res =
        lang_strmatch_utf8_uca_w_level (coll_data, 2, is_match, str1, size1, str2, size2, escape, has_last_escape,
                        &cmp_offset, str1_match_size, ignore_trailing_space);
      if (res != 0)
        {
          /* reverse order when caseFirst == UPPER */
          return (coll_data->uca_opt.sett_caseFirst == 1) ? -res : res;
        }
    }
      return 0;
    }

  assert (coll_data->uca_opt.sett_strength >= TAILOR_SECONDARY);

  /* compare level 2 */
  if (coll_data->uca_opt.sett_backwards)
    {
      int str1_level_2_size;

      if (is_match)
    {
      str1_level_2_size = *str1_match_size;
    }
      else
    {
      str1_level_2_size = size1;
    }
      if (str1_level_2_size > 0 && size2 > 0)
    {
      res =
        lang_back_strmatch_utf8_uca_w_level (coll_data, is_match, str1, str1_level_2_size, str2, size2, escape,
                         has_last_escape, &cmp_offset, str1_match_size, ignore_trailing_space);
    }
      else
    {
      res = (str1_level_2_size == size2) ? 0 : ((str1_level_2_size > size2) ? 1 : -1);
    }
    }
  else
    {
      res =
    lang_strmatch_utf8_uca_w_level (coll_data, 1, is_match, str1, size1, str2, size2, escape, has_last_escape,
                    &cmp_offset, str1_match_size, ignore_trailing_space);
    }

  if (res != 0)
    {
      return res;
    }

  if (coll_data->uca_opt.sett_strength == TAILOR_SECONDARY)
    {
      return 0;
    }

  /* compare level 3 */
  res =
    lang_strmatch_utf8_uca_w_level (coll_data, 2, is_match, str1, size1, str2, size2, escape, has_last_escape,
                    &cmp_offset, str1_match_size, ignore_trailing_space);
  if (res != 0)
    {
      /* reverse order when caseFirst == UPPER */
      return (coll_data->uca_opt.sett_caseFirst == 1) ? -res : res;
    }

  if (coll_data->uca_opt.sett_strength == TAILOR_TERTIARY)
    {
      return 0;
    }

  /* compare level 4 */
  res =
    lang_strmatch_utf8_uca_w_level (coll_data, 3, is_match, str1, size1, str2, size2, escape, has_last_escape,
                    &cmp_offset, str1_match_size, ignore_trailing_space);
  if (res != 0)
    {
      /* reverse order when caseFirst == UPPER */
      return res;
    }

  return 0;
}

/*
 * lang_str_utf8_trail_zero_weights() - cheks if remaining characters of an
 *                  UTF-8 string have all zero weights
 *
 *   return: 0 if all remaining characters have zero weight, 1 otherwise
 *   lang_coll(in): collation data
 *   str(in):
 *   size(in):
 */
static int
lang_str_utf8_trail_zero_weights (const LANG_COLLATION * lang_coll, const unsigned char *str, int size)
{
  unsigned char *str_next;
  unsigned int cp;
  unsigned int *weight = (lang_coll->built_in) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  while (size > 0)
    {
      cp = intl_utf8_to_cp (str, size, &str_next);

      if (cp >= (unsigned int) lang_coll->coll.w_count || weight[cp] != 0)
    {
      return 1;
    }
      size -= CAST_BUFLEN (str_next - str);
      str = str_next;
    }

  return 0;
}

/*
 * lang_str_utf8_trail_zero_weights_w_exp() - cheks if remaining characters of
 *                   an UTF-8 string have all zero weights
 *                   collation with expansions.
 *
 *   return: 0 if all remaining characters have zero weight, 1 otherwise
 *   coll_data(in): collation data
 *   level(in):current level of matching
 *   str(in):
 *   size(in):
 */
static int
lang_str_utf8_trail_zero_weights_w_exp (const COLL_DATA * coll_data, const int level, const unsigned char *str,
                    int size)
{
  UCA_L13_W *uca_w_l13 = NULL;
  UCA_L4_W *uca_w_l4 = NULL;
  unsigned char *str_next;
  int num_ce = 0;
  int ce_index = 0;
  unsigned int dummy;

  str_next = (unsigned char *) str;
  while (size > 0)
    {
      if (num_ce == 0)
    {
      str = str_next;

      if (level == 3)
        {
          lang_get_uca_w_l4 (coll_data, true, str, size, &uca_w_l4, &num_ce, &str_next, &dummy);
        }
      else
        {
          lang_get_uca_w_l13 (coll_data, true, str, size, &uca_w_l13, &num_ce, &str_next, &dummy);
        }
      assert (num_ce > 0);

      ce_index = 0;
      size -= CAST_BUFLEN (str_next - str);
      str = str_next;
    }

      if (GET_UCA_WEIGHT (level, ce_index, uca_w_l13, uca_w_l4) != 0)
    {
      return 1;
    }

      ce_index++;
      num_ce--;
    }

  return 0;
}

/*
 * lang_next_coll_char_utf8() - computes the next collatable char
 *   return: size in bytes of the next collatable char
 *   lang_coll(on): collation
 *   seq(in): pointer to current char
 *   size(in): available bytes for current char
 *   next_seq(in/out): buffer to return next alphabetical char
 *   len_next(in/out): length in chars of next char (always 1 for this func)
 *
 *  Note :  It is assumed that the input buffer (cur_char) contains at least
 *      one UTF-8 character.
 *      The calling function should take into account cases when 'next'
 *      character is encoded on greater byte size.
 */
static int
lang_next_coll_char_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
              unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
{
  unsigned int cp_alpha_char, cp_next_alpha_char;
  const int alpha_cnt = lang_coll->coll.w_count;
  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;

  unsigned char *dummy = NULL;

  assert (seq != NULL);
  assert (next_seq != NULL);
  assert (len_next != NULL);
  assert (size > 0);

  cp_alpha_char = intl_utf8_to_cp (seq, size, &dummy);

  if (cp_alpha_char < (unsigned int) alpha_cnt)
    {
      cp_next_alpha_char = next_alpha_char[cp_alpha_char];
    }
  else
    {
      cp_next_alpha_char = cp_alpha_char + 1;
    }

  *len_next = 1;

  return intl_cp_to_utf8 (cp_next_alpha_char, next_seq);
}

/*
 * lang_next_coll_seq_utf8_w_contr() - computes the next collatable sequence
 *                     for locales having contractions
 *   return: size in bytes of the next collatable sequence
 *   lang_coll(on): collation
 *   seq(in): pointer to current sequence
 *   size(in): available bytes for current sequence
 *   next_seq(in/out): buffer to return next collatable sequence
 *   len_next(in/out): length in chars of next sequence
 *
 *  Note :  It is assumed that the input buffer (cur_char) contains at least
 *      one UTF-8 character.
 */
static int
lang_next_coll_seq_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                 unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
{
  unsigned int cp_first_char;
  unsigned int next_seq_id;
  unsigned int cp_next_char;
  const int alpha_cnt = lang_coll->coll.w_count;
  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;

  unsigned char *dummy = NULL;
  COLL_CONTRACTION *contr = NULL;

  assert (seq != NULL);
  assert (next_seq != NULL);
  assert (len_next != NULL);
  assert (size > 0);

  cp_first_char = intl_utf8_to_cp (seq, size, &dummy);

  if (cp_first_char < (unsigned int) alpha_cnt)
    {
      if (size >= lang_coll->coll.contr_min_size && cp_first_char >= lang_coll->coll.cp_first_contr_offset
      && cp_first_char < (lang_coll->coll.cp_first_contr_offset + lang_coll->coll.cp_first_contr_count))
    {
      contr = lang_get_contr_for_string (&(lang_coll->coll), seq, size, cp_first_char);
    }

      if (contr == NULL)
    {
      next_seq_id = next_alpha_char[cp_first_char];
    }
      else
    {
      next_seq_id = contr->next;
    }

      if (INTL_IS_NEXT_CONTR (next_seq_id))
    {
      contr = &(lang_coll->coll.contr_list[INTL_GET_NEXT_CONTR_ID (next_seq_id)]);
      memcpy (next_seq, contr->c_buf, contr->size);
      *len_next = contr->cp_count;
      return contr->size;
    }
      else
    {
      cp_next_char = next_seq_id;
    }
    }
  else
    {
      /* codepoint is not collated in current locale */
      cp_next_char = cp_first_char + 1;
    }

  *len_next = 1;
  return intl_cp_to_utf8 (cp_next_char, next_seq);
}

/*
 * lang_split_key_iso() - finds the prefix key between two strings (ISO
 *            charset with cases sensitive collation)
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes of key
 *
 *  Note : this function is used by index prefix computation
 */
static int
lang_split_key_iso (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
            const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
            bool ignore_trailing_space)
{
  const unsigned char *str1_end, *str2_end;
  const unsigned char *str1_begin, *str2_begin;
  int key_size;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;
  str2_begin = str2;

  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
    {
      if (*str1 != *str2)
    {
      assert ((!is_desc && *str1 < *str2) || (is_desc && *str1 > *str2));
      break;
    }
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight */
      while (str2 < str2_end)
    {
      if (weight[*str2++] != ZERO)
        {
          break;
        }
    }
      assert (str2 <= str2_end);
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);

      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      if (weight[*str1++] != ZERO)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}

/*
 * lang_split_key_byte() - finds the prefix key :
 *             collations  with byte-characters (ISO charset) and
 *             weight values (e.g. case insensitive).
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes of key
 *
 *  Note : this function is used by index prefix computation
 */
static int
lang_split_key_byte (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
             const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
             bool ignore_trailing_space)
{
  const unsigned char *str1_end, *str2_end;
  const unsigned char *str1_begin, *str2_begin;
  unsigned int w1, w2;
  int key_size;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;
  str2_begin = str2;

  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
    {
      w1 = weight[*str1];
      w2 = weight[*str2];

      if (w1 != w2)
    {
      assert ((!is_desc && w1 < w2) || (is_desc && w1 > w2));
      break;
    }
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight */
      while (str2 < str2_end)
    {
      if (weight[*str2++] != 0)
        {
          break;
        }
    }
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);

      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      if (weight[*str1++] != 0)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}

/*
 * lang_split_key_utf8() - finds the prefix key; UTF-8 collation with
 *             contractions but without expansions
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes of key
 *
 *  Note : this function is used by index prefix computation
 */
static int
lang_split_key_utf8 (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
             const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
             bool ignore_trailing_space)
{
  const unsigned char *str1_end, *str2_end;
  const unsigned char *str1_begin, *str2_begin;
  unsigned char *str1_next, *str2_next;
  unsigned int w1, w2;
  int key_size;
  const COLL_DATA *coll = &(lang_coll->coll);

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;
  str2_begin = str2;

  for (; str1 < str1_end && str2 < str2_end;)
    {
      w1 = lang_get_w_first_el (coll, str1, CAST_BUFLEN (str1_end - str1), &str1_next, ignore_trailing_space);
      w2 = lang_get_w_first_el (coll, str2, CAST_BUFLEN (str2_end - str2), &str2_next, ignore_trailing_space);

      if (w1 != w2)
    {
      assert ((!is_desc && w1 < w2) || (is_desc && w1 > w2));
      break;
    }

      str1 = str1_next;
      str2 = str2_next;
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight from str2 */
      while (str2 < str2_end)
    {
      w2 = lang_get_w_first_el (coll, str2, CAST_BUFLEN (str2_end - str2), &str2_next, ignore_trailing_space);
      str2 = str2_next;
      if (w2 != 0)
        {
          break;
        }
    }

      assert (str2 <= str2_end);
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);
      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      w1 = lang_get_w_first_el (coll, str1, CAST_BUFLEN (str1_end - str1), &str1_next, ignore_trailing_space);
      str1 = str1_next;
      if (w1 != 0)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}

/*
 * lang_split_key_w_exp() - finds the prefix key for UTF-8 strings and
 *              collation with expansions
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes in key
 *
 *  Note : this function is used by index prefix computation
 */
static int
lang_split_key_w_exp (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
              const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
              bool ignore_trailing_space)
{
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  unsigned char *str1_next, *str2_next;
  unsigned char *str1_begin, *str2_begin;
  UCA_L13_W *uca_w_l13_1 = NULL;
  UCA_L13_W *uca_w_l13_2 = NULL;
  int num_ce1 = 0, num_ce2 = 0;
  int ce_index1 = 0, ce_index2 = 0;
  unsigned int w1 = 0, w2 = 0;
  const COLL_DATA *cd = &(lang_coll->coll);
  unsigned int dummy;
  int key_size;
  bool force_key = false;

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_begin = str1_next = (unsigned char *) str1;
  str2_begin = str2_next = (unsigned char *) str2;

  str1_end = str1 + size1;
  str2_end = str2 + size2;

  /* Regular string compare in collation with expansions requires multiple passes up to the UCA level of collation or
   * until a weight difference Key prefix algorithm takes into account only level 1 of weight */
  for (;;)
    {
    read_weights1:
      if (num_ce1 == 0)
    {
      str1 = str1_next;
      if (str1 >= str1_end)
        {
          goto read_weights2;
        }

      lang_get_uca_w_l13 (cd, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1, &str1_next,
                  &dummy);
      assert (num_ce1 > 0);

      ce_index1 = 0;
    }

    read_weights2:
      if (num_ce2 == 0)
    {
      str2 = str2_next;
      if (str2 >= str2_end)
        {
          goto compare;
        }

      lang_get_uca_w_l13 (cd, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2, &str2_next,
                  &dummy);

      assert (num_ce2 > 0);

      ce_index2 = 0;
    }

    compare:
      if ((num_ce1 == 0 && str1 >= str1_end) || (num_ce2 == 0 && str2 >= str2_end))
    {
      force_key = true;
      break;
    }

      w1 = UCA_GET_L1_W (uca_w_l13_1[ce_index1]);
      w2 = UCA_GET_L1_W (uca_w_l13_2[ce_index2]);

      /* ignore zero weights (unless character is space) */
      if (w1 == 0 && *str1 != ASCII_SPACE)
    {
      ce_index1++;
      num_ce1--;

      if (w2 == 0 && *str2 != ASCII_SPACE)
        {
          ce_index2++;
          num_ce2--;
        }

      goto read_weights1;
    }
      else if (w2 == 0 && *str2 != ASCII_SPACE)
    {
      ce_index2++;
      num_ce2--;

      goto read_weights1;
    }
      else if (w1 != w2)
    {
      assert ((is_desc && w1 > w2) || (!is_desc && w1 < w2));
      break;
    }

      assert (w1 == w2);

      ce_index1++;
      ce_index2++;

      num_ce1--;
      num_ce2--;
    }

  if (force_key)
    {
      *key = str2_begin;
      *byte_size = size2;
      return NO_ERROR;
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight */
      while (str2 < str2_end)
    {
      lang_get_uca_w_l13 (cd, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2, &str2_next,
                  &dummy);
      str2 = str2_next;

      if (UCA_GET_L1_W (uca_w_l13_2[0]) != 0)
        {
          break;
        }
    }

      assert (str2 <= str2_end);
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);
      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      lang_get_uca_w_l13 (cd, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1, &str1_next,
                  &dummy);
      str1 = str1_next;

      if (UCA_GET_L1_W (uca_w_l13_1[0]) != 0)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}

/*
 * lang_split_key_euckr() - finds the prefix key for EUC-KR collation
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes in key
 *
 *  Note : this function is used by index prefix computation
 */
static int
lang_split_key_euckr (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
              const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
              bool ignore_trailing_space)
{
  const unsigned char *str1_next, *str2_next;
  int key_size, char1_size, char2_size;
  const unsigned char *str1_end, *str2_end;
  const unsigned char *str1_begin, *str2_begin;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;
  str2_begin = str2;

  for (; str1 < str1_end && str2 < str2_end;)
    {
      str1_next = intl_nextchar_euc (str1, &char1_size);
      str2_next = intl_nextchar_euc (str2, &char2_size);

      if (char1_size != char2_size || memcmp (str1, str2, char1_size) != 0)
    {
      break;
    }

      str1 = str1_next;
      str2 = str2_next;
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight */
      while (str2 < str2_end)
    {
      bool is_zero_weight = false;
      str2_next = intl_nextchar_euc (str2, &char2_size);
      if (*str2 == ASCII_SPACE || *str2 == 0 || (*str2 == EUC_SPACE && char2_size == 2 && *(str2 + 1) == EUC_SPACE))
        {
          is_zero_weight = (weight[SPACE] == 0);
        }
      str2 = str2_next;
      if (!is_zero_weight)
        {
          break;
        }
    }

      assert (str2 <= str2_end);
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);

      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      bool is_zero_weight = false;
      str1_next = intl_nextchar_euc (str1, &char1_size);
      if (*str1 == ASCII_SPACE || *str1 == 0 || (*str1 == EUC_SPACE && char1_size == 2 && *(str1 + 1) == EUC_SPACE))
        {
          is_zero_weight = (weight[SPACE] == 0);
        }
      str1 = str1_next;
      if (!is_zero_weight)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}

/*
 * English Locale Data
 */


/*
 * lang_initloc_en () - init locale data for English language
 *   return:
 */
static void
lang_initloc_en_iso88591 (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  coll_Iso_binary.default_lang = ld;
  coll_Iso88591_en_cs.default_lang = ld;
  coll_Iso88591_en_ci.default_lang = ld;

  ld->is_initialized = true;
}

/*
 * lang_initloc_en_binary () - init locale data for English language
 *   return:
 */
static void
lang_initloc_en_binary (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  coll_Binary.default_lang = ld;

  ld->is_initialized = true;
}

/*
 * lang_init_coll_en_cs () - init collation for English case sensitive
 *               on no matter charset (iso88591, utf8, euckr)
 *               with optional ts (trailing space sensitive)
 *   return:
 */
static void
lang_init_coll_en_cs (LANG_COLLATION * lang_coll)
{
  assert (lang_coll != NULL);

  if (!(lang_coll->need_init))
    {
      return;
    }

  /* init collation data for English case sensitive (no matter the charset) with optional ts (trailing space sensitive) */
  static bool is_common_en_cs_init =[](COLL_DATA * coll_data) {
    int i;
    for (i = 0; i < coll_data->w_count; i++)
      {
    coll_data->weights_ti[i] = coll_data->weights[i] = i;
    coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1;
      }

    coll_data->weights_ti[32] = 0;
    coll_data->next_cp_ti[32] = 1;
    return true;
  }
  (&lang_coll->coll);

  lang_coll->need_init = false;

  /* Notice:
   * This ensures the variable is not optimized away, even though it does not change the functional logic of the code
   * Please do not delete the following two lines.
   */
  (void) is_common_en_cs_init;  // Dummy Reference  
  *(volatile bool *) &is_common_en_cs_init;
}

/*
 * lang_init_coll_en_ci () - init collation for English case insensitive
 *               on no matter charset (iso88591, utf8, euckr)
 *               with optional ts (trailing space sensitive)
 *   return:
 */
static void
lang_init_coll_en_ci (LANG_COLLATION * lang_coll)
{
  assert (lang_coll != NULL);

  if (!(lang_coll->need_init))
    {
      return;
    }

  /* init collation data for English case insensitive (no matter the charset) with optional ts (trailing space sensitive) */
  static bool is_common_en_ci_init =[](COLL_DATA * coll_data) {
    int i;
    for (i = 0; i < coll_data->w_count; i++)
      {
    coll_data->weights_ti[i] = coll_data->weights[i] = i;
    coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1;
      }

    for (i = 'a'; i <= (int) 'z'; i++)
      {
    coll_data->weights_ti[i] = coll_data->weights[i] = i - ('a' - 'A');
    coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1 - ('a' - 'A');
      }

    coll_data->next_cp['z'] = coll_data->next_cp['Z'];
    coll_data->next_cp['a' - 1] = coll_data->next_cp['A' - 1];

    coll_data->next_cp_ti['z'] = coll_data->next_cp_ti['Z'];
    coll_data->next_cp_ti['a' - 1] = coll_data->next_cp_ti['A' - 1];

    /* for ignore trailing space */
    coll_data->weights_ti[32] = 0;
    coll_data->next_cp_ti[32] = 1;
    return true;
  }
  (&lang_coll->coll);

  lang_coll->need_init = false;

  /* Notice:
   * This ensures the variable is not optimized away, even though it does not change the functional logic of the code
   * Please do not delete the following two lines.
   */
  (void) is_common_en_ci_init;  // Dummy Reference  
  *(volatile bool *) &is_common_en_ci_init;
}

/*
 * lang_initloc_en () - init locale data for English language
 *   return:
 */
static void
lang_initloc_en_utf8 (LANG_LOCALE_DATA * ld)
{
  int i;

  assert (ld != NULL);

  assert (ld->default_lang_coll != NULL);

  /* init alphabet */
  for (i = 0; i < LANG_CHAR_COUNT_EN; i++)
    {
      lang_upper_EN[i] = i;
      lang_lower_EN[i] = i;
    }

  for (i = (int) 'a'; i <= (int) 'z'; i++)
    {
      lang_upper_EN[i] = i - ('a' - 'A');
      lang_lower_EN[i - ('a' - 'A')] = i;
    }

  /* other initializations to follow here */
  coll_Utf8_binary.default_lang = ld;
  coll_Utf8_en_cs.default_lang = ld;
  coll_Utf8_en_ci.default_lang = ld;

  ld->is_initialized = true;
}

/*
 * lang_fastcmp_byte () - compare two character strings of ISO-8859-1 and etc
 *                 codeset
 *
 * Arguments:
 *    lang_coll: collation data
 *      string1: 1st character string
 *        size1: size of 1st string
 *      string2: 2nd character string
 *        size2: size of 2nd string
 *
 * Returns:
 *   Greater than 0 if string1 > string2
 *   Equal to 0     if string1 = string2
 *   Less than 0    if string1 < string2
 *
 * Errors:
 *
 * Note:
 *   This function is similar to strcmp(3) or bcmp(3). It is designed to
 *   follow SQL_TEXT character set collation. Padding character(space ' ') is
 *   the smallest character in the set. (e.g.) "ab z" < "ab\t1"
 *
 */

static int
lang_fastcmp_byte (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
           const unsigned char *string2, const int size2, bool ignore_trailing_space)
{
  int n, i, cmp;
  unsigned int c1, c2;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;


  n = size1 < size2 ? size1 : size2;
  for (i = 0, cmp = 0; i < n && cmp == 0; i++)
    {
      c1 = *string1++;
      if (c1 == SPACE)
    {
      c1 = ZERO;
    }
      else
    {
      c1 = weight[c1];
    }

      c2 = *string2++;
      if (c2 == SPACE)
    {
      c2 = ZERO;
    }
      else
    {
      c2 = weight[c2];
    }

      cmp = c1 - c2;
    }

  if (cmp || size1 == size2)
    {
      return cmp;
    }

  if (!ignore_trailing_space && size1 != size2)
    {
      return size1 - size2;
    }

  c1 = c2 = ZERO;
  if (size1 < size2)
    {
      n = size2 - size1;
      for (i = 0; i < n && cmp == 0; i++)
    {
      c2 = weight[*string2++];
      cmp = c1 - c2;
    }
    }
  else
    {
      n = size1 - size2;
      for (i = 0; i < n && cmp == 0; i++)
    {
      c1 = weight[*string1++];
      cmp = c1 - c2;
    }
    }
  return cmp;
}

/*
 * lang_strmatch_byte () - match or compare two character strings of
 *                  ISO-8859-1 codeset
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   lang_coll(in) : collation data
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_strmatch_byte (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
            const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
            int *str1_match_size, bool ignore_trailing_space)
{
  unsigned int c1, c2;
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_begin;
  const int alpha_cnt = lang_coll->coll.w_count;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  str1_begin = str1;
  str1_end = str1 + size1;
  str2_end = str2 + size2;
  for (; str1 < str1_end && str2 < str2_end;)
    {
      assert (str1_end - str1 > 0);
      assert (str2_end - str2 > 0);

      c1 = *str1++;
      if (c1 == SPACE)
    {
      c1 = ZERO;
    }

      c2 = *str2++;
      if (c2 == SPACE)
    {
      c2 = ZERO;
    }

      if (is_match && escape != NULL && c2 == *escape)
    {
      if (!(has_last_escape && str2 + 1 >= str2_end))
        {
          c2 = *str2++;
          if (c2 == SPACE)
        {
          c2 = ZERO;
        }
        }
    }

      if (c1 < (unsigned int) alpha_cnt)
    {
      c1 = weight[c1];
    }

      if (c2 < (unsigned int) alpha_cnt)
    {
      c2 = weight[c2];
    }

      if (c1 != c2)
    {
      return c1 - c2;
    }
    }

  size1 = CAST_BUFLEN (str1_end - str1);
  size2 = CAST_BUFLEN (str2_end - str2);

  assert (size1 == 0 || size2 == 0);

  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  if (size1 == size2)
    {
      return 0;
    }
  else if (size2 > 0)
    {
      if (is_match)
    {
      /* pattern string should be exhausted for a full match */
      return -1;
    }
      for (; str2 < str2_end;)
    {
      c2 = weight[*str2++];
      if (c2)
        {
          return -1;
        }
    }
    }
  else
    {
      assert (size1 > 0);

      if (is_match)
    {
      return 0;
    }

      for (; str1 < str1_end;)
    {
      c1 = weight[*str1++];
      if (c1)
        {
          return 1;
        }
    }
    }
  return 0;
}

/*
 * lang_mht2str_default () -
 *   return:
 *   lang_coll(in):
 *   str(in):
 *   size(in):
 *
 */
static unsigned int
lang_mht2str_default (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
{
  return mht_2str_pseudo_key (str, size);
}

/*
 * lang_mht2str_byte () -
 *   return:
 *   lang_coll(in):
 *   str(in):
 *   size(in):
 *
 */
static unsigned int
lang_mht2str_byte (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
{
  const unsigned char *str_end = str + size;
  unsigned int pseudo_key = 0;
  unsigned int w;

  for (; str < str_end; str++)
    {
      w = lang_coll->coll.weights[*str];
      ADD_TO_HASH (pseudo_key, w);
    }

  return pseudo_key;
}

/*
 * lang_next_alpha_char_iso88591() - computes the next alphabetical char
 *   return: size in bytes of the next alphabetical char
 *   lang_coll(in): collation data
 *   seq(in): pointer to current char
 *   size(in): size in bytes for seq
 *   next_seq(in/out): buffer to return next alphabetical char
 *   len_next(in/out): length in chars for nex_seq
 *
 */
static int
lang_next_alpha_char_iso88591 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
                   unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
{
  assert (seq != NULL);
  assert (next_seq != NULL);
  assert (len_next != NULL);
  assert (size > 0);

  *next_seq = (*seq == 0xff) ? 0xff : (*seq + 1);
  *len_next = 1;
  return 1;
}

/*
 * lang_next_coll_byte() - computes the next collatable char
 *   return: size in bytes of the next collatable char
 *   lang_coll(on): collation
 *   seq(in): pointer to current char
 *   size(in): available bytes for current char
 *   next_seq(in/out): buffer to return next alphabetical char
 *   len_next(in/out): length in chars of next char (always 1 for this func)
 *
 *  Note :  This assumes the weights and next col are define at byte level.
 */
static int
lang_next_coll_byte (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
             unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
{
  unsigned int cp_alpha_char, cp_next_alpha_char;
  const int alpha_cnt = lang_coll->coll.w_count;
  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;

  assert (seq != NULL);
  assert (next_seq != NULL);
  assert (len_next != NULL);
  assert (size > 0);

  cp_alpha_char = (unsigned int) *seq;

  if (cp_alpha_char < (unsigned int) alpha_cnt)
    {
      cp_next_alpha_char = next_alpha_char[cp_alpha_char];
    }
  else
    {
      cp_next_alpha_char = (cp_alpha_char == 0xff) ? 0xff : (cp_alpha_char + 1);
    }

  assert (cp_next_alpha_char <= 0xff);

  *next_seq = (unsigned char) cp_next_alpha_char;
  *len_next = 1;

  return 1;
}


/*
 * Turkish Locale Data
 */

/*
 * lang_init_coll_Utf8_tr_cs () - init collation data for Turkish
 *   return:
 *   lang_coll(in):
 */
static void
lang_init_coll_Utf8_tr_cs (LANG_COLLATION * lang_coll)
{
  int i;
  unsigned int *lang_Weight_TR;
  unsigned int *lang_Next_alpha_char_TR;
  unsigned int *lang_Weight_TR_ti;
  unsigned int *lang_Next_alpha_char_TR_ti;

  const unsigned int special_upper_cp[] = {
    0xc7,           /* capital C with cedilla */
    0x11e,          /* capital letter G with breve */
    0x130,          /* capital letter I with dot above */
    0xd6,           /* capital letter O with diaeresis */
    0x15e,          /* capital letter S with cedilla */
    0xdc            /* capital letter U with diaeresis */
  };

  const unsigned int special_prev_upper_cp[] = { 'C', 'G', 'I', 'O', 'S', 'U' };

  const unsigned int special_lower_cp[] = {
    0xe7,           /* small c with cedilla */
    0x11f,          /* small letter g with breve */
    0x131,          /* small letter dotless i */
    0xf6,           /* small letter o with diaeresis */
    0x15f,          /* small letter s with cedilla */
    0xfc            /* small letter u with diaeresis */
  };

  const unsigned int special_prev_lower_cp[] = { 'c', 'g', 'h', 'o', 's', 'u' };

  assert (lang_coll != NULL);

  if (!(lang_coll->need_init))
    {
      return;
    }

  lang_Weight_TR = lang_coll->coll.weights;
  lang_Next_alpha_char_TR = lang_coll->coll.next_cp;

  lang_Weight_TR_ti = lang_coll->coll.weights_ti;
  lang_Next_alpha_char_TR_ti = lang_coll->coll.next_cp_ti;

  for (i = 0; i < LANG_CHAR_COUNT_TR; i++)
    {
      lang_Weight_TR[i] = i;
      lang_Next_alpha_char_TR[i] = i + 1;
      lang_Weight_TR_ti[i] = i;
      lang_Next_alpha_char_TR_ti[i] = i + 1;
    }

  assert (DIM (special_lower_cp) == DIM (special_upper_cp));

  /* specific turkish letters: weighting for string compare */
  for (i = 0; i < (int) DIM (special_upper_cp); i++)
    {
      unsigned int j;
      unsigned int cp = special_upper_cp[i];
      unsigned cp_repl = 1 + special_prev_upper_cp[i];
      unsigned int w_repl = lang_Weight_TR[cp_repl];

      lang_Weight_TR[cp] = w_repl;
      lang_Weight_TR_ti[cp] = w_repl;

      assert (cp_repl < cp);
      for (j = cp_repl; j < cp; j++)
    {
      if (lang_Weight_TR[j] >= w_repl)
        {
          (lang_Weight_TR[j])++;
          (lang_Weight_TR_ti[j])++;
        }
    }
    }

  for (i = 0; i < (int) DIM (special_lower_cp); i++)
    {
      unsigned int j;
      unsigned int cp = special_lower_cp[i];
      unsigned cp_repl = 1 + special_prev_lower_cp[i];
      unsigned int w_repl = lang_Weight_TR[cp_repl];

      lang_Weight_TR[cp] = w_repl;
      lang_Weight_TR_ti[cp] = w_repl;

      assert (cp_repl < cp);
      for (j = cp_repl; j < cp; j++)
    {
      if (lang_Weight_TR[j] >= w_repl)
        {
          (lang_Weight_TR[j])++;
          (lang_Weight_TR_ti[j])++;
        }
    }
    }

  /* next letter in alphabet (for pattern searching) */
  for (i = 0; i < (int) DIM (special_upper_cp); i++)
    {
      unsigned int cp_special = special_upper_cp[i];
      unsigned int cp_prev = special_prev_upper_cp[i];
      unsigned int cp_next = cp_prev + 1;

      lang_Next_alpha_char_TR[cp_prev] = cp_special;
      lang_Next_alpha_char_TR[cp_special] = cp_next;
      lang_Next_alpha_char_TR_ti[cp_prev] = cp_special;
      lang_Next_alpha_char_TR_ti[cp_special] = cp_next;
    }

  for (i = 0; i < (int) DIM (special_lower_cp); i++)
    {
      unsigned int cp_special = special_lower_cp[i];
      unsigned int cp_prev = special_prev_lower_cp[i];
      unsigned int cp_next = cp_prev + 1;

      lang_Next_alpha_char_TR[cp_prev] = cp_special;
      lang_Next_alpha_char_TR[cp_special] = cp_next;
      lang_Next_alpha_char_TR_ti[cp_prev] = cp_special;
      lang_Next_alpha_char_TR_ti[cp_special] = cp_next;
    }

  lang_Weight_TR_ti[32] = 0;
  lang_Next_alpha_char_TR_ti[32] = 1;

  /* other initializations to follow here */

  lang_coll->need_init = false;
}

/*
 * lang_initloc_tr_iso () - init locale data for Turkish language
 *              (ISO charset)
 *   return:
 *   ld(in/out):
 */
static void
lang_initloc_tr_iso (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  ld->is_initialized = true;
}

/*
 * lang_initloc_tr_utf8 () - init locale data for Turkish language (UTF8)
 *   return:
 *   ld(in/out):
 */
static void
lang_initloc_tr_utf8 (LANG_LOCALE_DATA * ld)
{
  int i;

  const unsigned int special_upper_cp[] = {
    0xc7,           /* capital C with cedilla */
    0x11e,          /* capital letter G with breve */
    0x130,          /* capital letter I with dot above */
    0xd6,           /* capital letter O with diaeresis */
    0x15e,          /* capital letter S with cedilla */
    0xdc            /* capital letter U with diaeresis */
  };

  const unsigned int special_lower_cp[] = {
    0xe7,           /* small c with cedilla */
    0x11f,          /* small letter g with breve */
    0x131,          /* small letter dotless i */
    0xf6,           /* small letter o with diaeresis */
    0x15f,          /* small letter s with cedilla */
    0xfc            /* small letter u with diaeresis */
  };

  assert (ld != NULL);

  assert (ld->default_lang_coll != NULL);

  /* init alphabet */
  for (i = 0; i < LANG_CHAR_COUNT_TR; i++)
    {
      lang_upper_TR[i] = i;
      lang_lower_TR[i] = i;
    }

  for (i = (int) 'a'; i <= (int) 'z'; i++)
    {
      lang_upper_TR[i] = i - ('a' - 'A');
      lang_lower_TR[i - ('a' - 'A')] = i;

      lang_lower_TR[i] = i;
      lang_upper_TR[i - ('a' - 'A')] = i - ('a' - 'A');
    }

  assert (DIM (special_lower_cp) == DIM (special_upper_cp));
  /* specific turkish letters: */
  for (i = 0; i < (int) DIM (special_lower_cp); i++)
    {
      lang_lower_TR[special_lower_cp[i]] = special_lower_cp[i];
      lang_upper_TR[special_lower_cp[i]] = special_upper_cp[i];

      lang_lower_TR[special_upper_cp[i]] = special_lower_cp[i];
      lang_upper_TR[special_upper_cp[i]] = special_upper_cp[i];
    }

  memcpy (lang_upper_i_TR, lang_upper_TR, LANG_CHAR_COUNT_TR * sizeof (lang_upper_TR[0]));
  memcpy (lang_lower_i_TR, lang_lower_TR, LANG_CHAR_COUNT_TR * sizeof (lang_lower_TR[0]));

  /* identifiers alphabet : same as Unicode data */
  lang_upper_i_TR[0x131] = 'I'; /* small letter dotless i */
  lang_lower_i_TR[0x130] = 'i'; /* capital letter I with dot above */

  /* exceptions in TR casing for user alphabet : */
  lang_upper_TR[0x131] = 'I';   /* small letter dotless i */
  lang_lower_TR[0x131] = 0x131; /* small letter dotless i */
  lang_upper_TR['i'] = 0x130;   /* capital letter I with dot above */
  lang_lower_TR['i'] = 'i';

  lang_lower_TR[0x130] = 'i';   /* capital letter I with dot above */
  lang_upper_TR[0x130] = 0x130; /* capital letter I with dot above */
  lang_upper_TR['I'] = 'I';
  lang_lower_TR['I'] = 0x131;   /* small letter dotless i */

  /* other initializations to follow here */
  coll_Utf8_tr_cs.default_lang = ld;

  ld->is_initialized = true;
}


/*
 * Korean Locale Data
 */

/*
 * lang_initloc_ko_iso () - init locale data for Korean language with ISO
 *              charset
 *   return:
 */
static void
lang_initloc_ko_iso (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  ld->is_initialized = true;
}

/*
 * lang_initloc_ko_utf8 () - init locale data for Korean language with UTF-8
 *               charset
 *   return:
 */
static void
lang_initloc_ko_utf8 (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  coll_Utf8_ko_cs.default_lang = ld;

  ld->is_initialized = true;
}


/*
 * lang_initloc_ko_euc () - init locale data for Korean language with EUC-KR
 *              charset
 *   return:
 */
static void
lang_initloc_ko_euc (LANG_LOCALE_DATA * ld)
{
  assert (ld != NULL);

  coll_Euckr_bin.default_lang = ld;

  ld->is_initialized = true;
}

/*
 * lang_fastcmp_ko () - compare two EUC-KR character strings
 *
 * Arguments:
 *    lang_coll: collation data
 *      string1: 1st character string
 *        size1: size of 1st string
 *      string2: 2nd character string
 *        size2: size of 2nd string
 *
 * Returns:
 *   Greater than 0 if string1 > string2
 *   Equal to 0     if string1 = string2
 *   Less than 0    if string1 < string2
 *
 */
static int
lang_fastcmp_ko (const LANG_COLLATION * lang_coll, const unsigned char *string1, int size1,
         const unsigned char *string2, int size2, bool ignore_trailing_space)
{
  int cmp;
  unsigned char c1, c2;
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;

  assert (size1 >= 0 && size2 >= 0);

  str1_end = string1 + size1;
  str2_end = string2 + size2;

  for (cmp = 0; string1 < str1_end && string2 < str2_end && cmp == 0;)
    {
      c1 = *string1++;
      if (c1 == ASCII_SPACE)
    {
      c1 = ZERO;
    }
      else if (c1 == EUC_SPACE && string1 < str1_end && *string1 == EUC_SPACE)
    {
      c1 = ZERO;
      string1++;
    }

      c2 = *string2++;
      if (c2 == ASCII_SPACE)
    {
      c2 = ZERO;
    }
      else if (c2 == EUC_SPACE && string2 < str2_end && *string2 == EUC_SPACE)
    {
      c2 = ZERO;
      string2++;
    }
      cmp = c1 - c2;
    }

  if (cmp != 0)
    {
      return cmp;
    }

  size1 = CAST_BUFLEN (str1_end - string1);
  size2 = CAST_BUFLEN (str2_end - string2);

  assert (size1 == 0 || size2 == 0);

  if (size1 == size2)
    {
      return cmp;
    }

  c1 = c2 = ZERO;
  if (size1 < size2)
    {
      assert (size1 == 0 && size2 > 0);

      for (; string2 < str2_end && c2 == ZERO;)
    {
      c2 = *string2++;
      if (c2 == ASCII_SPACE)
        {
          c2 = weight[SPACE];
        }
      else if (c2 == EUC_SPACE && string2 < str2_end && *string2 == EUC_SPACE)
        {
          c2 = weight[SPACE];
          string2++;
        }
    }
    }
  else
    {
      assert (size1 > 0 && size2 == 0);

      for (; string1 < str1_end && c1 == ZERO;)
    {
      c1 = *string1++;
      if (c1 == ASCII_SPACE)
        {
          c1 = weight[SPACE];
        }
      else if (c1 == EUC_SPACE && string1 < str1_end && *string1 == EUC_SPACE)
        {
          c1 = weight[SPACE];
          string1++;
        }
    }
    }
  return c1 - c2;
}

/*
 * lang_mht2str_ko () -
 *
 * Arguments:
 *    lang_coll: collation data
 *      str: character string
 *        size: size of string
 *
 *
 */
static unsigned int
lang_mht2str_ko (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
{
  const unsigned char *str_end;
  unsigned int pseudo_key = 0;
  unsigned int w;

  assert (size >= 0);

  str_end = str + size;

  /* the caller of hash function eliminated only trailing ASCII spaces */
  /* eliminate the remaining both trailing EUC and ASCII spaces */
  while (str_end > str)
    {
      if (*(str_end - 1) == ASCII_SPACE)
    {
      str_end--;
      continue;
    }
      else if (str_end > str + 1 && *(str_end - 1) == EUC_SPACE && *(str_end - 2) == EUC_SPACE)
    {
      str_end--;
      str_end--;
      continue;
    }
      break;
    }

  for (; str < str_end;)
    {
      w = *str++;
      if (w == EUC_SPACE && str < str_end && *str == EUC_SPACE)
    {
      w = ZERO;
      str++;
    }
      else if (w == ASCII_SPACE)
    {
      w = ZERO;
    }

      ADD_TO_HASH (pseudo_key, w);
    }

  return pseudo_key;
}


/*
 * lang_strmatch_ko () - compare two EUC-KR character strings
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   lang_coll(in) : collation data
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 *
 */
static int
lang_strmatch_ko (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
          const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
          int *str1_match_size, bool ignore_trailing_space)
{
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_next;
  const unsigned char *str2_next;
  const unsigned char *str1_begin;
  int char1_size, char2_size, cmp = 0;
  unsigned int c1, c2;

  assert (size1 >= 0 && size2 >= 0);

  str1_begin = str1;
  str1_end = str1 + size1;
  str2_end = str2 + size2;

  for (; str1 < str1_end && str2 < str2_end;)
    {
      assert (str1_end - str1 > 0);
      assert (str2_end - str2 > 0);

      str1_next = intl_nextchar_euc (str1, &char1_size);
      str2_next = intl_nextchar_euc (str2, &char2_size);

      if (is_match && escape != NULL && memcmp (str2, escape, char2_size) == 0)
    {
      if (!(has_last_escape && str2_next >= str2_end))
        {
          str2 = str2_next;
          str2_next = intl_nextchar_euc (str2, &char2_size);
        }
    }

      c1 = *str1;
      c2 = *str2;
      if (*str1 == ASCII_SPACE || (*str1 == EUC_SPACE && str1 + 1 < str1_end && *(str1 + 1) == EUC_SPACE))
    {
      c1 = ZERO;
    }

      if (*str2 == ASCII_SPACE || (*str2 == EUC_SPACE && str2 + 1 < str2_end && *(str2 + 1) == EUC_SPACE))
    {
      c2 = ZERO;
    }

      if (c1 == c2 && c1 == 0)
    {
      ;
    }
      else if (char1_size != char2_size)
    {
      return (char1_size < char2_size) ? (-1) : 1;
    }
      else
    {
      cmp = memcmp (str1, str2, char1_size);
      if (cmp != 0)
        {
          return cmp;
        }
    }

      str1 = str1_next;
      str2 = str2_next;
    }

  size1 = CAST_BUFLEN (str1_end - str1);
  size2 = CAST_BUFLEN (str2_end - str2);

  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  assert (size1 == 0 || size2 == 0);
  assert (cmp == 0);

  if (size1 == size2)
    {
      return 0;
    }
  else if (size2 > 0)
    {
      if (is_match)
    {
      return -1;
    }

      for (; str2 < str2_end;)
    {
      c2 = *str2++;
      if (c2 == ASCII_SPACE)
        {
          c2 = ZERO;
        }
      else if (c2 == EUC_SPACE && str2 < str2_end && *str2 == EUC_SPACE)
        {
          c2 = ZERO;
          str2++;
        }

      if (c2 > 0)
        {
          return -1;
        }
    }
    }
  else
    {
      assert (size1 > 0);

      if (is_match)
    {
      return 0;
    }

      for (; str1 < str1_end;)
    {
      c1 = *str1++;
      if (c1 == ASCII_SPACE)
        {
          c1 = ZERO;
        }
      else if (c1 == EUC_SPACE && str1 < str1_end && *str1 == EUC_SPACE)
        {
          c1 = ZERO;
          str1++;
        }

      if (c1 > 0)
        {
          return 1;
        }
    }
    }
  return cmp;
}

/*
 * lang_next_alpha_char_ko() - computes the next alphabetical char
 *   return: size in bytes of the next alphabetical char
 *   lang_coll(in): collation data
 *   seq(in): pointer to current char
 *   size(in): size in bytes for seq
 *   next_seq(in/out): buffer to return next alphabetical char
 *   len_next(in/out): length in chars for nex_seq
 */
static int
lang_next_alpha_char_ko (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
             unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
{
  int char_size;
  assert (seq != NULL);
  assert (next_seq != NULL);
  assert (len_next != NULL);
  assert (size > 0);

  (void) intl_char_size ((unsigned char *) seq, 1, INTL_CODESET_KSC5601_EUC, &char_size);
  memcpy (next_seq, seq, char_size);

  assert (char_size <= 3);
  /* increment last byte of current character without carry and without mixing ASCII range with korean range; this
   * works for EUC-KR characters encoding which don't have terminal byte = FF */
  if ((char_size == 1 && *next_seq < 0x7f) || (char_size > 1 && next_seq[char_size - 1] < 0xff))
    {
      next_seq[char_size - 1]++;
    }

  *len_next = 1;
  return char_size;
}

/*
 * lang_fastcmp_binary () - string compare for "binary" collation (with binary
 *              charset). Space character does not count with
 *              zero weight
 *   return:
 *   lang_coll(in):
 *   string1(in):
 *   size1(in):
 *   string2(in):
 *   size2(in):
 */
static int
lang_fastcmp_binary (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
             const unsigned char *string2, const int size2, bool ignore_trailing_space)
{
  int i, size;

  size = size1 < size2 ? size1 : size2;
  for (i = 0; i < size; i++, string1++, string2++)
    {
      /* compare weights of the two chars */
      if (*string1 > *string2)
    {
      return 1;
    }
      else if (*string1 < *string2)
    {
      return -1;
    }
    }

  if (size1 < size2)
    {
      size = size2 - size1;
      for (i = 0; i < size; i++)
    {
      /* ignore tailing white spaces */
      if (*string2++ > 0)
        {
          return -1;
        }
    }
    }
  else if (size1 > size2)
    {
      size = size1 - size2;
      for (i = 0; i < size; i++)
    {
      /* ignore trailing white spaces */
      if (*string1++ > 0)
        {
          return 1;
        }
    }
    }

  return 0;
}

/*
 * lang_strmatch_binary () - match or compare two character strings of
 *               Binary (Raw-byte) codeset
 *
 *   return: negative if str1 < str2, positive if str1 > str2, zero otherwise
 *   lang_coll(in) : collation data
 *   is_match(in) : true if match, otherwise is compare
 *   str1(in):
 *   size1(in):
 *   str2(in): this is the pattern string in case of match
 *   size2(in):
 *   escape(in): pointer to escape character (multi-byte allowed)
 *       (used in context of LIKE)
 *   has_last_escape(in): true if it should check if last character is the
 *            escape character
 *   str1_match_size(out): size from str1 which is matched with str2
 */
static int
lang_strmatch_binary (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
              const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
              int *str1_match_size, bool ignore_trailing_space)
{
  unsigned int c1, c2;
  const unsigned char *str1_end;
  const unsigned char *str2_end;
  const unsigned char *str1_begin;

  str1_begin = str1;
  str1_end = str1 + size1;
  str2_end = str2 + size2;
  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
    {
      assert (str1_end - str1 > 0);
      assert (str2_end - str2 > 0);

      c1 = *str1;
      c2 = *str2;

      if (is_match && escape != NULL && c2 == *escape)
    {
      str2++;
      if (!(has_last_escape && str2 + 1 >= str2_end))
        {
          c2 = *str2;
        }
    }

      if (c1 != c2)
    {
      return (c1 < c2) ? -1 : 1;
    }
    }

  size1 = CAST_BUFLEN (str1_end - str1);
  size2 = CAST_BUFLEN (str2_end - str2);

  assert (size1 == 0 || size2 == 0);

  if (is_match)
    {
      assert (str1_match_size != NULL);
      *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
    }

  if (size1 == size2)
    {
      return 0;
    }
  else if (size2 > 0)
    {
      if (is_match)
    {
      /* pattern string should be exhausted for a full match */
      return -1;
    }
      for (; str2 < str2_end; str2++)
    {
      if (*str2 > 0)
        {
          return -1;
        }
    }
    }
  else
    {
      assert (size1 > 0);

      if (is_match)
    {
      return 0;
    }

      for (; str1 < str1_end; str1++)
    {
      if (*str1 > 0)
        {
          return 1;
        }
    }
    }
  return 0;
}

/*
 * lang_split_key_binary() - finds the prefix key for "binary" collation
 *               (binary/raw-byte charset)
 *
 *   return:  error status
 *   lang_coll(in):
 *   is_desc(in):
 *   str1(in):
 *   size1(in):
 *   str2(in):
 *   size2(in):
 *   key(out): key
 *   byte_size(out): size in bytes of key
 *
 *  Note : this function is used by index prefix computation (BTREE building)
 */
static int
lang_split_key_binary (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
               const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
               bool ignore_trailing_space)
{
  const unsigned char *str1_end, *str2_end;
  const unsigned char *str1_begin, *str2_begin;
  int key_size;

  assert (key != NULL);
  assert (byte_size != NULL);

  str1_end = str1 + size1;
  str2_end = str2 + size2;
  str1_begin = str1;
  str2_begin = str2;

  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
    {
      if (*str1 != *str2)
    {
      assert ((!is_desc && *str1 < *str2) || (is_desc && *str1 > *str2));
      break;
    }
    }

  if (!is_desc)
    {               /* normal index */
      *key = (unsigned char *) str2_begin;

      /* common part plus a character with non-zero weight */
      while (str2 < str2_end)
    {
      if (*str2++ != 0)
        {
          break;
        }
    }
      assert (str2 <= str2_end);
      key_size = CAST_BUFLEN (str2 - str2_begin);
    }
  else
    {               /* reverse index */
      assert (is_desc);

      /* common part plus a character with non-zero weight from str1 */
      while (str1 < str1_end)
    {
      if (*str1++ != 0)
        {
          break;
        }
    }

      if (str1 >= str1_end)
    {
      /* str1 exhaused or at last char, we use str2 as key */
      *key = (unsigned char *) str2_begin;
      key_size = CAST_BUFLEN (str2_end - str2_begin);
    }
      else
    {
      assert (str1 < str1_end);
      *key = (unsigned char *) str1_begin;
      key_size = CAST_BUFLEN (str1 - str1_begin);
    }
    }

  *byte_size = key_size;

  return NO_ERROR;
}


#if defined(WINDOWS)
#define GET_SYM_ADDR(lib, sym) GetProcAddress((HMODULE)lib, sym)
#else
#define GET_SYM_ADDR(lib, sym) dlsym(lib, sym)
#endif

#define SHLIB_GET_ADDR(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)                 \
  do {                                              \
    if (snprintf (sym_name, LOC_LIB_SYMBOL_NAME_SIZE - 1, "" SYM_NAME "_%s", LOC_NAME) < 0)     \
      goto error_loading_symbol;                                        \
    v = (SYM_TYPE) GET_SYM_ADDR (lh, sym_name);                         \
    if (v == NULL)                                      \
      {                                             \
    goto error_loading_symbol;                              \
      }                                             \
  } while (0)

#define SHLIB_GET_ADDR_W_REF(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)       \
  do {                                      \
    snprintf (sym_name, LOC_LIB_SYMBOL_NAME_SIZE, "" SYM_NAME "_ref_%s",    \
          LOC_NAME);                            \
    temp_char_sym = (char *) GET_SYM_ADDR (lh, sym_name);           \
    if (temp_char_sym == NULL)                          \
      {                                     \
    goto error_loading_symbol;                      \
      }                                     \
    strcpy (sym_name, temp_char_sym);                       \
    v = (SYM_TYPE) GET_SYM_ADDR (lh, sym_name);                 \
    if (v == NULL)                              \
      {                                     \
    goto error_loading_symbol;                      \
      }                                     \
  } while (0)

#define SHLIB_GET_VAL(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)  \
  do {                              \
    SYM_TYPE* aux;                      \
    SHLIB_GET_ADDR(aux, SYM_NAME, SYM_TYPE*, lh, LOC_NAME); \
    v = *aux;                           \
  } while (0);


/*
 * lang_locale_data_load_from_lib() - loads locale data from shared libray
 *
 * return: error code
 * lld(out): lang locale data
 * lib_handle(in)
 * lf(in): locale file info
 * is_load_for_dump (in): true if load is in context of dump tool
 */
int
lang_locale_data_load_from_lib (LANG_LOCALE_DATA * lld, void *lib_handle, const LOCALE_FILE * lf, bool is_load_for_dump)
{
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
  char err_msg[ERR_MSG_SIZE + PATH_MAX];
  char **temp_array_sym;
  int *temp_num_sym;
  int err_status = NO_ERROR;
  int i, count_coll_to_load;
  const char *alpha_suffix = NULL;
  bool load_w_identifier_name;
  int txt_conv_type;
  bool sym_loc_name_found = false;

  assert (lld != NULL);
  assert (lib_handle != NULL);
  assert (lf != NULL);
  assert (lf->locale_name != NULL);

  SHLIB_GET_ADDR (lld->lang_name, "locale_name", char *, lib_handle, lf->locale_name);
  sym_loc_name_found = true;

  SHLIB_GET_ADDR (lld->checksum, "locale_checksum", char *, lib_handle, lf->locale_name);
  if (strlen (lld->checksum) != 32)
    {
      snprintf (err_msg, sizeof (err_msg) - 1, "invalid checksum in locale" " library %s", lf->lib_file);
      err_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, err_status, false);
      goto exit;
    }

  SHLIB_GET_ADDR (lld->date_format, "date_format", char *, lib_handle, lld->lang_name);
  SHLIB_GET_ADDR (lld->time_format, "time_format", char *, lib_handle, lld->lang_name);
  SHLIB_GET_ADDR (lld->datetime_format, "datetime_format", char *, lib_handle, lld->lang_name);
  SHLIB_GET_ADDR (lld->timestamp_format, "timestamp_format", char *, lib_handle, lld->lang_name);
  SHLIB_GET_ADDR (lld->datetimetz_format, "datetimetz_format", char *, lib_handle, lld->lang_name);
  SHLIB_GET_ADDR (lld->timestamptz_format, "timestamptz_format", char *, lib_handle, lld->lang_name);

  SHLIB_GET_ADDR (temp_array_sym, "month_names_abbreviated", char **, lib_handle, lld->lang_name);
  for (i = 0; i < CAL_MONTH_COUNT; i++)
    {
      lld->month_short_name[i] = temp_array_sym[i];
    }

  SHLIB_GET_ADDR (temp_array_sym, "month_names_wide", char **, lib_handle, lld->lang_name);
  for (i = 0; i < CAL_MONTH_COUNT; i++)
    {
      lld->month_name[i] = temp_array_sym[i];
    }

  SHLIB_GET_ADDR (temp_array_sym, "day_names_abbreviated", char **, lib_handle, lld->lang_name);
  for (i = 0; i < CAL_DAY_COUNT; i++)
    {
      lld->day_short_name[i] = temp_array_sym[i];
    }

  SHLIB_GET_ADDR (temp_array_sym, "day_names_wide", char **, lib_handle, lld->lang_name);
  for (i = 0; i < CAL_DAY_COUNT; i++)
    {
      lld->day_name[i] = temp_array_sym[i];
    }

  SHLIB_GET_ADDR (temp_array_sym, "am_pm", char **, lib_handle, lld->lang_name);
  for (i = 0; i < CAL_AM_PM_COUNT; i++)
    {
      lld->am_pm[i] = temp_array_sym[i];
    }

  SHLIB_GET_ADDR (lld->day_short_parse_order, "day_names_abbr_parse_order", char *, lib_handle, lld->lang_name);

  SHLIB_GET_ADDR (lld->day_parse_order, "day_names_wide_parse_order", char *, lib_handle, lld->lang_name);

  SHLIB_GET_ADDR (lld->month_short_parse_order, "month_names_abbr_parse_order", char *, lib_handle, lld->lang_name);

  SHLIB_GET_ADDR (lld->month_parse_order, "month_names_wide_parse_order", char *, lib_handle, lld->lang_name);

  SHLIB_GET_ADDR (lld->am_pm_parse_order, "am_pm_parse_order", char *, lib_handle, lld->lang_name);

  SHLIB_GET_VAL (lld->number_decimal_sym, "number_decimal_sym", char, lib_handle, lld->lang_name);

  SHLIB_GET_VAL (lld->number_group_sym, "number_group_sym", char, lib_handle, lld->lang_name);

  int currency_code;
  SHLIB_GET_VAL (currency_code, "default_currency_code", int, lib_handle, lld->lang_name);

  lld->default_currency_code = (DB_CURRENCY) currency_code;

  /* alphabet */
  SHLIB_GET_ADDR (temp_num_sym, "alphabet_a_type", int *, lib_handle, lld->lang_name);
  assert (*temp_num_sym >= ALPHABET_UNICODE && *temp_num_sym <= ALPHABET_TAILORED);
  lld->alphabet.a_type = (ALPHABET_TYPE) (*temp_num_sym);

  if (lld->alphabet.a_type == ALPHABET_UNICODE)
    {
      alpha_suffix = "unicode";
    }
  else if (lld->alphabet.a_type == ALPHABET_ASCII)
    {
      alpha_suffix = "ascii";
    }
  else
    {
      alpha_suffix = lld->lang_name;
    }
  err_status = lang_locale_load_alpha_from_lib (&(lld->alphabet), false, alpha_suffix, lib_handle, lf);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  /* identifier alphabet */
  SHLIB_GET_ADDR (temp_num_sym, "ident_alphabet_a_type", int *, lib_handle, lld->lang_name);
  assert (*temp_num_sym >= ALPHABET_UNICODE && *temp_num_sym <= ALPHABET_TAILORED);
  lld->ident_alphabet.a_type = (ALPHABET_TYPE) (*temp_num_sym);

  load_w_identifier_name = false;
  if (lld->ident_alphabet.a_type == ALPHABET_UNICODE)
    {
      alpha_suffix = "unicode";
    }
  else if (lld->ident_alphabet.a_type == ALPHABET_ASCII)
    {
      alpha_suffix = "ascii";
    }
  else
    {
      alpha_suffix = lld->lang_name;
      load_w_identifier_name = true;
    }

  err_status =
    lang_locale_load_alpha_from_lib (&(lld->ident_alphabet), load_w_identifier_name, alpha_suffix, lib_handle, lf);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  /* console conversion */
  SHLIB_GET_VAL (txt_conv_type, "tc_conv_type", int, lib_handle, lld->lang_name);

  if (txt_conv_type == TEXT_CONV_ISO_88591_BUILTIN)
    {
      lld->txt_conv = &con_Iso_8859_1_conv;
    }
  else if (txt_conv_type == TEXT_CONV_ISO_88599_BUILTIN)
    {
      lld->txt_conv = &con_Iso_8859_9_conv;
    }
  else if (txt_conv_type == TEXT_CONV_NO_CONVERSION)
    {
      lld->txt_conv = NULL;
    }
  else
    {
      unsigned char *is_lead_byte;
      assert (txt_conv_type == TEXT_CONV_GENERIC_1BYTE || txt_conv_type == TEXT_CONV_GENERIC_2BYTE);

      lld->txt_conv = (TEXT_CONVERSION *) malloc (sizeof (TEXT_CONVERSION));
      if (lld->txt_conv == NULL)
    {
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_OUT_OF_VIRTUAL_MEMORY, 1, sizeof (TEXT_CONVERSION));
      err_status = ER_OUT_OF_VIRTUAL_MEMORY;
      goto exit;
    }
      memset (lld->txt_conv, 0, sizeof (TEXT_CONVERSION));

      lld->txt_conv->conv_type = (TEXT_CONV_TYPE) txt_conv_type;

      SHLIB_GET_ADDR (is_lead_byte, "tc_is_lead_byte", unsigned char *, lib_handle, lld->lang_name);
      memcpy (lld->txt_conv->byte_flag, is_lead_byte, 256);

      SHLIB_GET_VAL (lld->txt_conv->utf8_first_cp, "tc_utf8_first_cp", unsigned int, lib_handle, lld->lang_name);

      SHLIB_GET_VAL (lld->txt_conv->utf8_last_cp, "tc_utf8_last_cp", unsigned int, lib_handle, lld->lang_name);

      SHLIB_GET_VAL (lld->txt_conv->text_first_cp, "tc_text_first_cp", unsigned int, lib_handle, lld->lang_name);

      SHLIB_GET_VAL (lld->txt_conv->text_last_cp, "tc_text_last_cp", unsigned int, lib_handle, lld->lang_name);

      SHLIB_GET_ADDR (lld->txt_conv->win_codepages, "tc_win_codepages", char *, lib_handle, lld->lang_name);

      SHLIB_GET_ADDR (lld->txt_conv->nl_lang_str, "tc_nl_lang_str", char *, lib_handle, lld->lang_name);

      SHLIB_GET_ADDR (lld->txt_conv->utf8_to_text, "tc_utf8_to_text", CONV_CP_TO_BYTES *, lib_handle, lld->lang_name);

      SHLIB_GET_ADDR (lld->txt_conv->text_to_utf8, "tc_text_to_utf8", CONV_CP_TO_BYTES *, lib_handle, lld->lang_name);
    }

  err_status = lang_locale_load_normalization_from_lib (&(lld->unicode_norm), lib_handle, lf);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  /* collation data */
  if (is_load_for_dump)
    {
      goto exit;
    }

  err_status = lang_load_count_coll_from_lib (&count_coll_to_load, lib_handle, lf);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  for (i = 0; i < count_coll_to_load; i++)
    {
      /* get name of collation */
      char *collation_name = NULL;
      LANG_COLLATION *lang_coll = NULL;
      COLL_DATA *coll = NULL;

      err_status = lang_load_get_coll_name_from_lib (i, &collation_name, lib_handle, lf);
      if (err_status != NO_ERROR)
    {
      goto exit;
    }

      if (lang_get_collation_by_name (collation_name) != NULL)
    {
      /* collation already loaded */
      continue;
    }

      lang_coll = (LANG_COLLATION *) malloc (sizeof (LANG_COLLATION));
      if (lang_coll == NULL)
    {
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_INIT, false);
      err_status = ER_LOC_INIT;
      goto exit;
    }
      memset (lang_coll, 0, sizeof (LANG_COLLATION));

      assert (strlen (collation_name) < (int) sizeof (lang_coll->coll.coll_name));
      strncpy (lang_coll->coll.coll_name, collation_name, sizeof (lang_coll->coll.coll_name) - 1);

      coll = &(lang_coll->coll);
      err_status = lang_load_coll_from_lib (coll, lib_handle, lf);
      if (err_status != NO_ERROR)
    {
      assert (lang_coll != NULL);
      free (lang_coll);
      goto exit;
    }

      lang_coll->codeset = INTL_CODESET_UTF8;
      lang_coll->built_in = 0;

      /* by default enable optimizations */
      lang_coll->options.allow_like_rewrite = true;
      lang_coll->options.allow_index_opt = true;
      lang_coll->options.allow_prefix_index = true;

      if (coll->uca_opt.sett_strength < TAILOR_QUATERNARY)
    {
      lang_coll->options.allow_index_opt = false;
      lang_coll->options.allow_like_rewrite = false;
    }

      if (coll->uca_exp_num > 1)
    {
      lang_coll->fastcmp = lang_strcmp_utf8_uca;
      lang_coll->strmatch = lang_strmatch_utf8_uca;
      lang_coll->next_coll_seq = lang_next_coll_seq_utf8_w_contr;
      lang_coll->split_key = lang_split_key_w_exp;
      lang_coll->mht2str = lang_mht2str_utf8_exp;
      lang_coll->options.allow_like_rewrite = false;
      lang_coll->options.allow_prefix_index = false;
    }
      else if (coll->count_contr > 0)
    {
      lang_coll->fastcmp = lang_strcmp_utf8_w_contr;
      lang_coll->strmatch = lang_strmatch_utf8_w_contr;
      lang_coll->next_coll_seq = lang_next_coll_seq_utf8_w_contr;
      lang_coll->split_key = lang_split_key_utf8;
      lang_coll->mht2str = lang_mht2str_utf8;
    }
      else
    {
      lang_coll->fastcmp = lang_strcmp_utf8;
      lang_coll->strmatch = lang_strmatch_utf8;
      lang_coll->next_coll_seq = lang_next_coll_char_utf8;
      lang_coll->split_key = lang_split_key_utf8;
      lang_coll->mht2str = lang_mht2str_utf8;
    }

      err_status = register_collation (lang_coll);
      if (err_status != NO_ERROR)
    {
      assert (lang_coll != NULL);
      free (lang_coll);
      goto exit;
    }

      lang_coll->default_lang = lld;

      /* first collation in locale is the default collation of locale */
      if (lld->default_lang_coll == NULL)
    {
      lld->default_lang_coll = lang_coll;
    }
    }


exit:
  return err_status;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  if (!sym_loc_name_found)
    {
      strcat (err_msg,
          "\n Locale might not be compiled into the selected " "library.\n Check configuration and recompile locale"
          ", if necessary,\n using the make_locale script");
    }
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, is_load_for_dump);

  return ER_LOC_INIT;
}

/*
 * lang_load_count_coll_from_lib() - reads and returns the number of
 *                   collations in library
 *
 * return: error code
 * count_coll(out): number of collations in lib associated with locale
 * lib_handle(in):
 * lf(in): locale file info
 */
int
lang_load_count_coll_from_lib (int *count_coll, void *lib_handle, const LOCALE_FILE * lf)
{
  char err_msg[ERR_MSG_SIZE + PATH_MAX];
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];

  assert (count_coll != NULL);
  assert (lib_handle != NULL);
  assert (lf != NULL);
  assert (lf->locale_name != NULL);

  SHLIB_GET_VAL (*count_coll, "count_coll", int, lib_handle, lf->locale_name);

  return NO_ERROR;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);

  return ER_LOC_INIT;
}

/*
 * lang_load_get_coll_name_from_lib() - reads and returns the name of n-th
 *                  collation in library
 *
 * return: error code
 * coll_pos(in): collation index to return
 * coll_name(out): name of collation
 * lib_handle(in):
 * lf(in): locale file info
 */
int
lang_load_get_coll_name_from_lib (const int coll_pos, char **coll_name, void *lib_handle, const LOCALE_FILE * lf)
{
  char err_msg[ERR_MSG_SIZE + PATH_MAX];
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
  char coll_suffix[COLL_NAME_SIZE + LANG_MAX_LANGNAME + 5];

  assert (coll_name != NULL);
  assert (lib_handle != NULL);
  assert (lf != NULL);
  assert (lf->locale_name != NULL);

  *coll_name = NULL;
  snprintf (coll_suffix, sizeof (coll_suffix) - 1, "%d_%s", coll_pos, lf->locale_name);
  SHLIB_GET_ADDR (*coll_name, "collation", char *, lib_handle, coll_suffix);

  return NO_ERROR;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);

  return ER_LOC_INIT;
}

/*
 * lang_load_coll_from_lib() - loads collation data from library
 *
 * return: error code
 * cd(out): collation data
 * lib_handle(in):
 * lf(in): locale file info
 */
int
lang_load_coll_from_lib (COLL_DATA * cd, void *lib_handle, const LOCALE_FILE * lf)
{
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
  char *temp_char_sym;
  int *temp_num_sym;
  char err_msg[ERR_MSG_SIZE + PATH_MAX];
  int err_status = NO_ERROR;
  char *coll_checksum = NULL;

  assert (cd != NULL);
  assert (lib_handle != NULL);
  assert (lf != NULL);
  assert (lf->locale_name != NULL);

  SHLIB_GET_ADDR (temp_char_sym, "coll_name", char *, lib_handle, cd->coll_name);

  if (strcmp (temp_char_sym, cd->coll_name))
    {
      err_status = ER_LOC_INIT;
      snprintf (err_msg, sizeof (err_msg) - 1, "Collation %s not found in shared library %s", cd->coll_name,
        lf->lib_file);
      LOG_LOCALE_ERROR (err_msg, err_status, false);
      goto exit;
    }

  SHLIB_GET_ADDR (coll_checksum, "coll_checksum", char *, lib_handle, cd->coll_name);
  strncpy (cd->checksum, coll_checksum, 32);
  cd->checksum[32] = '\0';

  SHLIB_GET_VAL (cd->coll_id, "coll_id", int, lib_handle, cd->coll_name);

  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_strength", int *, lib_handle, cd->coll_name);
  assert (*temp_num_sym >= TAILOR_UNDEFINED && *temp_num_sym <= TAILOR_IDENTITY);
  cd->uca_opt.sett_strength = (T_LEVEL) * temp_num_sym;

  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_backwards", int *, lib_handle, cd->coll_name);
  cd->uca_opt.sett_backwards = (bool) * temp_num_sym;

  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_caseLevel", int *, lib_handle, cd->coll_name);
  cd->uca_opt.sett_caseLevel = (bool) * temp_num_sym;

  SHLIB_GET_VAL (cd->uca_opt.sett_caseFirst, "coll_sett_caseFirst", int, lib_handle, cd->coll_name);

  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_expansions", int *, lib_handle, cd->coll_name);
  cd->uca_opt.sett_expansions = (bool) * temp_num_sym;

  SHLIB_GET_VAL (cd->uca_opt.sett_contr_policy, "coll_sett_contr_policy", int, lib_handle, cd->coll_name);

  SHLIB_GET_VAL (cd->w_count, "coll_w_count", int, lib_handle, cd->coll_name);

  SHLIB_GET_VAL (cd->uca_exp_num, "coll_uca_exp_num", int, lib_handle, cd->coll_name);

  SHLIB_GET_VAL (cd->count_contr, "coll_count_contr", int, lib_handle, cd->coll_name);

  SHLIB_GET_ADDR (temp_num_sym, "coll_match_contr", int *, lib_handle, cd->coll_name);
  cd->uca_opt.sett_match_contr = (COLL_MATCH_CONTR) * temp_num_sym;

  if (cd->count_contr > 0)
    {
      SHLIB_GET_ADDR_W_REF (cd->contr_list, "coll_contr_list", COLL_CONTRACTION *, lib_handle, cd->coll_name);

      SHLIB_GET_VAL (cd->contr_min_size, "coll_contr_min_size", int, lib_handle, cd->coll_name);

      SHLIB_GET_VAL (cd->cp_first_contr_offset, "coll_cp_first_contr_offset", int, lib_handle, cd->coll_name);

      SHLIB_GET_VAL (cd->cp_first_contr_count, "coll_cp_first_contr_count", int, lib_handle, cd->coll_name);

      SHLIB_GET_ADDR_W_REF (cd->cp_first_contr_array, "coll_cp_first_contr_array", int *, lib_handle, cd->coll_name);
    }

  if (cd->uca_opt.sett_expansions)
    {
      assert (cd->uca_exp_num > 1);

      SHLIB_GET_ADDR_W_REF (cd->uca_w_l13, "coll_uca_w_l13", UCA_L13_W *, lib_handle, cd->coll_name);

      if (cd->uca_opt.sett_strength >= TAILOR_QUATERNARY)
    {
      SHLIB_GET_ADDR_W_REF (cd->uca_w_l4, "coll_uca_w_l4", UCA_L4_W *, lib_handle, cd->coll_name);
    }

      SHLIB_GET_ADDR_W_REF (cd->uca_num, "coll_uca_num", char *, lib_handle, cd->coll_name);
    }
  else
    {
      SHLIB_GET_ADDR_W_REF (cd->weights, "coll_weights", unsigned int *, lib_handle, cd->coll_name);
    }

  SHLIB_GET_ADDR_W_REF (cd->next_cp, "coll_next_cp", unsigned int *, lib_handle, cd->coll_name);

exit:
  return err_status;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);

  return ER_LOC_INIT;
}

/*
 * lang_locale_load_alpha_from_lib() - loads locale data from shared libray
 *
 * return: error code
 * a(in/out): alphabet to load
 * load_w_identifier_name(in): true if alphabet is to be load as "identifier"
 *                 name
 * lib_handle(in):
 * lf(in): locale file info
 */
static int
lang_locale_load_alpha_from_lib (ALPHABET_DATA * a, bool load_w_identifier_name, const char *alpha_suffix,
                 void *lib_handle, const LOCALE_FILE * lf)
{
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
  char err_msg[ERR_MSG_SIZE + PATH_MAX];
  int err_status = NO_ERROR;

  assert (a != NULL);
  assert (lib_handle != NULL);
  assert (lf != NULL);
  assert (lf->locale_name != NULL);

  if (load_w_identifier_name)
    {
      SHLIB_GET_VAL (a->l_count, "ident_alphabet_l_count", int, lib_handle, alpha_suffix);

      SHLIB_GET_VAL (a->lower_multiplier, "ident_alphabet_lower_multiplier", int, lib_handle, alpha_suffix);

      SHLIB_GET_VAL (a->upper_multiplier, "ident_alphabet_upper_multiplier", int, lib_handle, alpha_suffix);

      SHLIB_GET_ADDR (a->lower_cp, "ident_alphabet_lower_cp", unsigned int *, lib_handle, alpha_suffix);

      SHLIB_GET_ADDR (a->upper_cp, "ident_alphabet_upper_cp", unsigned int *, lib_handle, alpha_suffix);
    }
  else
    {
      SHLIB_GET_VAL (a->l_count, "alphabet_l_count", int, lib_handle, alpha_suffix);

      SHLIB_GET_VAL (a->lower_multiplier, "alphabet_lower_multiplier", int, lib_handle, alpha_suffix);

      SHLIB_GET_VAL (a->upper_multiplier, "alphabet_upper_multiplier", int, lib_handle, alpha_suffix);

      SHLIB_GET_ADDR (a->lower_cp, "alphabet_lower_cp", unsigned int *, lib_handle, alpha_suffix);

      SHLIB_GET_ADDR (a->upper_cp, "alphabet_upper_cp", unsigned int *, lib_handle, alpha_suffix);
    }

  return err_status;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);

  return ER_LOC_INIT;
}

/*
 * lang_load_library - loads the locale specific DLL/so
 * Returns : error code - ER_LOC_INIT if library load fails
 *          - NO_ERROR if success
 * lib_file(in)  : path to library
 * handle(out)   : handle to the loaded library
 */
int
lang_load_library (const char *lib_file, void **handle)
{
  int err_status = NO_ERROR;
  char err_msg[ERR_MSG_SIZE];
#if defined(WINDOWS)
  DWORD loading_err;
  char *lpMsgBuf;
  UINT error_mode = 0;
#else
  char *error;
#endif

  assert (lib_file != NULL);

#if defined(WINDOWS)
  error_mode = SetErrorMode (SEM_NOOPENFILEERRORBOX | SEM_FAILCRITICALERRORS);
  *handle = LoadLibrary (lib_file);
  SetErrorMode (error_mode);
  loading_err = GetLastError ();
#else
  dlerror ();           /* Clear any existing error */
  *handle = dlopen (lib_file, RTLD_NOW);
#endif

  if (*handle == NULL)
    {
      err_status = ER_LOC_INIT;
#if defined(WINDOWS)
      FormatMessage (FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ARGUMENT_ARRAY, NULL,
             loading_err, MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT), (char *) &lpMsgBuf, 1,
             (va_list *) & lib_file);
      snprintf (err_msg, sizeof (err_msg) - 1,
        "Library file is invalid or not accessible.\n" " Unable to load %s !\n %s", lib_file, lpMsgBuf);
      LocalFree (lpMsgBuf);
#else
      error = dlerror ();
      snprintf (err_msg, sizeof (err_msg) - 1,
        "Library file is invalid or not accessible.\n" " Unable to load %s !\n %s", lib_file, error);
#endif
      LOG_LOCALE_ERROR (err_msg, err_status, false);
    }

  return err_status;
}

/*
 * lang_unload_libraries - unloads the loaded locale libraries (DLL/so)
 *             and frees additional data.
 */
static void
lang_unload_libraries (void)
{
  int i;

  for (i = 0; i < loclib_Handle_count; i++)
    {
      assert (loclib_Handle[i] != NULL);
#if defined(WINDOWS)
      FreeLibrary ((HMODULE) loclib_Handle[i]);
#else
      dlclose (loclib_Handle[i]);
#endif
      loclib_Handle[i] = NULL;
    }
  free (loclib_Handle);
  loclib_Handle = NULL;
  loclib_Handle_count = 0;
}

/*
 * destroy_user_locales - frees the memory holding the locales already loaded
 *            from the locale libraries (DLL/so)
 */
static void
destroy_user_locales (void)
{
  int i;

  for (i = 0; i < lang_Count_locales; i++)
    {
      assert (lang_Loaded_locales[i] != NULL);

      free_lang_locale_data (lang_Loaded_locales[i]);
      lang_Loaded_locales[i] = NULL;
    }

  lang_Count_locales = 0;
}

/*
 * lang_locale_load_normalization_from_lib - loads normalization data from
 *                       the locale library
 */
static int
lang_locale_load_normalization_from_lib (UNICODE_NORMALIZATION * norm, void *lib_handle, const LOCALE_FILE * lf)
{
  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
  char err_msg[ERR_MSG_SIZE + PATH_MAX];

  assert (norm != NULL);

  memset (norm, 0, sizeof (UNICODE_NORMALIZATION));

  SHLIB_GET_ADDR (norm->unicode_mappings, "unicode_mappings", UNICODE_MAPPING *, lib_handle,
          UNICODE_NORMALIZATION_DECORATOR);
  SHLIB_GET_VAL (norm->unicode_mappings_count, "unicode_mappings_count", int, lib_handle,
         UNICODE_NORMALIZATION_DECORATOR);
  SHLIB_GET_ADDR (norm->unicode_mapping_index, "unicode_mapping_index", int *, lib_handle,
          UNICODE_NORMALIZATION_DECORATOR);
  SHLIB_GET_ADDR (norm->list_full_decomp, "list_full_decomp", int *, lib_handle, UNICODE_NORMALIZATION_DECORATOR);

  return NO_ERROR;

error_loading_symbol:
  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
        sym_name, lf->lib_file, lf->locale_name);
  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);

  return ER_LOC_INIT;
}

/*
 * lang_get_generic_unicode_norm - gets the global unicode
 *          normalization structure
 * Returns:
 */
UNICODE_NORMALIZATION *
lang_get_generic_unicode_norm (void)
{
  return generic_Unicode_norm;
}

/*
 * lang_set_generic_unicode_norm - sets the global unicode
 *          normalization structure
 */
void
lang_set_generic_unicode_norm (UNICODE_NORMALIZATION * norm)
{
  generic_Unicode_norm = norm;
}

/*
 * lang_free_collations - frees all collation data
 */
static void
lang_free_collations (void)
{
  int i;

  if (lang_Count_collations <= 0)
    {
      return;
    }
  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
    {
      assert (lang_Collations[i] != NULL);
      if (!(lang_Collations[i]->built_in))
    {
      free (lang_Collations[i]);
    }
      lang_Collations[i] = NULL;
    }

  lang_Count_collations = 0;
}

/*
 * lang_check_coll_compat - checks compatibility of current collations (of
 *              running process) with a reference set of
 *              collations
 * Returns : error code
 * coll_array(in): reference collations
 * coll_cnt(in):
 * client_text(in): text to display in message error for client (this can be
 *          "server" when checking server vs database)
 * server_text(in): text to display in message error for server (this can be
 *          "database" when checking server vs database)
 */
int
lang_check_coll_compat (const LANG_COLL_COMPAT * coll_array, const int coll_cnt, const char *client_text,
            const char *server_text)
{
  char err_msg[ERR_MSG_SIZE];
  int i;
  int er_status = NO_ERROR;

  assert (coll_array != NULL);
  assert (coll_cnt > 0);
  assert (client_text != NULL);
  assert (server_text != NULL);

  if (lang_Count_collations != coll_cnt)
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
        "Number of collations do not match : " "%s has %d collations, %s has %d collations", client_text,
        lang_Count_collations, server_text, coll_cnt);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }

  for (i = 0; i < coll_cnt; i++)
    {
      const LANG_COLL_COMPAT *ref_c;
      LANG_COLLATION *lc;

      ref_c = &(coll_array[i]);

      assert (ref_c->coll_id >= 0 && ref_c->coll_id < LANG_MAX_COLLATIONS);
      /* collation id is valid, check if same collation */
      lc = lang_get_collation (ref_c->coll_id);

      if (lc->coll.coll_id != ref_c->coll_id)
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
            "Collation '%s' with id %d from %s not found with the " "same id on %s", ref_c->coll_name,
            ref_c->coll_id, server_text, client_text);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }

      if (strcmp (lc->coll.coll_name, ref_c->coll_name))
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
            "Names of collation with id %d do not match : " "on %s, is '%s'; on %s, is '%s'", ref_c->coll_id,
            client_text, ref_c->coll_name, server_text, lc->coll.coll_name);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }

      if (lc->codeset != ref_c->codeset)
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
            "Codesets of collation '%s' with id %d do not match : "
            "on %s, codeset is %d; on %s, codeset is %d", ref_c->coll_name, ref_c->coll_id, client_text,
            ref_c->codeset, server_text, lc->codeset);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }

      if (strcasecmp (lc->coll.checksum, ref_c->checksum))
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
            "Collation '%s' with id %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
            ref_c->coll_name, ref_c->coll_id, client_text, ref_c->checksum, server_text, lc->coll.checksum);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }
    }
exit:
  return er_status;
}

/*
 * lang_check_locale_compat - checks compatibility of current locales (of
 *                running process) with a reference set of
 *                locales
 * Returns : error code
 * loc_array(in): reference locales
 * loc_cnt(in):
 * client_text(in): text to display in message error for client
 * server_text(in): text to display in message error for server
 */
int
lang_check_locale_compat (const LANG_LOCALE_COMPAT * loc_array, const int loc_cnt, const char *client_text,
              const char *server_text)
{
  char err_msg[ERR_MSG_SIZE];
  int i, j;
  int er_status = NO_ERROR;

  assert (loc_array != NULL);
  assert (loc_cnt > 0);

  /* check that each locale from client is defined by server */
  for (i = 0; i < lang_Count_locales; i++)
    {
      LANG_LOCALE_DATA *lld = lang_Loaded_locales[i];
      const LANG_LOCALE_COMPAT *ref_loc = NULL;

      do
    {
      bool ref_found = false;

      for (j = 0; j < loc_cnt; j++)
        {
          ref_loc = &(loc_array[j]);

          if (lld->codeset == ref_loc->codeset && strcasecmp (lld->lang_name, ref_loc->lang_name) == 0)
        {
          ref_found = true;
          break;
        }
        }

      if (!ref_found)
        {
          snprintf (err_msg, sizeof (err_msg) - 1, "Locale '%s' with codeset %d loaded by %s " "not found on %s",
            lld->lang_name, lld->codeset, client_text, server_text);
          er_status = ER_LOC_INIT;
          LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
          goto exit;
        }

      assert (ref_found);

      if (strcasecmp (ref_loc->checksum, lld->checksum))
        {
          snprintf (err_msg, sizeof (err_msg) - 1,
            "Locale '%s' with codeset %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
            ref_loc->lang_name, ref_loc->codeset, server_text, ref_loc->checksum, client_text,
            lld->checksum);
          er_status = ER_LOC_INIT;
          LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
          goto exit;
        }
      lld = lld->next_lld;

    }
      while (lld != NULL);
    }

  /* check that each locale from server is loaded by client */
  for (j = 0; j < loc_cnt; j++)
    {
      bool loc_found = false;
      const LANG_LOCALE_COMPAT *ref_loc = NULL;
      LANG_LOCALE_DATA *lld = NULL;

      ref_loc = &(loc_array[j]);

      for (i = 0; i < lang_Count_locales && !loc_found; i++)
    {
      lld = lang_Loaded_locales[i];

      do
        {
          if (lld->codeset == ref_loc->codeset && strcasecmp (lld->lang_name, ref_loc->lang_name) == 0)
        {
          loc_found = true;
          break;
        }
          lld = lld->next_lld;
        }
      while (lld != NULL);
    }

      if (!loc_found)
    {
      snprintf (err_msg, sizeof (err_msg) - 1, "Locale '%s' with codeset %d defined on %s " "is not loaded by %s",
            ref_loc->lang_name, ref_loc->codeset, server_text, client_text);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }

      assert (loc_found && lld != NULL);

      if (strcasecmp (ref_loc->checksum, lld->checksum))
    {
      snprintf (err_msg, sizeof (err_msg) - 1,
            "Locale '%s' with codeset %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
            ref_loc->lang_name, ref_loc->codeset, server_text, ref_loc->checksum, client_text, lld->checksum);
      er_status = ER_LOC_INIT;
      LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
      goto exit;
    }
    }

exit:
  return er_status;
}

#undef EUC_SPACE
#undef ASCII_SPACE

#undef SPACE
#undef PAD
#undef ZERO