Skip to content

File uca_support.c

File List > base > uca_support.c

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * uca_support.c : Unicode Collation Algorithm support
 */
#include <assert.h>

#include <errno.h>
#include "utility.h"
#include "environment_variable.h"
#include "locale_support.h"
#include "error_manager.h"
#include "porting.h"

#include "intl_support.h"
#include "uca_support.h"
#include "unicode_support.h"
// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"

#if defined (SUPPRESS_STRLEN_WARNING)
#define strlen(s1)  ((int) strlen(s1))
#endif /* defined (SUPPRESS_STRLEN_WARNING) */

#if defined(SA_MODE)

#define DUCET_FILE "ducet.txt"

#define MAX_WEIGHT_LEVELS 4

#define MAX_UCA_WEIGHT 0xFFFF
#define MAX_UCA_CODEPOINT 0xFFFF

#define UCA_CONTR_EXP_CNT_GROW 8

#define MAX_LOGICAL_POS 14

/* Collation element */
typedef struct uca_coll_ce UCA_COLL_CE;
struct uca_coll_ce
{
  UCA_W weight[MAX_WEIGHT_LEVELS];
};

typedef struct uca_coll_ce_list UCA_COLL_CE_LIST;
struct uca_coll_ce_list
{
  unsigned char num;
  UCA_COLL_CE ce[MAX_UCA_EXP_CE];
};

#define GET_UCA_WEIGHT(ce_list,i,w) ((ce_list)->ce[(i)].weight[(w)])
#define SET_UCA_WEIGHT(ce_list,i,w,val) \
  do { \
    (ce_list)->ce[(i)].weight[(w)] = (val); \
    } while (0);

/* UCA contraction */
typedef struct uca_chr_seq UCA_CHR_SEQ;
struct uca_chr_seq
{
  UCA_CP cp_list[LOC_MAX_UCA_CHARS_SEQ];
  int cp_count;

  UCA_COLL_CE_LIST ce;
};

typedef UCA_CHR_SEQ UCA_CONTRACTION;
typedef UCA_CHR_SEQ UCA_EXPANSION;

/* Contraction, ID tuple, used for reconciliation after contractions list optimization */
typedef struct uca_coll_contr_id UCA_COLL_CONTR_ID;
struct uca_coll_contr_id
{
  COLL_CONTRACTION contr_ref;
  int pos_id;
};

/* Collation keys : single codepoint, multiple codepoints (contractions) */
typedef enum
{
  COLL_KEY_TYPE_CP = 0,
  COLL_KEY_TYPE_CONTR,
  COLL_KEY_TYPE_EXP
} UCA_COLL_KEY_TYPE;

typedef struct uca_coll_key UCA_COLL_KEY;
struct uca_coll_key
{
  UCA_COLL_KEY_TYPE type;
  union
  {
    int cp;
    int contr_id;
    int exp_id;
  } val;
};

typedef struct uca_storage UCA_STORAGE;

typedef struct uca_weight_key_list UCA_WEIGHT_KEY_LIST;
struct uca_weight_key_list
{
  UCA_COLL_KEY *key_list;
  int list_count;
};

struct uca_storage
{
  /* single code-point CE */
  UCA_COLL_CE_LIST *coll_cp;

  /* contractions CE */
  UCA_CONTRACTION *coll_contr;
  int max_contr;
  int count_contr;

  /* tailoring expansions CE */
  UCA_EXPANSION *coll_exp;
  int max_exp;
  int count_exp;

  /* settings for previous instance (used only by DUCET storage) */
  char prev_file_path[PATH_MAX];
  int prev_contr_policy;
};

static UCA_STORAGE ducet = {
  NULL,
  NULL, 0, 0,
  NULL, 0, 0,
  "", CONTR_IGNORE
};

static UCA_STORAGE curr_uca = {
  NULL,
  NULL, 0, 0,
  NULL, 0, 0,
  "", CONTR_IGNORE
};

static UCA_W *w_occurences[MAX_WEIGHT_LEVELS];

static UCA_WEIGHT_KEY_LIST *weight_key_list;

static int logical_pos_cp[MAX_LOGICAL_POS];

/* used for sorting */
static UCA_OPTIONS *uca_tailoring_options = NULL;

static int load_ducet (const char *file_path, const int sett_contr_policy);
static int init_uca_instance (LOCALE_COLLATION * lc);
static int destroy_uca_instance (void);
static int build_key_list_groups (LOCALE_COLLATION * lc);
static void sort_coll_key_lists (LOCALE_COLLATION * lc);
static void sort_one_coll_key_list (LOCALE_COLLATION * lc, int weight_index);
static int uca_comp_func_coll_key_fo (const void *arg1, const void *arg2);
static int uca_comp_func_coll_key (const void *arg1, const void *arg2);
static UCA_COLL_CE_LIST *get_ce_list_from_coll_key (const UCA_COLL_KEY * key);
static int create_opt_weights (LOCALE_COLLATION * lc);
static int optimize_coll_contractions (LOCALE_COLLATION * lc);
static int set_next_value_for_coll_key (LOCALE_COLLATION * lc, const UCA_COLL_KEY * coll_key,
                    const UCA_COLL_KEY * next_key);
static int add_opt_coll_contraction (LOCALE_COLLATION * lc, const UCA_COLL_KEY * contr_key, const unsigned int wv,
                     bool use_expansions);
static int compare_ce_list (UCA_COLL_CE_LIST * ce_list1, UCA_COLL_CE_LIST * ce_list2, UCA_OPTIONS * uca_opt);
static UCA_COLL_KEY *get_key_with_ce_sublist (UCA_COLL_CE_LIST * uca_item, const int lvl);
static void make_coll_key (UCA_COLL_KEY * key, UCA_COLL_KEY_TYPE type, const int key_id);
static int find_contr_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st);
static int find_exp_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st);
static int apply_tailoring_rule (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key,
                 T_LEVEL lvl);
static int apply_tailoring_rule_identity (UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key);
static int apply_tailoring_rule_w_dir (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key,
                       UCA_COLL_KEY * ref_key, T_LEVEL lvl);
static int apply_tailoring_rules (LOCALE_COLLATION * lc);
static int compute_weights_per_level_stats (void);
#if 0
static int compact_weight_values (const int level, const UCA_W max_weight);
static void build_weight_remap_filter (const UCA_W * w_ocurr, const int max_weight, UCA_W * w_filter);
#endif
static int add_key_to_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv);
static int remove_key_from_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv);
static int change_key_weight_list (const UCA_COLL_KEY * key, UCA_W w_from, UCA_W w_to);
static int string_to_coll_ce_list (char *s, UCA_COLL_CE_LIST * ui);
static int apply_absolute_tailoring_rules (LOCALE_COLLATION * lc);
static UCA_CONTRACTION *new_contraction (UCA_STORAGE * storage);
static int add_uca_contr_or_exp (LOCALE_COLLATION * lc, UCA_STORAGE * storage, const unsigned int *cp_array,
                 const int cp_count, const UCA_COLL_KEY_TYPE seq_type);
static int read_cp_from_tag (unsigned char *buffer, CP_BUF_TYPE type, UCA_CP * cp);

static int comp_func_coll_contr_bin (const void *arg1, const void *arg2);

static int create_opt_ce_w_exp (LOCALE_COLLATION * lc);

static int uca_comp_func_coll_list_exp_fo (const void *arg1, const void *arg2);
static int uca_comp_func_coll_list_exp (const void *arg1, const void *arg2);

static void build_compressed_uca_w_l13 (const UCA_COLL_CE_LIST * ce_list, UCA_L13_W * uca_w_l13);
static void build_uca_w_l4 (const UCA_COLL_CE_LIST * ce_list, UCA_L4_W * uca_w_l4);
/*
 * load_ducet - Read the DUCET file (standardised and availabe at Unicode.org)
 *      into the ducet array; parse it and load all the information in
 *      the ducet storage.
 * Returns: error status
 *
 * file_path(in): file path for DUCET file
 * sett_contr_policy(in): behavior for contractions
 *
 */
static int
load_ducet (const char *file_path, const int sett_contr_policy)
{
  FILE *f = NULL;
  char str[256];
  char str_ref[256];
  char str_orig[256];
  char *weights[64];
  int lines = 1;
  int i;
  int err_status = NO_ERROR;
  char err_msg[ERR_MSG_SIZE];
  unsigned int cp;
  int w;

  UCA_COLL_CE_LIST *temp_ducet = NULL;
  UCA_COLL_CE_LIST *ducet_cp = NULL;

  assert (file_path != NULL);

  if (strcmp (file_path, ducet.prev_file_path) == 0 && sett_contr_policy == ducet.prev_contr_policy)
    {
      /* already loaded */
      return NO_ERROR;
    }

  uca_free_data ();

  temp_ducet = (UCA_COLL_CE_LIST *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
  if (temp_ducet == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  ducet_cp = ducet.coll_cp = temp_ducet;

  for (i = 0; i <= MAX_UCA_CODEPOINT; i++)
    {
      ducet_cp[i].num = 0;
      for (w = 0; w < MAX_WEIGHT_LEVELS; w++)
    {
      int ce_pos;

      for (ce_pos = 0; ce_pos < MAX_UCA_EXP_CE; ce_pos++)
        {
          SET_UCA_WEIGHT (&(ducet_cp[i]), ce_pos, w, 0);
        }
    }
    }

  f = fopen_ex (file_path, "rt");
  if (f == NULL)
    {
      err_status = ER_LOC_GEN;
      snprintf_dots_truncate (err_msg, sizeof (err_msg) - 1, "Cannot open file %s", file_path);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }

  assert (RULE_POS_LAST_TRAIL < sizeof (logical_pos_cp) / sizeof (logical_pos_cp[0]));
  for (i = 0; i < (int) (sizeof (logical_pos_cp) / sizeof (logical_pos_cp[0])); i++)
    {
      logical_pos_cp[i] = -1;
    }

  while (fgets (str, sizeof (str), f))
    {
      char *comment;
      char *weight;
      char *s, *save;
      int codenum;
      bool is_variable = false;
      char is_ignorable[4] = { -1, -1, -1, -1 };
      UCA_COLL_CE_LIST *ce_list = NULL;
      UCA_CP cp_list[LOC_MAX_UCA_CHARS_SEQ];
      uint32 cp_int_list[LOC_MAX_UCA_CHARS_SEQ];
      int cp_list_count = 0;
      bool is_allowed = true;
      int val;

      lines++;

      strcpy (str_ref, str);
      strcpy (str_orig, str);
      str_to_int32 (&val, &s, str, 16);
      cp = (unsigned int) val;

      /* Skip comment lines and unicode values above max allowed CP */
      if (str[0] == '#' || (cp > MAX_UCA_CODEPOINT))
    {
      continue;
    }

      if ((comment = strchr (str, '#')))
    {
      /* Remove comments found after the weight list. */
      *comment++ = '\0';
      for (; *comment == ' '; comment++);
    }
      else
    {
      /* Skip empty lines and @version line. */
      continue;
    }

      if ((weight = strchr (str, ';')))
    {
      /* Drop chars after ';' until '[' reached */
      *weight++ = '\0';
      for (; *weight == ' '; weight++);
      /* At this point, we have the unicode representation stored in str and weight items stored in the weight
       * string var */
    }
      else
    {
      /* If the "if" tests above pass, but the line does not contain ';' then the line is not valid, so skip it */
      continue;
    }

      /* Count number of chars in Uncode item e.g. codenum will be greater that 1 if contraction found. */
      codenum = 1;
      cp_int_list[cp_list_count++] = cp;
      str_to_int32 (&val, &s, str, 16);
      codenum += string_to_int_array (s, &(cp_int_list[1]), LOC_MAX_UCA_CHARS_SEQ - 1, " \t");

      if (codenum > LOC_MAX_UCA_CHARS_SEQ)
    {
      /* If the number of codepoints found in the input string is greater than LOC_MAX_UCA_CHARS_SEQ, only the
       * first LOC_MAX_UCA_CHARS_SEQ are used, and no error is thrown. */
      codenum = LOC_MAX_UCA_CHARS_SEQ;
    }

      cp_list_count = codenum;

      for (i = 0; i < codenum; i++)
    {
      if (cp_int_list[i] > MAX_UCA_CODEPOINT)
        {
          is_allowed = false;
          cp_list_count = i;
          break;
        }
      cp_list[i] = (UCA_CP) cp_int_list[i];
    }

      if (!is_allowed)
    {
      continue;
    }

      if (codenum > 1)
    {
      UCA_CONTRACTION *contr = NULL;

      if ((sett_contr_policy & CONTR_DUCET_USE) != CONTR_DUCET_USE)
        {
          continue;
        }

      assert (codenum <= LOC_MAX_UCA_CHARS_SEQ);

      codenum = MIN (codenum, LOC_MAX_UCA_CHARS_SEQ);

      contr = new_contraction (&ducet);
      if (contr == NULL)
        {
          err_status = ER_OUT_OF_VIRTUAL_MEMORY;
          goto exit;
        }

      contr->cp_count = cp_list_count;
      memcpy (contr->cp_list, cp_list, cp_list_count * sizeof (UCA_CP));

      ce_list = &(contr->ce);
    }
      else
    {
      ce_list = &(ducet_cp[cp]);
    }

      assert (ce_list != NULL);

      ce_list->num = 0;
      s = strtok_r (weight, " []", &save);
      while (s)
    {
      /* Count the number of collation elements of the current char */
      weights[ce_list->num] = s;
      s = strtok_r (NULL, " []", &save);
      ce_list->num++;
    }

      for (w = 0; w < ce_list->num; w++)
    {
      int partnum;

      partnum = 0;
      s = weights[w];

      is_variable = ((*s) == '*') ? true : false;

      /* Now, in weights[...] we have the collation elements stored as strings We decompose the N collation
       * elements into 4 weight values and store them into uca[code].weight[1..4][1..N] */
      while (*s)
        {
          char *endptr;
          int result = 0;
          int val;

          result = str_to_int32 (&val, &endptr, s + 1, 16);
          SET_UCA_WEIGHT (ce_list, w, partnum, (int) val);

          assert (partnum < 4);
          if (val == 0 && is_ignorable[partnum] == -1)
        {
          is_ignorable[partnum] = 1;
        }
          else
        {
          is_ignorable[partnum] = 0;
        }

          s = endptr;
          partnum++;
        }
    }

      if (is_variable)
    {
      logical_pos_cp[RULE_POS_LAST_VAR] = cp;
      if (logical_pos_cp[RULE_POS_FIRST_VAR] == -1)
        {
          logical_pos_cp[RULE_POS_FIRST_VAR] = cp;
        }
    }

      assert (is_ignorable[0] == 0 || is_ignorable[0] == 1);
      assert (is_ignorable[1] == 0 || is_ignorable[1] == 1);
      assert (is_ignorable[2] == 0 || is_ignorable[2] == 1);

      if (is_ignorable[0] == 1)
    {
      logical_pos_cp[RULE_POS_LAST_PRI_IGN] = cp;
      if (logical_pos_cp[RULE_POS_FIRST_PRI_IGN] == -1)
        {
          logical_pos_cp[RULE_POS_FIRST_PRI_IGN] = cp;
        }
    }

      if (is_ignorable[1] == 1)
    {
      logical_pos_cp[RULE_POS_LAST_SEC_IGN] = cp;
      if (logical_pos_cp[RULE_POS_FIRST_SEC_IGN] == -1)
        {
          logical_pos_cp[RULE_POS_FIRST_SEC_IGN] = cp;
        }
    }

      if (is_ignorable[2] == 1)
    {
      logical_pos_cp[RULE_POS_LAST_TERT_IGN] = cp;
      if (logical_pos_cp[RULE_POS_FIRST_TERT_IGN] == -1)
        {
          logical_pos_cp[RULE_POS_FIRST_TERT_IGN] = cp;
        }
    }

      if (is_ignorable[0] == 0 && is_ignorable[1] == 0 && is_ignorable[2] == 0)
    {
      logical_pos_cp[RULE_POS_LAST_NON_IGN] = cp;
      if (logical_pos_cp[RULE_POS_FIRST_NON_IGN] == -1)
        {
          logical_pos_cp[RULE_POS_FIRST_NON_IGN] = cp;
        }
    }
    }

  fclose (f);
  f = NULL;

  /* Set implicit weights for unicode values not found in the DUCET file */
  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
    {
      unsigned int base, aaaa, bbbb;

      /* Skip if the Unicode value was found in the DUCET file */
      if (ducet_cp[cp].num)
    {
      continue;
    }

      /*
       * 3400;<CJK Ideograph Extension A, First> 4DB5;<CJK Ideograph Extension A, Last> 4E00;<CJK Ideograph, First>
       * 9FA5;<CJK Ideograph, Last> */

      if (cp >= 0x3400 && cp <= 0x4DB5)
    {
      base = 0xFB80;
    }
      else if (cp >= 0x4E00 && cp <= 0x9FA5)
    {
      base = 0xFB40;
    }
      else
    {
      base = 0xFBC0;
    }

      aaaa = base + (cp >> 15);
      bbbb = (cp & 0x7FFF) | 0x8000;
      SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 0, aaaa);
      SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 0, bbbb);

      SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 1, 0x0020);
      SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 1, 0x0000);

      SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 2, 0x0002);
      SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 2, 0x0000);

      SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 3, 0x0001);
      SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 3, 0x0000);

      ducet_cp[cp].num = 2;
    }

exit:
  strncpy (ducet.prev_file_path, file_path, sizeof (ducet.prev_file_path));
  ducet.prev_file_path[sizeof (ducet.prev_file_path) - 1] = '\0';

  ducet.prev_contr_policy = sett_contr_policy;

  if (f != NULL)
    {
      fclose (f);
    }

  return err_status;
}

/*
 * init_uca_instance - Prepares one UCA instance for processing
 * Returns :  ERR_OUT_OF_VIRTUAL_MEMORY - if an allocation fails;
 *        NO_ERROR - success if otherwise.
 */
static int
init_uca_instance (LOCALE_COLLATION * lc)
{
  int i;
  int err_status = NO_ERROR;
  UCA_COLL_CE_LIST *uca_cp = NULL;
  char ducet_file_path[PATH_MAX];

  assert (lc != NULL);

  uca_cp = (UCA_COLL_CE_LIST *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
  if (uca_cp == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }
  memset (uca_cp, 0, (MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));

  curr_uca.coll_cp = uca_cp;

  envvar_localedatadir_file (ducet_file_path, sizeof (ducet_file_path), DUCET_FILE);
  err_status = load_ducet (ducet_file_path, lc->tail_coll.uca_opt.sett_contr_policy);

  if (err_status != NO_ERROR)
    {
      goto exit;
    }
  memcpy (uca_cp, ducet.coll_cp, (MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));

  /* copy contractions */
  for (i = 0; i < ducet.count_contr; i++)
    {
      UCA_CONTRACTION *ducet_contr = &(ducet.coll_contr[i]);
      UCA_CONTRACTION *uca_contr;
      int j;

      for (j = 0; j < ducet_contr->cp_count; j++)
    {
      if (ducet_contr->cp_list[j] >= lc->tail_coll.sett_max_cp)
        {
          err_status = ER_LOC_GEN;
          LOG_LOCALE_ERROR ("Codepoint in DUCET contraction exceeds " "maximum allowed codepoint", ER_LOC_GEN,
                true);
          goto exit;
        }
    }

      uca_contr = new_contraction (&curr_uca);

      if (uca_contr == NULL)
    {
      err_status = ER_OUT_OF_VIRTUAL_MEMORY;
      goto exit;
    }

      uca_contr->cp_count = ducet_contr->cp_count;
      memcpy (uca_contr->cp_list, ducet_contr->cp_list, ducet_contr->cp_count * sizeof (UCA_CP));
      memcpy (&(uca_contr->ce), &(ducet_contr->ce), sizeof (UCA_COLL_CE_LIST));
    }

  for (i = 0; i < MAX_WEIGHT_LEVELS; i++)
    {
      w_occurences[i] = (UCA_W *) malloc ((MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));

      if (w_occurences[i] == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

      memset (w_occurences[i], 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));
    }

  weight_key_list = (UCA_WEIGHT_KEY_LIST *) malloc ((MAX_UCA_WEIGHT + 1) * sizeof (UCA_WEIGHT_KEY_LIST));
  if (weight_key_list == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }
  memset (weight_key_list, 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_WEIGHT_KEY_LIST));

exit:
  return err_status;
}

/*
 * destroy_uca_instance - Unload the UCa array and all auxiliary arrays.
 * Returns :  NO_ERROR.
 */
static int
destroy_uca_instance (void)
{
  int i;

  if (curr_uca.coll_cp != NULL)
    {
      free (curr_uca.coll_cp);
      curr_uca.coll_cp = NULL;
    }

  if (curr_uca.coll_contr != NULL)
    {
      assert (curr_uca.max_contr > 0);

      free (curr_uca.coll_contr);
      curr_uca.coll_contr = NULL;
      curr_uca.max_contr = 0;
      curr_uca.count_contr = 0;
    }

  if (curr_uca.coll_exp != NULL)
    {
      assert (curr_uca.max_exp > 0);

      free (curr_uca.coll_exp);
      curr_uca.coll_exp = NULL;
      curr_uca.max_exp = 0;
      curr_uca.count_exp = 0;
    }

  for (i = 0; i < MAX_WEIGHT_LEVELS; i++)
    {
      if (w_occurences[i] != NULL)
    {
      free (w_occurences[i]);
      w_occurences[i] = NULL;
    }
    }

  if (weight_key_list != NULL)
    {
      for (i = 0; i <= MAX_UCA_WEIGHT; i++)
    {
      if (weight_key_list[i].key_list != NULL)
        {
          free (weight_key_list[i].key_list);
          weight_key_list[i].key_list = NULL;
        }
    }
      free (weight_key_list);
      weight_key_list = NULL;
    }

  return NO_ERROR;
}

/*
 * compare_ce_list - compares two lists of collation elements using the
 *           settings of locale
 * Returns : -1 if ce_list1 collates before ce_list2;
 *        0 if ce_list1 collates on the same level as ce_list2;
 *        1 if ce_list1 collates after ce_list2.
 * ce_list1 (in) : collation element list to compare.
 * ce_list2 (in) : collation element list to compare.
 * uca_opt(in) : sorting options
 */
static int
compare_ce_list (UCA_COLL_CE_LIST * ce_list1, UCA_COLL_CE_LIST * ce_list2, UCA_OPTIONS * uca_opt)
{
  int result, i, weight_level;
  int numCodepoints = 0;

  assert (ce_list1 != NULL);
  assert (ce_list2 != NULL);
  assert (uca_opt != NULL);

  numCodepoints = MIN (ce_list1->num, ce_list2->num);

  if (numCodepoints == 0)
    {
      /* one of the chars has no codepoints attached */
      if (ce_list1->num > 0)
    {
      return 1;
    }
      else if (ce_list2->num > 0)
    {
      return -1;
    }

      return 0;
    }
  result = 0;

  numCodepoints = MAX (ce_list1->num, ce_list2->num);

  if (uca_opt->use_only_first_ce)
    {
      numCodepoints = MIN (numCodepoints, 1);
    }

  for (weight_level = 0; (weight_level < MAX_WEIGHT_LEVELS) && (result == 0); weight_level++)
    {
      if (weight_level + 1 > (int) (uca_opt->sett_strength)
      && ((uca_opt->sett_caseFirst + (uca_opt->sett_caseLevel ? 1 : 0) == 0)
          || ((uca_opt->sett_caseFirst + (uca_opt->sett_caseLevel ? 1 : 0) > 0) && weight_level != 2)))
    {
      continue;
    }

      for (i = 0; (i < numCodepoints) && (result == 0); i++)
    {
      if (GET_UCA_WEIGHT (ce_list1, i, weight_level) < GET_UCA_WEIGHT (ce_list2, i, weight_level))
        {
          result = -1;
        }
      else if (GET_UCA_WEIGHT (ce_list1, i, weight_level) > GET_UCA_WEIGHT (ce_list2, i, weight_level))
        {
          result = 1;
        }

      /* accents level */
      if (weight_level == 1 && uca_opt->sett_backwards == 1)
        {
          /* backwards */
          result = -result;
        }
      else if (weight_level == 2 && uca_opt->sett_caseFirst == 1)
        {
          /* caseFirst = upper : revert L3 DUCET order */
          result = -result;
        }
    }
    }

  return result;
}

/*
 * uca_process_collation - Calls all the functions which
 *             actually do the processing.
 * Returns: ER_LOC_GEN if a tailoring rule occurs;
 *      ER_OUT_OF_VIRTUAL_MEMORY if some memory allocation fails;
 *      NO_ERROR if tailoring is successful.
 * lc(in/out) : contains the collation settings and optimization results.
 *
 */
int
uca_process_collation (LOCALE_COLLATION * lc, bool is_verbose)
{
  int err_status = NO_ERROR;

  if (is_verbose)
    {
      printf ("Initializing UCA\n");
    }

  err_status = init_uca_instance (lc);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  if (is_verbose)
    {
      printf ("DUCET file has %d contractions\n", ducet.count_contr);
      printf ("Applying %d CUBRID tailoring rules\n", lc->tail_coll.cub_count_rules);
    }

  uca_tailoring_options = &(lc->tail_coll.uca_opt);
  err_status = apply_absolute_tailoring_rules (lc);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  if (is_verbose)
    {
      printf ("Build weight statistics\n");
    }

  err_status = compute_weights_per_level_stats ();
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  err_status = build_key_list_groups (lc);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  if (is_verbose)
    {
      printf ("Applying %d UCA tailoring rules\n", lc->tail_coll.count_rules);
    }
  err_status = apply_tailoring_rules (lc);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }

  if (lc->tail_coll.uca_opt.sett_expansions)
    {
      if (is_verbose)
    {
      printf ("Building optimized weights with expansions\n");
    }

      err_status = create_opt_ce_w_exp (lc);
      if (err_status != NO_ERROR)
    {
      goto exit;
    }
    }
  else
    {
      if (is_verbose)
    {
      printf ("Sorting weight keys lists\n");
    }
      sort_coll_key_lists (lc);

      if (is_verbose)
    {
      printf ("Building optimized weights\n");
    }

      err_status = create_opt_weights (lc);
      if (err_status != NO_ERROR)
    {
      goto exit;
    }
    }

  memcpy (&(lc->opt_coll.uca_opt), &(lc->tail_coll.uca_opt), sizeof (UCA_OPTIONS));
exit:
  destroy_uca_instance ();
  uca_tailoring_options = NULL;
  if (is_verbose)
    {
      printf ("UCA finished\n");
    }
  return err_status;
}

/*
 * apply_tailoring_rules - Loop through the tailoring rules in
 *             LOCALE_COLLATION::coll.rules, parse the composed
 *             rules if any, and call the function which does
 *             the tailoring e.g. execute the rule on the
 *             already processed data.
 * Returns: error status
 * lc (in/out) : collation settings and optimization results.
 */
static int
apply_tailoring_rules (LOCALE_COLLATION * lc)
{
  int i;
  UCA_COLL_KEY anchor_key;
  UCA_COLL_KEY ref_key;
  UCA_COLL_KEY tailor_key;
  unsigned char *ptr_uchar;
  int err_status = NO_ERROR;
  TAILOR_RULE *t_rule = NULL;
  char er_msg[ERR_MSG_SIZE];

  for (i = 0; i < lc->tail_coll.count_rules; i++)
    {
      unsigned int anchor_cp_list[LOC_MAX_UCA_CHARS_SEQ];
      unsigned int ref_cp_list[LOC_MAX_UCA_CHARS_SEQ];
      int anchor_cp_count = 0;
      int ref_cp_count = 0;
      int buf_size;
      int cp_found;
      int contr_id;

      t_rule = &(lc->tail_coll.rules[i]);

      /* anchor key : */
      if (t_rule->r_pos_type != RULE_POS_BUFFER)
    {
      int anchor_cp;

      assert (t_rule->r_pos_type > 0);
      assert (t_rule->r_pos_type < MAX_LOGICAL_POS);

      anchor_cp = logical_pos_cp[t_rule->r_pos_type];

      assert (anchor_cp >= 0 && anchor_cp <= MAX_UCA_CODEPOINT);
      if (anchor_cp >= lc->tail_coll.sett_max_cp)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d. Codepoint value too big", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

      assert (anchor_cp > 0);
      make_coll_key (&anchor_key, COLL_KEY_TYPE_CP, anchor_cp);
      memcpy (&ref_key, &anchor_key, sizeof (UCA_COLL_KEY));
    }
      else
    {
      buf_size = strlen (t_rule->anchor_buf);
      ptr_uchar = (unsigned char *) t_rule->anchor_buf;

      cp_found =
        intl_utf8_to_cp_list (ptr_uchar, buf_size, anchor_cp_list, LOC_MAX_UCA_CHARS_SEQ, &anchor_cp_count);

      if (cp_found <= 0 || cp_found > anchor_cp_count)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

      if (anchor_cp_count == 1)
        {
          assert (*anchor_cp_list > 0);
          make_coll_key (&anchor_key, COLL_KEY_TYPE_CP, *anchor_cp_list);
        }
      else
        {
          assert (anchor_cp_count > 1);

          /* contraction or expansion */
          if ((lc->tail_coll.uca_opt.sett_contr_policy & CONTR_TAILORING_USE) != CONTR_TAILORING_USE
          && !lc->tail_coll.uca_opt.sett_expansions)
        {
          continue;
        }

          contr_id = find_contr_id (anchor_cp_list, anchor_cp_count, &curr_uca);

          if (contr_id != -1)
        {
          make_coll_key (&anchor_key, COLL_KEY_TYPE_CONTR, contr_id);
        }
          else
        {
          int exp_id;
          assert (contr_id == -1);

          /* this is an expansion */
          if (!lc->tail_coll.uca_opt.sett_expansions)
            {
              /* ignore expansions */
              continue;
            }

          exp_id = find_exp_id (anchor_cp_list, anchor_cp_count, &curr_uca);

          if (exp_id == -1)
            {
              exp_id = add_uca_contr_or_exp (lc, &curr_uca, anchor_cp_list, anchor_cp_count, COLL_KEY_TYPE_EXP);
            }

          if (exp_id == -1)
            {
              err_status = ER_LOC_GEN;
              snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d." "Cannot create expansion",
                i);
              LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
              goto exit;
            }

          make_coll_key (&anchor_key, COLL_KEY_TYPE_EXP, exp_id);
        }
        }


      /* reference key */
      buf_size = t_rule->r_buf_size;
      ptr_uchar = (unsigned char *) t_rule->r_buf;
      cp_found = intl_utf8_to_cp_list (ptr_uchar, buf_size, ref_cp_list, LOC_MAX_UCA_CHARS_SEQ, &ref_cp_count);

      if (cp_found <= 0 || cp_found > ref_cp_count)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule :%d", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

      if (ref_cp_count == 1)
        {
          if ((int) (*ref_cp_list) >= lc->tail_coll.sett_max_cp)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule :%d." " Codepoint value too big",
                i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

          assert (*ref_cp_list > 0);
          make_coll_key (&ref_key, COLL_KEY_TYPE_CP, *ref_cp_list);
        }
      else
        {
          assert (ref_cp_count > 1);

          /* contraction or expansion */
          if ((lc->tail_coll.uca_opt.sett_contr_policy & CONTR_TAILORING_USE) != CONTR_TAILORING_USE
          && !lc->tail_coll.uca_opt.sett_expansions)
        {
          continue;
        }

          contr_id = find_contr_id (ref_cp_list, ref_cp_count, &curr_uca);

          if (contr_id != -1)
        {
          make_coll_key (&ref_key, COLL_KEY_TYPE_CONTR, contr_id);
        }
          else
        {
          int exp_id;

          /* expansion */
          assert (contr_id == -1);

          if (!lc->tail_coll.uca_opt.sett_expansions)
            {
              continue;
            }

          exp_id = find_exp_id (ref_cp_list, ref_cp_count, &curr_uca);
          if (exp_id == -1)
            {
              exp_id = add_uca_contr_or_exp (lc, &curr_uca, ref_cp_list, ref_cp_count, COLL_KEY_TYPE_EXP);
            }

          if (exp_id == -1)
            {
              err_status = ER_LOC_GEN;
              snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule: %d" "Cannot create expansion",
                i);
              LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
              goto exit;
            }

          make_coll_key (&ref_key, COLL_KEY_TYPE_EXP, exp_id);
        }
        }
    }

      if (t_rule->multiple_chars)
    {
      unsigned char *tailor_curr;
      unsigned char *tailor_next;
      unsigned char *tailor_end;

      tailor_curr = (unsigned char *) (t_rule->t_buf);
      tailor_next = tailor_curr;
      tailor_end = tailor_curr + t_rule->t_buf_size;

      while (tailor_next < tailor_end)
        {
          unsigned int tailor_cp =
        intl_utf8_to_cp (tailor_curr, CAST_STRLEN (tailor_end - tailor_curr), &tailor_next);

          assert (lc->tail_coll.sett_max_cp >= 0);
          if (tailor_cp >= (unsigned int) lc->tail_coll.sett_max_cp)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid tailoring in rule :%d." "Codepoint : %4X too big", i,
                tailor_cp);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

          assert (tailor_cp > 0);
          make_coll_key (&tailor_key, COLL_KEY_TYPE_CP, tailor_cp);

          err_status = apply_tailoring_rule (t_rule->direction, &anchor_key, &tailor_key, &ref_key, t_rule->level);
          if (err_status != NO_ERROR)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Cannot apply :%d", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

          tailor_curr = tailor_next;

          memcpy (&ref_key, &tailor_key, sizeof (UCA_COLL_KEY));
        }
    }
      else
    {
      unsigned int tailor_cp_list[LOC_MAX_UCA_CHARS_SEQ];
      int tailor_cp_count;

      buf_size = t_rule->t_buf_size;
      ptr_uchar = (unsigned char *) t_rule->t_buf;
      cp_found =
        intl_utf8_to_cp_list (ptr_uchar, buf_size, tailor_cp_list, LOC_MAX_UCA_CHARS_SEQ, &tailor_cp_count);

      if (cp_found <= 0 || cp_found > tailor_cp_count)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1,
            "Invalid tailoring in rule :%d." "Invalid number of codepoints: %d", i, cp_found);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

      if (tailor_cp_count > 1)
        {
          /* contraction */
          if ((lc->tail_coll.uca_opt.sett_contr_policy & CONTR_TAILORING_USE) != CONTR_TAILORING_USE)
        {
          continue;
        }

          contr_id = find_contr_id (tailor_cp_list, tailor_cp_count, &curr_uca);

          if (contr_id == -1)
        {
          contr_id = add_uca_contr_or_exp (lc, &curr_uca, tailor_cp_list, tailor_cp_count, COLL_KEY_TYPE_CONTR);
        }

          if (contr_id == -1)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Rule :%d. Cannot create contraction.", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

          make_coll_key (&tailor_key, COLL_KEY_TYPE_CONTR, contr_id);
        }
      else
        {
          if ((int) (*tailor_cp_list) >= lc->tail_coll.sett_max_cp)
        {
          err_status = ER_LOC_GEN;
          snprintf (er_msg, sizeof (er_msg) - 1, "Invalid tailoring in rule :%d." " Codepoint value too big",
                i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }

          assert (*tailor_cp_list > 0);
          make_coll_key (&tailor_key, COLL_KEY_TYPE_CP, *tailor_cp_list);
        }

      err_status = apply_tailoring_rule (t_rule->direction, &anchor_key, &tailor_key, &ref_key, t_rule->level);
      if (err_status != NO_ERROR)
        {
          snprintf (er_msg, sizeof (er_msg) - 1, "Rule :%d", i);
          LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
          goto exit;
        }
    }
    }

exit:
  return err_status;
}

/*
 * compute_weights_per_level_stats - Build statistics regarding the number of
 *                   occurences of each weight on each level
 *                   in each collation element.
 */

static int
compute_weights_per_level_stats (void)
{
  int depth, weight_level;
  int cp, i;
  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;
  int used_weights[MAX_WEIGHT_LEVELS];
  UCA_W max_weight_val[MAX_WEIGHT_LEVELS];
  int err_status = NO_ERROR;

  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
    {
      for (depth = 0; depth < uca_cp[cp].num; depth++)
    {
      for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
        {
          UCA_W w = GET_UCA_WEIGHT (&(uca_cp[cp]), depth, weight_level);
          w_occurences[weight_level][w]++;
        }
    }
    }

  for (i = 0; i < curr_uca.count_contr; i++)
    {
      UCA_CONTRACTION *contr = &(curr_uca.coll_contr[i]);

      for (depth = 0; depth < contr->ce.num; depth++)
    {
      for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
        {
          UCA_W w = GET_UCA_WEIGHT (&(contr->ce), depth, weight_level);
          w_occurences[weight_level][w]++;
        }
    }
    }

  /* how many weight values are really used */
  for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
    {
      used_weights[weight_level] = 0;
      max_weight_val[weight_level] = 0;
      for (i = 0; i < MAX_UCA_WEIGHT + 1; i++)
    {
      if (w_occurences[weight_level][i] != 0)
        {
          used_weights[weight_level]++;
          max_weight_val[weight_level] = i;
        }
    }
    }

  if (max_weight_val[1] > 0x1ff || max_weight_val[2] > 0x7f + 1)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Cannot store weights. Max weight values exceeded", ER_LOC_GEN, true);
      goto exit;
    }

exit:
  return err_status;
}

#if 0
/*
 * compact_weight_values - rewrites the weight values for one level in UCA
 *             storage so that the weights are compact values.
 *
 *  return : error code
 *  level(in): level to compact weights
 *  max_weight(in): max weight value at this level
 *
 * Note : this function uses the global 'w_occurences' array
 */
static int
compact_weight_values (const int level, const UCA_W max_weight)
{
  UCA_W *w_filter = NULL;
  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;
  int err_status = NO_ERROR;
  int cp, i, depth;

  assert (level >= 0 && level <= 3);

  w_filter = (UCA_W *) malloc ((max_weight + 1) * sizeof (UCA_W));
  if (w_filter == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  memset (w_filter, 0xffff, (max_weight + 1) * sizeof (UCA_W));

  build_weight_remap_filter (w_occurences[level], max_weight, w_filter);

  /* reassign new weights and reset weight statistics for this level */
  memset (w_occurences[level], 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));

  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
    {
      for (depth = 0; depth < uca_cp[cp].num; depth++)
    {
      UCA_W w = GET_UCA_WEIGHT (&(uca_cp[cp]), depth, level);

      w = w_filter[w];
      assert (w != 0xffff);
      SET_UCA_WEIGHT (&(uca_cp[cp]), depth, level, w);

      w_occurences[level][w]++;
    }
    }

  for (i = 0; i < curr_uca.count_contr; i++)
    {
      UCA_CONTRACTION *contr = &(curr_uca.coll_contr[i]);

      for (depth = 0; depth < contr->ce.num; depth++)
    {
      UCA_W w = GET_UCA_WEIGHT (&(contr->ce), depth, level);

      w = w_filter[w];
      assert (w != 0xffff);
      SET_UCA_WEIGHT (&(contr->ce), depth, level, w);

      w_occurences[level][w]++;
    }
    }

exit:
  if (w_filter != NULL)
    {
      free (w_filter);
      w_filter = NULL;
    }

  return err_status;
}

/*
 * build_weight_remap_filter - builds a filter for remapping weights to a
 *                 compact value range.
 *
 *  return :
 *  w_ocurr(in): occurences array (number of ocurrences for each weight value)
 *  max_weight(in): maximum weight before compacting
 *  w_filter(in/out): weight filter
 *
 */
static void
build_weight_remap_filter (const UCA_W * w_ocurr, const int max_weight, UCA_W * w_filter)
{
  int w;
  int last_used_w;

  assert (max_weight > 0 && max_weight <= 0xffff);
  assert (w_ocurr != NULL);

  last_used_w = 0;

  for (w = 0; w <= max_weight; w++)
    {
      if (w_ocurr[w] > 0)
    {
      w_filter[w] = last_used_w++;
    }
    }

  assert (last_used_w < max_weight);
}
#endif

/*
 * build_key_list_groups - builds the collation key lists for each L1 weight
 *             value
 * Returns: error status
 * lc (in): the collation settings.
 */
static int
build_key_list_groups (LOCALE_COLLATION * lc)
{
  int cp, wv, i;
  int err_status = NO_ERROR;

  for (wv = 0; wv <= MAX_UCA_WEIGHT; wv++)
    {
      weight_key_list[wv].list_count = 0;
      if (w_occurences[0][wv] == 0)
    {
      weight_key_list[wv].key_list = NULL;
    }
      else
    {
      weight_key_list[wv].key_list = (UCA_COLL_KEY *) malloc (w_occurences[0][wv] * sizeof (UCA_COLL_KEY));

      if (weight_key_list[wv].key_list == NULL)
        {
          err_status = ER_LOC_GEN;
          LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
          goto exit;
        }
    }
    }

  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
    {
      wv = GET_UCA_WEIGHT (&(curr_uca.coll_cp[cp]), 0, 0);
      weight_key_list[wv].key_list[weight_key_list[wv].list_count].val.cp = cp;
      weight_key_list[wv].key_list[weight_key_list[wv].list_count].type = COLL_KEY_TYPE_CP;
      weight_key_list[wv].list_count++;
    }

  for (i = 0; i < curr_uca.count_contr; i++)
    {
      wv = GET_UCA_WEIGHT (&(curr_uca.coll_contr[i].ce), 0, 0);
      weight_key_list[wv].key_list[weight_key_list[wv].list_count].val.contr_id = i;
      weight_key_list[wv].key_list[weight_key_list[wv].list_count].type = COLL_KEY_TYPE_CONTR;
      weight_key_list[wv].list_count++;
    }

exit:
  return err_status;
}

/*
 * sort_coll_key_lists - Sorts all the collation keys lists grouped
 *           by weight values of level 1 weight
 * lc(in) : the collation settings.
 */
static void
sort_coll_key_lists (LOCALE_COLLATION * lc)
{
  int wv;

  for (wv = 0; wv <= MAX_UCA_WEIGHT; wv++)
    {
      sort_one_coll_key_list (lc, wv);
    }
}

/*
 * sort_one_coll_key_list - Sorts the collation keys grouped in a list
 *              having the same weight value (= weight index)
 *              at level 1 in UCA
 * lc(in) : the collation settings.
 * weight_index(in) : the index of the collation keys list to be sorted.
 *
 * Note : The collation keys list located at weight_key_list[weight_index] was
 *    built from collation keys (codepoints or contractions) which have on
 *    level 1 weight the same value
 */
static void
sort_one_coll_key_list (LOCALE_COLLATION * lc, int weight_index)
{
  assert (weight_index <= MAX_UCA_WEIGHT);

  if (weight_key_list[weight_index].list_count <= 0)
    {
      return;
    }

  qsort (weight_key_list[weight_index].key_list, weight_key_list[weight_index].list_count, sizeof (UCA_COLL_KEY),
     uca_comp_func_coll_key_fo);
}

/*
 * uca_comp_func_coll_key_fo - compare function for sorting collatable
 *                 elements according to UCA algorithm, with
 *                 full order
 *
 *  Note: This function is used in the first step of computing 'next' sequence
 *    If result of 'uca_comp_func_coll_key' is zero, the comparison
 *    is performed on codepooints values. The purpose is to provide a
 *    'deterministic comparison' in order to eliminate unpredictable
 *    results of sort algorithm (qsort) when computing 'next' fields.
 *
 */
static int
uca_comp_func_coll_key_fo (const void *arg1, const void *arg2)
{
  UCA_COLL_KEY *pos1_key;
  UCA_COLL_KEY *pos2_key;
  int cmp;

  pos1_key = (UCA_COLL_KEY *) arg1;
  pos2_key = (UCA_COLL_KEY *) arg2;

  cmp = uca_comp_func_coll_key (arg1, arg2);

  if (cmp == 0)
    {
      if (pos1_key->type == pos2_key->type)
    {
      return pos1_key->val.cp - pos2_key->val.cp;
    }
      else if (pos1_key->type == COLL_KEY_TYPE_CONTR)
    {
      return 1;
    }
      else
    {
      return -1;
    }
    }

  return cmp;
}

/*
 * uca_comp_func_coll_key - compare function for sorting collatable elements
 *              according to UCA algorithm
 *
 *  Note: this function is used to sort collatable elements according to
 *    CEs tables and UCA settins (sorting options)
 *    The elements in array are of UCA_COLL_KEY type, keys which
 *    may be Unicode points or contractions
 *
 */
static int
uca_comp_func_coll_key (const void *arg1, const void *arg2)
{
  UCA_COLL_KEY *pos1_key;
  UCA_COLL_KEY *pos2_key;
  UCA_COLL_CE_LIST *ce_list1;
  UCA_COLL_CE_LIST *ce_list2;
  UCA_OPTIONS *uca_opt = uca_tailoring_options;

  assert (uca_opt != NULL);

  pos1_key = (UCA_COLL_KEY *) arg1;
  pos2_key = (UCA_COLL_KEY *) arg2;

  ce_list1 = get_ce_list_from_coll_key (pos1_key);
  ce_list2 = get_ce_list_from_coll_key (pos2_key);

  return compare_ce_list (ce_list1, ce_list2, uca_opt);
}

/*
 * get_ce_list_from_coll_key - get collation element list associated to a
 *                 collation key using current UCA storage
 *
 * Returns: collation element list
 * key(in) : UCA colllation key
 */
static UCA_COLL_CE_LIST *
get_ce_list_from_coll_key (const UCA_COLL_KEY * key)
{
  assert (key != NULL);

  if (key->type == COLL_KEY_TYPE_CP)
    {
      if (key->val.cp <= MAX_UCA_CODEPOINT)
    {
      return &(curr_uca.coll_cp[key->val.cp]);
    }
    }
  else if (key->type == COLL_KEY_TYPE_CONTR)
    {
      if (key->val.contr_id < curr_uca.count_contr)
    {
      return &(curr_uca.coll_contr[key->val.contr_id].ce);
    }
    }
  else
    {
      assert (key->type == COLL_KEY_TYPE_EXP);

      if (key->val.exp_id < curr_uca.count_exp)
    {
      return &(curr_uca.coll_exp[key->val.exp_id].ce);
    }
    }

  return NULL;
}

/*
 * create_opt_weights - Analyze the weight_key_list, compare collation
 *          elements fo weight keys based on locale settings,
 *          set the new weights for the keys, create the
 *          next_key array containing the relationship
 *          key -> next key in collation.
 * Returns: error status
 * lc(in/out) : contains the collation settings and optimization results.
 */
static int
create_opt_weights (LOCALE_COLLATION * lc)
{
  UCA_COLL_KEY *equal_key_list = NULL;
  int weight_index;
  int equal_key_count, i;
  unsigned int current_weight;
  int err_status = NO_ERROR;
  UCA_COLL_KEY *prev_key = NULL;
  UCA_COLL_KEY max_cp_key;
  UCA_COLL_CE_LIST *prev_ce_list = NULL;

  weight_index = 0;

  assert (lc->tail_coll.sett_max_cp <= MAX_UCA_CODEPOINT + 1 && lc->tail_coll.sett_max_cp > 0);

  equal_key_list = (UCA_COLL_KEY *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_KEY));
  if (equal_key_list == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  lc->opt_coll.weights = (unsigned int *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
  if (lc->opt_coll.weights == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  lc->opt_coll.next_cp = (unsigned int *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
  if (lc->opt_coll.next_cp == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  memset (lc->opt_coll.weights, 0xff, (MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));

  memset (lc->opt_coll.next_cp, 0xff, (MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));

  /* weights */
  current_weight = 0;
  for (weight_index = 0; weight_index <= MAX_UCA_WEIGHT; weight_index++)
    {
      UCA_W key_cursor;

      for (key_cursor = 0; key_cursor < weight_key_list[weight_index].list_count; key_cursor++)
    {
      UCA_COLL_CE_LIST *curr_ce_list = NULL;
      UCA_COLL_KEY *curr_key = &(weight_key_list[weight_index].key_list[key_cursor]);

      curr_ce_list = get_ce_list_from_coll_key (curr_key);
      if (curr_ce_list == NULL)
        {
          err_status = ER_LOC_GEN;
          LOG_LOCALE_ERROR ("Invalid collation element for key", ER_LOC_GEN, true);
          goto exit;
        }

      if ((prev_key != NULL) && (compare_ce_list (prev_ce_list, curr_ce_list, &(lc->tail_coll.uca_opt)) != 0))
        {
          /* keys compare differently */
          current_weight++;
        }

      if (curr_key->type == COLL_KEY_TYPE_CP)
        {
          assert (curr_key->val.cp >= 0 && curr_key->val.cp < lc->tail_coll.sett_max_cp);
          lc->opt_coll.weights[curr_key->val.cp] = current_weight;
        }
      else
        {
          assert (curr_key->type == COLL_KEY_TYPE_CONTR);

          err_status = add_opt_coll_contraction (lc, curr_key, current_weight, false);
          if (err_status != NO_ERROR)
        {
          goto exit;
        }
        }

      prev_key = curr_key;
      prev_ce_list = curr_ce_list;
    }
    }

  /* set 'next' values */
  equal_key_count = 0;
  for (weight_index = 0; weight_index <= MAX_UCA_WEIGHT; weight_index++)
    {
      UCA_W key_cursor;

      for (key_cursor = 0; key_cursor < weight_key_list[weight_index].list_count; key_cursor++)
    {
      UCA_COLL_CE_LIST *curr_ce_list = NULL;
      UCA_COLL_KEY *curr_key = &(weight_key_list[weight_index].key_list[key_cursor]);

      curr_ce_list = get_ce_list_from_coll_key (curr_key);
      if (curr_ce_list == NULL)
        {
          err_status = ER_LOC_GEN;
          LOG_LOCALE_ERROR ("Invalid collation element for key", ER_LOC_GEN, true);
          goto exit;
        }

      if ((prev_key != NULL) && (compare_ce_list (prev_ce_list, curr_ce_list, &(lc->tail_coll.uca_opt)) != 0))
        {
          /* keys compare differently */
          /* set next key for all previous equal keys */
          for (i = 0; i < equal_key_count; i++)
        {
          set_next_value_for_coll_key (lc, &(equal_key_list[i]), curr_key);
        }

          equal_key_count = 0;
        }

      memcpy (&(equal_key_list[equal_key_count++]), curr_key, sizeof (UCA_COLL_KEY));

      prev_key = curr_key;
      prev_ce_list = curr_ce_list;
    }
    }

  /* set 'next' for remaining collation key to max codepoint */
  make_coll_key (&max_cp_key, COLL_KEY_TYPE_CP, lc->tail_coll.sett_max_cp - 1);

  for (i = 0; i < equal_key_count; i++)
    {
      set_next_value_for_coll_key (lc, &(equal_key_list[i]), &max_cp_key);
    }

  lc->opt_coll.w_count = lc->tail_coll.sett_max_cp;

  for (i = 0; i < lc->opt_coll.w_count; i++)
    {
      if (lc->opt_coll.weights[i] == 0xffffffff || lc->opt_coll.next_cp[i] == 0xffffffff)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Internal error. Generated " "weight value or next CP value is invalid", ER_LOC_GEN, true);
      goto exit;
    }
    }

  /* optimize contractions */
  if (lc->opt_coll.count_contr > 0)
    {
      err_status = optimize_coll_contractions (lc);
    }

exit:
  if (equal_key_list != NULL)
    {
      free (equal_key_list);
      equal_key_list = NULL;
    }

  return err_status;
}

/*
 * optimize_coll_contractions - optimizes collation contractions list so that
 *                  contractions are stored in binary ascending
 *              order
 * Returns: error status
 *
 * lc(in/out) : contains the collation settings and optimization results
 */
static int
optimize_coll_contractions (LOCALE_COLLATION * lc)
{
  UCA_COLL_CONTR_ID *initial_coll_tag = NULL;
  int i;
  int err_status = NO_ERROR;
  int cp;

  assert (lc != NULL);
  assert (lc->opt_coll.count_contr > 0);

  initial_coll_tag = (UCA_COLL_CONTR_ID *) malloc (lc->opt_coll.count_contr * sizeof (UCA_COLL_CONTR_ID));
  if (initial_coll_tag == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  for (i = 0; i < lc->opt_coll.count_contr; i++)
    {
      memcpy (&(initial_coll_tag[i].contr_ref), &(lc->opt_coll.contr_list[i]), sizeof (COLL_CONTRACTION));
      initial_coll_tag[i].pos_id = i;
    }

  /* sort contractions (binary order) */
  qsort (initial_coll_tag, lc->opt_coll.count_contr, sizeof (UCA_COLL_CONTR_ID), comp_func_coll_contr_bin);

  /* adjust 'next' contractions values for all codepoints */
  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
    {
      unsigned int next_seq = lc->opt_coll.next_cp[cp];
      int curr_idx = -1;
      int opt_idx;
      bool found = false;

      if (!INTL_IS_NEXT_CONTR (next_seq))
    {
      continue;
    }

      curr_idx = INTL_GET_NEXT_CONTR_ID (next_seq);

      assert (curr_idx < lc->opt_coll.count_contr);

      /* find index in sorted contractions */
      for (opt_idx = 0; opt_idx < lc->opt_coll.count_contr; opt_idx++)
    {
      if (initial_coll_tag[opt_idx].pos_id == curr_idx)
        {
          found = true;
          break;
        }
    }

      if (!found)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Internal error. Cannot adjust " "contraction id after optimization", ER_LOC_GEN, true);
      goto exit;
    }

      assert (found == true);

      lc->opt_coll.next_cp[cp] = opt_idx | INTL_MASK_CONTR;
    }

  /* adjust 'next' contractions values for all contractions */
  for (i = 0; i < lc->opt_coll.count_contr; i++)
    {
      unsigned int next_seq = initial_coll_tag[i].contr_ref.next;
      int curr_idx = -1;
      int opt_idx;
      bool found = false;

      if (!INTL_IS_NEXT_CONTR (next_seq))
    {
      continue;
    }

      curr_idx = INTL_GET_NEXT_CONTR_ID (next_seq);

      assert (curr_idx < lc->opt_coll.count_contr);

      /* find index in sorted contractions */
      for (opt_idx = 0; opt_idx < lc->opt_coll.count_contr; opt_idx++)
    {
      if (initial_coll_tag[opt_idx].pos_id == curr_idx)
        {
          found = true;
          break;
        }
    }

      if (!found)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Internal error. Cannot adjust " "contraction id after optimization", ER_LOC_GEN, true);
      goto exit;
    }

      assert (found == true);

      initial_coll_tag[i].contr_ref.next = opt_idx | INTL_MASK_CONTR;
    }

  /* overwrite contractions in sorted order */
  for (i = 0; i < lc->opt_coll.count_contr; i++)
    {
      memcpy (&(lc->opt_coll.contr_list[i]), &(initial_coll_tag[i].contr_ref), sizeof (COLL_CONTRACTION));
    }

  /* first contraction index array */
  lc->opt_coll.cp_first_contr_array = (int *) malloc (lc->opt_coll.w_count * sizeof (int));
  if (lc->opt_coll.cp_first_contr_array == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
    {
      lc->opt_coll.cp_first_contr_array[cp] = -1;
    }

  lc->opt_coll.contr_min_size = (lc->opt_coll.count_contr > 0) ? LOC_MAX_UCA_CHARS_SEQ * INTL_UTF8_MAX_CHAR_SIZE : 0;

  for (i = 0; i < lc->opt_coll.count_contr; i++)
    {
      unsigned char *c_buf = (unsigned char *) lc->opt_coll.contr_list[i].c_buf;
      unsigned char *dummy;
      int c_buf_size = strlen ((char *) c_buf);
      unsigned int cp;

      assert (c_buf_size < LOC_MAX_UCA_CHARS_SEQ * INTL_UTF8_MAX_CHAR_SIZE);
      lc->opt_coll.contr_list[i].size = (unsigned char) c_buf_size;

      lc->opt_coll.contr_min_size = MIN (lc->opt_coll.contr_min_size, c_buf_size);

      /* get first code-point */
      cp = intl_utf8_to_cp (c_buf, c_buf_size, &dummy);

      if (cp < (unsigned int) lc->opt_coll.w_count && lc->opt_coll.cp_first_contr_array[cp] == -1)
    {
      lc->opt_coll.cp_first_contr_array[cp] = i;
    }
    }

  /* compute interval of codepoints with contractions */
  lc->opt_coll.cp_first_contr_offset = -1;
  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
    {
      if (lc->opt_coll.cp_first_contr_array[cp] != -1)
    {
      lc->opt_coll.cp_first_contr_offset = cp;
      break;
    }
    }

  assert (lc->opt_coll.cp_first_contr_offset < MAX_UNICODE_CHARS);

  for (cp = lc->opt_coll.w_count - 1; cp >= 0; cp--)
    {
      if (lc->opt_coll.cp_first_contr_array[cp] != -1)
    {
      lc->opt_coll.cp_first_contr_count = cp - lc->opt_coll.cp_first_contr_offset + 1;
      break;
    }
    }

  assert ((int) lc->opt_coll.cp_first_contr_count < lc->opt_coll.w_count);

  if (lc->opt_coll.cp_first_contr_offset > 0)
    {
      for (i = 0; i < (int) lc->opt_coll.cp_first_contr_count; i++)
    {
      lc->opt_coll.cp_first_contr_array[i] =
        lc->opt_coll.cp_first_contr_array[i + lc->opt_coll.cp_first_contr_offset];
    }
    }

exit:
  if (initial_coll_tag != NULL)
    {
      free (initial_coll_tag);
      initial_coll_tag = NULL;
    }

  return err_status;
}

/*
 * set_next_value_for_coll_key - sets the next collation key
 * Returns: error status
 *
 * lc(in/out) : contains the collation settings and optimization results
 * coll_key(in) : collation key
 * next_key(in) : next key value to set
 */
static int
set_next_value_for_coll_key (LOCALE_COLLATION * lc, const UCA_COLL_KEY * coll_key, const UCA_COLL_KEY * next_key)
{
  /* set next key for all previous equal keys */
  if (next_key->type == COLL_KEY_TYPE_CP)
    {
      if (coll_key->type == COLL_KEY_TYPE_CP)
    {
      lc->opt_coll.next_cp[coll_key->val.cp] = next_key->val.cp;
    }
      else
    {
      assert (coll_key->type == COLL_KEY_TYPE_CONTR);
      assert (coll_key->val.contr_id < lc->opt_coll.count_contr);
      lc->opt_coll.contr_list[coll_key->val.contr_id].next = next_key->val.cp;
    }
    }
  else
    {
      assert (next_key->type == COLL_KEY_TYPE_CONTR);
      assert (next_key->val.contr_id < lc->opt_coll.count_contr);

      if (coll_key->type == COLL_KEY_TYPE_CP)
    {
      lc->opt_coll.next_cp[coll_key->val.cp] = next_key->val.contr_id | INTL_MASK_CONTR;
    }
      else
    {
      assert (coll_key->type == COLL_KEY_TYPE_CONTR);
      assert (coll_key->val.contr_id < lc->opt_coll.count_contr);

      lc->opt_coll.contr_list[coll_key->val.contr_id].next = next_key->val.contr_id | INTL_MASK_CONTR;
    }
    }

  return NO_ERROR;
}

/*
 * add_opt_coll_contraction - adds an contraction item to optimized collation
 * Returns: error status
 *
 * lc(in/out) : contains the collation settings and optimization results
 * contr_key(in) : collation key
 * wv(in) : weight value to assign for optimized contraction
 * use_expansions(in) :
 */
static int
add_opt_coll_contraction (LOCALE_COLLATION * lc, const UCA_COLL_KEY * contr_key, const unsigned int wv,
              bool use_expansions)
{
  COLL_CONTRACTION *opt_contr = NULL;
  UCA_CONTRACTION *uca_contr;
  char *p_buf = NULL;
  int err_status = NO_ERROR;
  int i;

  assert (contr_key != NULL);
  assert (contr_key->type == COLL_KEY_TYPE_CONTR);

  assert (contr_key->val.contr_id < curr_uca.count_contr);
  uca_contr = &(curr_uca.coll_contr[contr_key->val.contr_id]);

  lc->opt_coll.contr_list =
    (COLL_CONTRACTION *) realloc (lc->opt_coll.contr_list, (lc->opt_coll.count_contr + 1) * sizeof (COLL_CONTRACTION));

  if (lc->opt_coll.contr_list == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  opt_contr = &(lc->opt_coll.contr_list[lc->opt_coll.count_contr++]);
  memset (opt_contr, 0, sizeof (COLL_CONTRACTION));

  p_buf = opt_contr->c_buf;

  assert (uca_contr->cp_count > 1);

  for (i = 0; i < uca_contr->cp_count; i++)
    {
      int utf8_size = intl_cp_to_utf8 (uca_contr->cp_list[i],
                       (unsigned char *) p_buf);
      p_buf += utf8_size;
    }
  opt_contr->cp_count = uca_contr->cp_count;
  *p_buf = '\0';

  opt_contr->wv = wv;
  assert (p_buf - opt_contr->c_buf < (int) sizeof (opt_contr->c_buf));
  opt_contr->size = (unsigned char) (p_buf - opt_contr->c_buf);

  if (use_expansions)
    {
      UCA_COLL_CE_LIST *ce_list;

      memset (opt_contr->uca_w_l13, 0, sizeof (opt_contr->uca_w_l13));
      if (lc->tail_coll.uca_opt.sett_strength >= TAILOR_QUATERNARY)
    {
      memset (opt_contr->uca_w_l4, 0, sizeof (opt_contr->uca_w_l4));
    }

      ce_list = get_ce_list_from_coll_key (contr_key);

      assert (ce_list != NULL);
      opt_contr->uca_num = ce_list->num;

      assert (opt_contr->uca_num > 0);

      build_compressed_uca_w_l13 (ce_list, opt_contr->uca_w_l13);
      if (lc->tail_coll.uca_opt.sett_strength >= TAILOR_QUATERNARY)
    {
      for (i = 0; i < MAX_UCA_EXP_CE; i++)
        {
          opt_contr->uca_w_l4[i] = GET_UCA_WEIGHT (ce_list, i, 3);
        }
    }
    }

exit:
  return err_status;
}

/*
 * uca_free_data - Unloads data used by UCA
 * Returns:
 */
void
uca_free_data (void)
{
  if (ducet.coll_cp != NULL)
    {
      free (ducet.coll_cp);
      ducet.coll_cp = NULL;
    }

  if (ducet.coll_contr)
    {
      free (ducet.coll_contr);
      ducet.coll_contr = NULL;
    }

  assert (ducet.coll_exp == NULL);

  ducet.max_contr = 0;
  ducet.count_contr = 0;

  *(ducet.prev_file_path) = '0';
}

/*
 * apply_tailoring_rule_cp - Calls the functions which apply the rules
 *               based on their type.
 * Returns: ER_LOC_GEN if cannot apply rule, or the rule has an invald type;
 *      ER_OUT_OF_VIRTUAL_MEMORY if moving the tailored codepoint inside
 *      the weight stats array fails;
 *      NO_ERROR if tailoring is successful.
 * dir(in) : 0 = after, 1 = before.
 * anchor_key(in) : the anchor for which the rule is applied.
 * key(in) : key to be tailored.
 * ref_key(in) : the key previously tailored (or anchor if this is the
 *      first rule having "anchor" as the anchor key).
 * lvl(in) : weight level used for tailoring (see T_LEVEL for values).
 *
 * Note : Alter the collation elements of the collated element in order for
 *    the keys to comply with the specified rule after collation and
 *    optimizations.
 *    If the rule is a "before" rule, checks if it can be applied.
 */
static int
apply_tailoring_rule (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key,
              T_LEVEL lvl)
{
  if (lvl == TAILOR_IDENTITY)
    {
      return apply_tailoring_rule_identity (key, ref_key);
    }

  return apply_tailoring_rule_w_dir (dir, anchor_key, key, ref_key, lvl);
}

/*
 * apply_tailoring_rule_identity - Apply an identity tailoring rule.
 *
 * Returns: error status
 * key(in)     : collation key to be tailored.
 * ref_key(in) : collation key to which we tailor key as identical.
 */
static int
apply_tailoring_rule_identity (UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key)
{
  UCA_W current_weight;
  int err_status = NO_ERROR;
  UCA_COLL_CE_LIST *ce_list_key = NULL;
  UCA_COLL_CE_LIST *ce_list_ref_key = NULL;

  assert (key != NULL);
  assert (ref_key != NULL);

  ce_list_key = get_ce_list_from_coll_key (key);
  ce_list_ref_key = get_ce_list_from_coll_key (ref_key);

  if (ce_list_key == NULL || ce_list_ref_key == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Cannot apply identity rule. Collation key not found", ER_LOC_GEN, true);
      goto exit;
    }

  /* Make sure the reference is represented in DUCET. */
  assert (ce_list_ref_key->num > 0);

  current_weight = GET_UCA_WEIGHT (ce_list_key, 0, 0);
  if (current_weight != GET_UCA_WEIGHT (ce_list_ref_key, 0, 0))
    {
      err_status = change_key_weight_list (key, current_weight, GET_UCA_WEIGHT (ce_list_ref_key, 0, 0));
      if (err_status != NO_ERROR)
    {
      goto exit;
    }
    }

  assert (ce_list_key != ce_list_ref_key);
  memcpy (ce_list_key, ce_list_ref_key, sizeof (UCA_COLL_CE_LIST));

exit:
  return err_status;
}

/*
 * apply_tailoring_rule_w_dir - Applies an After/Before -rule
 * Returns: error status
 *
 * dir(in)   : direction : 0 after, 1 before
 * anchor_key(in): the anchor for which the rule is applied.
 * key(in)   : key to be tailored.
 * ref_key(in)   : the key previously tailored (or anchor if this is the
 *         first rule having "anchor" as the anchor key).
 * lvl(in) : weight level used for tailoring (see T_LEVEL for values).
 *
 * Note : Alter the collation elements of the collated element in order for
 *    the symbols to comply with the specified rule after collation and
 *    optimizations.
 *    Collisions are avoided.
 */
static int
apply_tailoring_rule_w_dir (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key,
                T_LEVEL lvl)
{
  int i, j;
  bool collation_finished, overflow = false;
  UCA_W current_weight;
  UCA_COLL_CE_LIST new_ce;
  int err_status = NO_ERROR;
  UCA_COLL_CE_LIST *ce_list_key = NULL;
  UCA_COLL_CE_LIST *ce_list_anchor_key = NULL;
  UCA_COLL_CE_LIST *ce_list_ref_key = NULL;

  assert (key != NULL);
  assert (ref_key != NULL);

  ce_list_key = get_ce_list_from_coll_key (key);
  ce_list_ref_key = get_ce_list_from_coll_key (ref_key);
  ce_list_anchor_key = get_ce_list_from_coll_key (anchor_key);

  if (ce_list_key == NULL || ce_list_ref_key == NULL || ce_list_anchor_key == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Cannot apply identity rule. Collation key not found", ER_LOC_GEN, true);
      goto exit;
    }

  /* Make sure the anchor and ref codepoint are represented in DUCET. */
  assert (ce_list_anchor_key->num > 0);
  assert (ce_list_ref_key->num > 0);

  /* Clone anchor weights (uca element full clone) */
  memcpy (&new_ce, ce_list_anchor_key, sizeof (UCA_COLL_CE_LIST));

  new_ce.num = MAX (new_ce.num, ce_list_ref_key->num);

  /* overwrite with reference weights up to level */
  for (j = 0; j < (int) lvl; j++)
    {
      for (i = 0; i < (int) (new_ce.num); i++)
    {
      SET_UCA_WEIGHT (&(new_ce), i, j, GET_UCA_WEIGHT (ce_list_ref_key, i, j));
    }
    }

  collation_finished = false;

  while (!collation_finished)
    {
      if (new_ce.num > MAX_UCA_EXP_CE)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Error when applying after-rule. " "Collation element list overflow.", ER_LOC_GEN, true);
      goto exit;
    }

      assert (new_ce.num >= 1);
      assert (lvl >= 1);

      if (dir == TAILOR_AFTER)
    {
      UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
      /* AFTER */
      overflow = false;

      /* Try increasing the weight value on the last collation element. */
      if (w_val >= MAX_UCA_WEIGHT)
        {
          new_ce.num++;
          overflow = true;
        }

      w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
      SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val + 1);
    }
      else
    {
      UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
      /* BEFORE */
      assert (dir == TAILOR_BEFORE);

      /* Try decreasing the weight value on the last collation element. */
      if (w_val > 0)
        {
          SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val - 1);
        }
      else
        {
          int ce_index = (int) (new_ce.num);

          while (ce_index > 0 && GET_UCA_WEIGHT (&new_ce, ce_index - 1, lvl - 1) == 0)
        {
          ce_index--;
        }

          if (ce_index <= 0)
        {
          err_status = ER_LOC_GEN;
          LOG_LOCALE_ERROR ("Applying before-rule. Collation element" " list underflow.", ER_LOC_GEN, true);
          goto exit;
        }
          SET_UCA_WEIGHT (&new_ce, ce_index - 1, lvl - 1, MAX_UCA_WEIGHT);
        }
    }

      if (get_key_with_ce_sublist (&new_ce, lvl) != NULL)
    {
      /* If a collision occurs, go further into the collation element list */

      if (dir == TAILOR_AFTER)
        {
          /* AFTER */
          if (!overflow)
        {
          UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
          /* Revert weight increase */
          SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val - 1);
        }
          /* Add a new collation element. */
          new_ce.num++;
        }
      else
        {
          /* BEFORE */
          assert (dir == TAILOR_BEFORE);

          /* Add a new collation element. */
          new_ce.num++;
          /* Maximize the weight on the new collation element. */
          SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, MAX_UCA_WEIGHT);
        }
    }
      else
    {
      collation_finished = true;
    }
    }

  current_weight = GET_UCA_WEIGHT (ce_list_key, 0, 0);
  if (GET_UCA_WEIGHT (&new_ce, 0, 0) != current_weight)
    {
      err_status = change_key_weight_list (key, current_weight, GET_UCA_WEIGHT (&new_ce, 0, 0));
      if (err_status != NO_ERROR)
    {
      goto exit;
    }
    }

  memcpy (ce_list_key, &new_ce, sizeof (UCA_COLL_CE_LIST));

exit:
  return err_status;
}

/*
 * get_key_with_ce_sublist - Tests if a collation key's collation element list
 *               matches uca_item up to lvl(level) and
 *               uca_item.num.
 * Returns  : collation key if found, or
 *        NULL if no partial match was found.
 * uca_item(in) : an object of type UCA_COLL_CE_LIST, containing the list of
 *        collation elements to search for.
 * lvl(in) : the level up to which the matching should be done.
 */
static UCA_COLL_KEY *
get_key_with_ce_sublist (UCA_COLL_CE_LIST * uca_item, const int lvl)
{
  UCA_W weight_index;
  int i, ce_index, level_index;
  bool found;

  assert (uca_item != NULL);

  weight_index = GET_UCA_WEIGHT (uca_item, 0, 0);

  if (weight_key_list[weight_index].list_count == 0)
    {
      return NULL;
    }

  for (i = 0; i < weight_key_list[weight_index].list_count; i++)
    {
      UCA_COLL_KEY *key = &(weight_key_list[weight_index].key_list[i]);
      UCA_COLL_CE_LIST *ce_list = get_ce_list_from_coll_key (key);

      if (ce_list == NULL)
    {
      continue;
    }

      found = true;
      for (level_index = 0; level_index < lvl && found; level_index++)
    {
      for (ce_index = 0; ce_index < uca_item->num; ce_index++)
        {
          if (GET_UCA_WEIGHT (ce_list, ce_index, level_index) != GET_UCA_WEIGHT (uca_item, ce_index, level_index))
        {
          found = false;
          break;
        }
        }
    }

      if (found)
    {
      return key;
    }
    }

  return NULL;
}

/*
 * make_coll_key - creates a collation key
 * Returns :
 * key(in/out) : key to create
 * type(in) : key type
 * key_id(in) : collation key identifier (codepoint or cotraction key)
 */
static void
make_coll_key (UCA_COLL_KEY * key, UCA_COLL_KEY_TYPE type, const int key_id)
{
  assert (key != NULL);

  assert (key_id >= 0);

  memset (key, 0, sizeof (UCA_COLL_KEY));

  if (type == COLL_KEY_TYPE_CP)
    {
      key->type = COLL_KEY_TYPE_CP;
      key->val.cp = key_id;
    }
  else if (type == COLL_KEY_TYPE_CONTR)
    {
      key->type = COLL_KEY_TYPE_CONTR;
      key->val.contr_id = key_id;
    }
  else
    {
      assert (type == COLL_KEY_TYPE_EXP);

      key->type = COLL_KEY_TYPE_EXP;
      key->val.exp_id = key_id;
    }
}

/*
 * find_contr_id - searches for a contraction
 * Returns : id of contraction or -1 if not found
 * cp_array(in) : codepoints array to create key
 * cp_count(in) : # of codepoints in array
 * st(in) : storage to check
 */
static int
find_contr_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st)
{
  bool found;
  int i;

  assert (cp_array != NULL);
  assert (cp_count > 0);
  assert (st != NULL);

  if (cp_count > LOC_MAX_UCA_CHARS_SEQ)
    {
      return -1;
    }

  found = false;
  for (i = 0; i < st->count_contr; i++)
    {
      int j;
      UCA_CP *st_cp_list;

      if (cp_count != st->coll_contr[i].cp_count)
    {
      continue;
    }

      st_cp_list = st->coll_contr[i].cp_list;

      found = true;
      for (j = 0; j < cp_count; j++)
    {
      assert (cp_array[j] < MAX_UNICODE_CHARS);

      if (cp_array[j] != (unsigned int) (st_cp_list[j]))
        {
          found = false;
          break;
        }
    }

      if (found)
    {
      return i;
    }
    }

  return -1;
}

/*
 * find_exp_id - searches for an expansion
 * Returns : id of expansion or -1 if not found
 * cp_array(in) : codepoints array to create key
 * cp_count(in) : # of codepoints in array
 * st(in) : storage to check
 */
static int
find_exp_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st)
{
  bool found;
  int i;

  assert (cp_array != NULL);
  assert (cp_count > 0);
  assert (st != NULL);

  if (cp_count > LOC_MAX_UCA_CHARS_SEQ)
    {
      return -1;
    }

  found = false;
  for (i = 0; i < st->count_exp; i++)
    {
      int j;
      UCA_CP *st_cp_list;

      if (cp_count != st->coll_exp[i].cp_count)
    {
      continue;
    }

      st_cp_list = st->coll_exp[i].cp_list;

      found = true;
      for (j = 0; j < cp_count; j++)
    {
      assert (cp_array[j] < MAX_UNICODE_CHARS);

      if (cp_array[j] != (unsigned int) (st_cp_list[j]))
        {
          found = false;
          break;
        }
    }

      if (found)
    {
      return i;
    }
    }

  return -1;
}

/*
 * string_to_coll_ce_list - Parse a string to a collation element list
 * Retuns : NO_ERROR(0) if parsing is successful;
 *      ER_LOC_GEN if parsing fails.
 * s (in) : NULL terminated string to parse.
 * ui (in/out) : parsed collation element list.
 */
static int
string_to_coll_ce_list (char *s, UCA_COLL_CE_LIST * ui)
{
  UCA_COLL_CE_LIST uca_item;
  int weight_index, ce_index, state;
  UCA_W weight;
  bool error_found;
  char *str;
  char *end_ptr;
  char c;
  int err_status = NO_ERROR;

  memset (&uca_item, 0, sizeof (UCA_COLL_CE_LIST));

  str = s;
  weight_index = 0;
  ce_index = 0;
  state = 0;
  error_found = false;
  while (*str && !error_found)
    {
      int result = 0;
      int val;

      switch (state)
    {
    case 0:     /* read a '[' (first char from the standard string representation * of a collation
                 * element) */
      c = str[0];
      if (c != '[')
        {
          error_found = true;
          break;
        }
      str++;
      state = 1;
      break;
    case 1:     /* read a weight value in string format, in hex */
      if (weight_index == MAX_WEIGHT_LEVELS)
        {
          state = 3;
          break;
        }
      /* validate weight, to be below 0xFFFF */
      result = str_to_int32 (&val, &end_ptr, str, 16);
      if (result != 0 || val > MAX_UCA_WEIGHT)
        {
          error_found = true;
          break;
        }
      weight = (UCA_W) val;
      SET_UCA_WEIGHT (&uca_item, ce_index, weight_index, weight);
      str = end_ptr;
      if (weight_index != 3)
        {
          state = 2;
        }
      else
        {
          state = 3;
        }
      weight_index++;
      break;
    case 2:     /* Read a dot '.' = the weight separator inside the * string representation of a
                 * collation element */
      c = str[0];
      if (c != '.')
        {
          error_found = true;
          break;
        }
      str++;
      state = 1;
      break;
    case 3:     /* Read a ']' (last char from the standard string representation * of a collation
                 * element) */
      c = str[0];
      if (c != ']')
        {
          error_found = true;
          break;
        }
      str++;
      state = 0;
      weight_index = 0;
      ce_index++;
      break;
    }
    }

  if (!error_found)
    {
      uca_item.num = ce_index;
      memcpy (ui, &uca_item, sizeof (UCA_COLL_CE_LIST));
    }

  if (error_found)
    {
      switch (state)
    {
    case 0:
      LOG_LOCALE_ERROR ("Invalid collation element list. '[' expected.", ER_LOC_GEN, true);
      break;
    case 1:
      LOG_LOCALE_ERROR ("Invalid collation element list." "Weight out of 0x0000 - 0xFFFF range.", ER_LOC_GEN, true);
      break;
    case 2:
      LOG_LOCALE_ERROR ("Invalid collation element list. '.' expected.", ER_LOC_GEN, true);
      break;
    case 3:
      LOG_LOCALE_ERROR ("Invalid collation element list. ']' expected.", ER_LOC_GEN, true);
      break;
    }
      err_status = ER_LOC_GEN;
    }

  return err_status;
}

/*
 * apply_absolute_tailoring_rules - Function for applying the tailor
 *                  rules strored in lc->tail_coll.cub_rules.
 * Retuns : ER_LOC_GEN if an invalid rule is found;
 *      NO_ERROR(0) if parsing is successful.
 * lc (in/out) : locale settings and optimization results.
 */
static int
apply_absolute_tailoring_rules (LOCALE_COLLATION * lc)
{
  int rule_index, weight_index;
  int ce_index;
  UCA_COLL_CE_LIST weight;
  UCA_COLL_CE_LIST step;
  UCA_COLL_CE_LIST weight_offset;
  UCA_CP cp_index;
  bool is_overflow;
  bool is_ce_empty;
  UCA_CP start_cp;
  UCA_CP end_cp;
  int err_status = NO_ERROR;
  CUBRID_TAILOR_RULE *ct_rule;
  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;

  for (rule_index = 0; rule_index < lc->tail_coll.cub_count_rules; rule_index++)
    {
      ct_rule = &(lc->tail_coll.cub_rules[rule_index]);
      if (strlen (ct_rule->step) == 0)
    {
      strcpy (ct_rule->step, "[0001.0000.0000.0000]\0");
    }
      if (string_to_coll_ce_list (ct_rule->step, &step) != 0)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Invalid collation element list for range step.", ER_LOC_GEN, true);
      goto exit;
    }
      if (string_to_coll_ce_list (ct_rule->start_weight, &weight) != 0)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Invalid collation element list for starting weight.", ER_LOC_GEN, true);
      goto exit;
    }

      /* Parse the char buffers for start and end codepoint */
      if (read_cp_from_tag ((unsigned char *) (ct_rule->start_cp_buf), ct_rule->start_cp_buf_type, &start_cp) !=
      NO_ERROR)
    {
      goto exit;
    }
      if (read_cp_from_tag ((unsigned char *) (ct_rule->end_cp_buf), ct_rule->end_cp_buf_type, &end_cp) != NO_ERROR)
    {
      goto exit;
    }

      /* Validate starting weight, step and the number of codepoints in the range to be tailored, so that there are no
       * overflows above MAX_WEIGHT (0xFFFF). */
      is_overflow = false;
      for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
    {
      for (ce_index = 0; ce_index < step.num; ce_index++)
        {
          if (GET_UCA_WEIGHT (&step, ce_index, weight_index) * (end_cp - start_cp) > MAX_UCA_WEIGHT)
        {
          is_overflow = true;
          break;
        }
        }
      if (is_overflow)
        {
          break;
        }
    }

      if (is_overflow)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("Weight range overflow" "Weight or step too big.", ER_LOC_GEN, true);
      goto exit;
    }

      memcpy (&weight_offset, &weight, sizeof (UCA_COLL_CE_LIST));
      weight_offset.num = (weight.num > step.num) ? weight.num : step.num;

      for (cp_index = start_cp; cp_index <= end_cp; cp_index++)
    {
      memcpy (&uca_cp[cp_index], &weight_offset, sizeof (UCA_COLL_CE_LIST));
      for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
        {
          for (ce_index = 0; ce_index < weight_offset.num; ce_index++)
        {
          SET_UCA_WEIGHT (&weight_offset, ce_index, weight_index,
                  GET_UCA_WEIGHT (&step, ce_index, weight_index));
        }
        }
      /* Remove any collation elements with all weight values zero. */
      is_ce_empty = true;
      while (is_ce_empty)
        {
          for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
        {
          if (GET_UCA_WEIGHT (&(uca_cp[cp_index]), uca_cp[cp_index].num - 1, weight_index) != 0)
            {
              is_ce_empty = false;
            }
        }
          if (is_ce_empty && uca_cp[cp_index].num > 1)
        {
          uca_cp[cp_index].num--;
        }
          if (uca_cp[cp_index].num == 1)
        {
          break;
        }
        }
    }
    }

exit:
  return err_status;
}

/*
 * add_key_to_weight_stats_list - Adds a collation key to the list of
 *      keys having the selected weight inside the first
 *      collation element on level 1
 * Returns : error code
 * key(in): the key to add in a list.
 * wv(in) : the weight value corresponding to the list where the key will be
 *      added.
 */
static int
add_key_to_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv)
{
  int err_status = NO_ERROR;

  assert (key != NULL);

  if ((weight_key_list[wv].key_list =
       (UCA_COLL_KEY *) realloc (weight_key_list[wv].key_list,
                 (weight_key_list[wv].list_count + 1) * sizeof (UCA_COLL_KEY))) == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }
  memcpy (&(weight_key_list[wv].key_list[weight_key_list[wv].list_count]), key, sizeof (UCA_COLL_KEY));
  weight_key_list[wv].list_count++;

exit:
  return err_status;
}

/*
 * remove_key_from_weight_stats_list - Remove a collation key from the list of
 *      keys having a selected weight inside the first
 *      collation element on level 1
 * Returns : error status
 * cp(in)       : the codepoint to remove from the list.
 * weight_index(in) : the identifying weight value for the list where
 *            the codepoint will be removed from.
 */
static int
remove_key_from_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv)
{
  int found_at, i;
  int err_status = NO_ERROR;

  if (weight_key_list[wv].list_count == 0)
    {
      goto exit;
    }
  else if (weight_key_list[wv].list_count == 1)
    {
      if (memcmp (&(weight_key_list[wv].key_list[0]), key, sizeof (UCA_COLL_KEY)) == 0)
    {
      weight_key_list[wv].list_count = 0;
      memset (&(weight_key_list[wv].key_list[0]), 0, sizeof (UCA_COLL_KEY));
      goto exit;
    }
    }

  assert (weight_key_list[wv].list_count > 1);

  found_at = -1;
  for (i = 0; i < weight_key_list[wv].list_count; i++)
    {
      if (memcmp (&(weight_key_list[wv].key_list[i]), key, sizeof (UCA_COLL_KEY)) == 0)
    {
      found_at = i;
      break;
    }
    }

  assert (found_at != -1);

  for (i = found_at; i < weight_key_list[wv].list_count - 1; i++)
    {
      memcpy (&(weight_key_list[wv].key_list[i]), &(weight_key_list[wv].key_list[i + 1]), sizeof (UCA_COLL_KEY));
    }

  weight_key_list[wv].list_count--;

  /* zero last element */
  i = weight_key_list[wv].list_count;
  memset (&(weight_key_list[wv].key_list[i]), 0, sizeof (UCA_COLL_KEY));

exit:
  return err_status;
}

/*
 * change_key_weight_list - Removes a collation key from the list of
 *      keys having a selected weight inside the first
 *      collation element on level 1, and moving it to another
 *      simmilar list.
 * Returns : error status
 * key(in) : the key to move
 * w_from(in) : the identifying weight value for the list from where
 *      the key will be removed.
 * w_to(in) :  the identifying weight value for the list where the key will be
 *         added.
 */
static int
change_key_weight_list (const UCA_COLL_KEY * key, UCA_W w_from, UCA_W w_to)
{
  int err_status = NO_ERROR;

  assert (key != NULL);

  err_status = remove_key_from_weight_stats_list (key, w_from);
  if (err_status != NO_ERROR)
    {
      goto exit;
    }
  err_status = add_key_to_weight_stats_list (key, w_to);

exit:
  return err_status;
}

/*
 * new_contraction - creates new UCA contraction element
 * Returns : pointer to newly created contraction, or NULL if memory cannot be
 *       allocated
 *
 * storage(in/out)   : storage for UCA contraction
 *
 */
static UCA_CONTRACTION *
new_contraction (UCA_STORAGE * storage)
{
  UCA_CONTRACTION *contr = NULL;

  assert (storage != NULL);

  if (storage->count_contr >= storage->max_contr)
    {
      storage->coll_contr =
    (UCA_CONTRACTION *) realloc (storage->coll_contr,
                     sizeof (UCA_CONTRACTION) * (storage->max_contr + UCA_CONTR_EXP_CNT_GROW));

      if (storage->coll_contr == NULL)
    {
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      return NULL;
    }

      storage->max_contr += UCA_CONTR_EXP_CNT_GROW;
    }

  assert (storage->coll_contr != NULL);

  contr = &(storage->coll_contr[storage->count_contr++]);
  memset (contr, 0, sizeof (UCA_CONTRACTION));

  return contr;
}

/*
 * new_expansion - creates new UCA expansion element
 * Returns : pointer to newly created expansion, or NULL if memory cannot be
 *       allocated
 *
 * storage(in/out)   : storage for UCA expansion
 *
 */
static UCA_EXPANSION *
new_expansion (UCA_STORAGE * storage)
{
  UCA_EXPANSION *exp = NULL;

  assert (storage != NULL);

  if (storage->count_exp >= storage->max_exp)
    {
      storage->coll_exp =
    (UCA_EXPANSION *) realloc (storage->coll_exp,
                   sizeof (UCA_EXPANSION) * (storage->max_exp + UCA_CONTR_EXP_CNT_GROW));

      if (storage->coll_exp == NULL)
    {
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      return NULL;
    }

      storage->max_exp += UCA_CONTR_EXP_CNT_GROW;
    }

  assert (storage->coll_exp != NULL);

  exp = &(storage->coll_exp[storage->count_exp++]);
  memset (exp, 0, sizeof (UCA_EXPANSION));

  return exp;
}

/*
 * add_uca_contr_or_exp - creates an UCA contraction or expansion from
 *            codepoints
 * Returns : id of uca contraction/expansion or -1 if not be created
 *
 * lc(in): locale data
 * storage(in/out): storage to create in
 * cp_array(in): list of codepoints
 * cp_count(in): number of codepoints
 *
 */
static int
add_uca_contr_or_exp (LOCALE_COLLATION * lc, UCA_STORAGE * storage, const unsigned int *cp_array, const int cp_count,
              const UCA_COLL_KEY_TYPE seq_type)
{
  UCA_CHR_SEQ *uca_seq = NULL;
  UCA_COLL_KEY contr_key;
  int ce_count;
  int i;

  assert (storage != NULL);
  assert (cp_array != NULL);
  assert (cp_count > 1);
  assert (cp_count <= LOC_MAX_UCA_CHARS_SEQ);

  if (seq_type == COLL_KEY_TYPE_CONTR)
    {
      uca_seq = new_contraction (storage);
    }
  else
    {
      assert (seq_type == COLL_KEY_TYPE_EXP);
      uca_seq = new_expansion (storage);
    }

  if (uca_seq == NULL)
    {
      return -1;
    }

  uca_seq->cp_count = cp_count;
  for (i = 0; i < cp_count; i++)
    {
      if ((int) cp_array[i] >= lc->tail_coll.sett_max_cp)
    {
      LOG_LOCALE_ERROR ("Codepoint value in sequence exceeds " "maximum allowed codepoint", ER_LOC_GEN, true);
      return -1;
    }

      uca_seq->cp_list[i] = (UCA_CP) cp_array[i];
    }

  /* build collation element list from CE of codepoints */
  ce_count = 0;
  for (i = 0; i < cp_count; i++)
    {
      UCA_COLL_KEY key_cp;
      UCA_COLL_CE_LIST *cp_ce_list;
      int lev;
      int ce_index;

      make_coll_key (&key_cp, COLL_KEY_TYPE_CP, cp_array[i]);

      cp_ce_list = get_ce_list_from_coll_key (&key_cp);
      if (cp_ce_list == NULL)
    {
      LOG_LOCALE_ERROR ("Cannot find CE list for key", ER_LOC_GEN, true);
      return -1;
    }

      assert (cp_ce_list->num < MAX_UCA_EXP_CE);

      if (ce_count + cp_ce_list->num >= MAX_UCA_EXP_CE)
    {
      LOG_LOCALE_ERROR ("Cannot create contraction." "Too many collation elements", ER_LOC_GEN, true);
      return -1;
    }

      /* copy all CE of codepoint */
      for (ce_index = 0; ce_index < cp_ce_list->num; ce_index++)
    {
      for (lev = 0; lev < MAX_WEIGHT_LEVELS; lev++)
        {
          SET_UCA_WEIGHT (&(uca_seq->ce), ce_index + ce_count, lev, GET_UCA_WEIGHT (cp_ce_list, ce_index, lev));
        }
    }

      ce_count += cp_ce_list->num;
    }

  uca_seq->ce.num = ce_count;

  if (seq_type == COLL_KEY_TYPE_EXP)
    {
      assert (storage->count_exp > 0);
      return storage->count_exp - 1;
    }

  assert (seq_type == COLL_KEY_TYPE_CONTR);

  assert (storage->count_contr > 0);

  /* contraction key to statistics */
  make_coll_key (&contr_key, COLL_KEY_TYPE_CONTR, storage->count_contr - 1);
  if (add_key_to_weight_stats_list (&contr_key, GET_UCA_WEIGHT (&(uca_seq->ce), 0, 0)) != NO_ERROR)
    {
      return -1;
    }

  w_occurences[0][GET_UCA_WEIGHT (&(uca_seq->ce), 0, 0)]++;

  return storage->count_contr - 1;
}

/*
 * read_cp_from_tag - reads a codepoint value from a string tag
 * Returns : error status
 *
 * buffer(in): string buffer (nul terminated)
 * type(in): type of stored codepoint
 * cp(out): codepoint value
 *
 */
static int
read_cp_from_tag (unsigned char *buffer, CP_BUF_TYPE type, UCA_CP * cp)
{
  int temp_cp = 0;
  int result = 0;
  int err_status = NO_ERROR;
  char *chr_ptr;
  unsigned char *dummy;
  char err_msg[ERR_MSG_SIZE];

  assert (buffer != NULL);
  assert (cp != NULL);

  if (*buffer == '\0')
    {
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "Tag has no content");
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }
  else if (type == BUF_TYPE_CHAR)
    {
      dummy = buffer;
      if (intl_count_utf8_chars (dummy, strlen ((char *) dummy)) > 1)
    {
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "Multiple chars found in codepoint tag." "Tag content: %s", buffer);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }

      dummy = buffer;
      temp_cp = intl_utf8_to_cp (buffer, strlen ((char *) buffer), &dummy);

      if (temp_cp > 0xFFFF || temp_cp < 0)
    {
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "Codepoint found in tag was out of range." "Tag content: %s",
            buffer);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }
    }
  else if (type == BUF_TYPE_CODE)
    {
      chr_ptr = NULL;

      result = str_to_int32 (&temp_cp, &chr_ptr, (const char *) buffer, 16);
      if (result != 0 || temp_cp > 0xFFFF || temp_cp < 0)
    {
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "Codepoint found in tag was out of range." "Tag content: %s",
            buffer);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }
      else if (temp_cp == 0 && (chr_ptr == (char *) buffer))
    {
      /* If tag content does not start with a hex number. */
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "No valid codepoint could be found in tag." "Tag content: %s",
            buffer);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }
      else if (temp_cp > 0 && strlen (chr_ptr) != 0)
    {
      /* If tag content looks like "1234ZZZZ". */
      err_status = ER_LOC_GEN;
      snprintf (err_msg, sizeof (err_msg) - 1, "Encountered codepoint tag with invalid content." "Tag content: %s",
            buffer);
      LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
      goto exit;
    }
    }
  *cp = (UCA_CP) temp_cp;
exit:
  return err_status;
}


/*
 * comp_func_coll_contr_bin - compare function for sorting contractions list
 *
 *  Note : Contractions are sorted in binary order
 *     Elements in array to be sorted are of COLL_CONTRACTION type
 */
static int
comp_func_coll_contr_bin (const void *arg1, const void *arg2)
{
  UCA_COLL_CONTR_ID *c1 = (UCA_COLL_CONTR_ID *) arg1;
  UCA_COLL_CONTR_ID *c2 = (UCA_COLL_CONTR_ID *) arg2;

  return strcmp (c1->contr_ref.c_buf, c2->contr_ref.c_buf);
}

/*
 * UCA with expansion support
 */

/*
 * create_opt_ce_w_exp - creates weight tables and next seq array for UCA
 *           sorting with expansions
 *  lc(in) : locale struct
 */
static int
create_opt_ce_w_exp (LOCALE_COLLATION * lc)
{
  UCA_COLL_KEY key;
  UCA_COLL_CE_LIST *ce_list;
  UCA_L13_W *uca_w_l13 = NULL;
  UCA_L4_W *uca_w_l4 = NULL;
  char *uca_exp_num = NULL;
  int uca_w_array_size_l13;
  int uca_w_array_size_l4;
  int err_status = NO_ERROR;
  int i;
  int cp;
  int max_num = 0;
  unsigned int *coll_key_list = NULL;
  int coll_key_list_cnt = 0;
  bool use_level_4;
  UCA_OPTIONS uca_exp_next_opt = { TAILOR_PRIMARY, false, false, 0, true, CONTR_IGNORE, true,
    MATCH_CONTR_BOUND_FORBID
  };
  UCA_OPTIONS *saved_uca_opt = NULL;

  coll_key_list = (unsigned int *) malloc ((curr_uca.count_contr + lc->tail_coll.sett_max_cp) * sizeof (unsigned int));
  if (coll_key_list == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
    {
      make_coll_key (&key, COLL_KEY_TYPE_CP, cp);
      ce_list = get_ce_list_from_coll_key (&key);

      max_num = MAX (max_num, ce_list->num);
    }

  use_level_4 = (lc->tail_coll.uca_opt.sett_strength >= TAILOR_QUATERNARY) ? true : false;

  uca_w_array_size_l13 = lc->tail_coll.sett_max_cp * max_num * sizeof (UCA_L13_W);
  uca_w_array_size_l4 = lc->tail_coll.sett_max_cp * max_num * sizeof (UCA_L4_W);
  uca_w_l13 = (UCA_L13_W *) malloc (uca_w_array_size_l13);
  uca_exp_num = (char *) malloc (lc->tail_coll.sett_max_cp);

  if (uca_w_l13 == NULL || uca_exp_num == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  memset (uca_w_l13, 0, uca_w_array_size_l13);
  memset (uca_exp_num, 0, lc->tail_coll.sett_max_cp);

  /* do not generate L4 if tailoring level doesn't require it */
  if (use_level_4)
    {
      uca_w_l4 = (UCA_L4_W *) malloc (uca_w_array_size_l4);
      if (uca_w_l4 == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

      memset (uca_w_l4, 0, uca_w_array_size_l4);
    }

  lc->opt_coll.next_cp = (unsigned int *) malloc (lc->tail_coll.sett_max_cp * sizeof (unsigned int));
  if (lc->opt_coll.next_cp == NULL)
    {
      err_status = ER_LOC_GEN;
      LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
      goto exit;
    }

  memset (lc->opt_coll.next_cp, 0xff, lc->tail_coll.sett_max_cp * sizeof (unsigned int));

  lc->opt_coll.uca_w_l13 = uca_w_l13;
  lc->opt_coll.uca_w_l4 = uca_w_l4;
  lc->opt_coll.uca_num = uca_exp_num;
  lc->opt_coll.uca_exp_num = max_num;
  lc->opt_coll.w_count = lc->tail_coll.sett_max_cp;

  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
    {
      make_coll_key (&key, COLL_KEY_TYPE_CP, cp);
      ce_list = get_ce_list_from_coll_key (&key);

      uca_exp_num[cp] = ce_list->num;

      assert (uca_exp_num[cp] > 0);

      build_compressed_uca_w_l13 (ce_list, &(uca_w_l13[cp * max_num]));
      if (use_level_4)
    {
      build_uca_w_l4 (ce_list, &(uca_w_l4[cp * max_num]));
    }

      coll_key_list[coll_key_list_cnt++] = cp;
    }

  assert (lc->opt_coll.count_contr == 0);

  for (i = 0; i < curr_uca.count_contr; i++)
    {
      make_coll_key (&key, COLL_KEY_TYPE_CONTR, i);
      ce_list = get_ce_list_from_coll_key (&key);

      coll_key_list[coll_key_list_cnt++] = i | INTL_MASK_CONTR;

      err_status = add_opt_coll_contraction (lc, &key, 0, true);
      if (err_status != NO_ERROR)
    {
      goto exit;
    }
    }

  qsort (coll_key_list, coll_key_list_cnt, sizeof (unsigned int), uca_comp_func_coll_list_exp_fo);

  saved_uca_opt = uca_tailoring_options;
  uca_tailoring_options = &uca_exp_next_opt;

  /* TODO : optimize this to speed up 'next' computing */
  for (i = 0; i < coll_key_list_cnt - 1; i++)
    {
      UCA_COLL_KEY curr_key;
      UCA_COLL_KEY next_key;
      unsigned int curr_pos = coll_key_list[i];
      unsigned int next_pos = 0;
      int j;

      if (INTL_IS_NEXT_CONTR (curr_pos))
    {
      make_coll_key (&curr_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (curr_pos));
    }
      else
    {
      make_coll_key (&curr_key, COLL_KEY_TYPE_CP, curr_pos);
    }

      /* 'next' item is the first element which has weight level 1 greater then current item */
      for (j = i + 1; j < coll_key_list_cnt; j++)
    {
      next_pos = coll_key_list[j];

      if (uca_comp_func_coll_list_exp (&curr_pos, &next_pos) < 0)
        {
          break;
        }
    }

      if (INTL_IS_NEXT_CONTR (next_pos))
    {
      make_coll_key (&next_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (next_pos));
    }
      else
    {
      make_coll_key (&next_key, COLL_KEY_TYPE_CP, next_pos);
    }

      set_next_value_for_coll_key (lc, &curr_key, &next_key);
    }

  uca_tailoring_options = saved_uca_opt;

  /* set next for last key to itself */
  {
    UCA_COLL_KEY curr_key;
    unsigned int curr_pos = coll_key_list[coll_key_list_cnt - 1];
    if (INTL_IS_NEXT_CONTR (curr_pos))
      {
    make_coll_key (&curr_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (curr_pos));
      }
    else
      {
    make_coll_key (&curr_key, COLL_KEY_TYPE_CP, curr_pos);
      }
    set_next_value_for_coll_key (lc, &curr_key, &curr_key);
  }

  if (lc->opt_coll.count_contr > 0)
    {
      err_status = optimize_coll_contractions (lc);
    }

exit:
  if (coll_key_list != NULL)
    {
      free (coll_key_list);
    }

  return err_status;
}

/*
 * uca_comp_func_coll_list_exp_fo - compare function for sorting collatable
 *                  elements according to UCA algorithm,
 *                  with full order
 *
 *  Note: This function is used in the first step of computing 'next' sequence
 *    If result of 'uca_comp_func_coll_list_exp' is zero, the comparison
 *    is performed on codepooints values. The purpose is to provide a
 *    'deterministic comparison' in order to eliminate unpredictable
 *    results of sort algorithm (qsort) when computing 'next' fields.
 */
static int
uca_comp_func_coll_list_exp_fo (const void *arg1, const void *arg2)
{
  unsigned int pos1;
  unsigned int pos2;
  int cmp;

  pos1 = *((unsigned int *) arg1);
  pos2 = *((unsigned int *) arg2);

  cmp = uca_comp_func_coll_list_exp (arg1, arg2);

  return (cmp == 0) ? (int) (pos1 - pos2) : cmp;
}

/*
 * uca_comp_func_coll_list_exp - compare function for sorting collatable
 *               elements according to UCA algorithm
 *
 *  Note: this function is used to sort collatable elements according to
 *    CEs tables and UCA settins (sorting options)
 *    The elements in array are 32 bit unsigned integers, keys which
 *    may be Unicode points or contractions (when highest bit is set)
 *
 */
static int
uca_comp_func_coll_list_exp (const void *arg1, const void *arg2)
{
  UCA_COLL_KEY pos1_key;
  UCA_COLL_KEY pos2_key;
  unsigned int pos1;
  unsigned int pos2;

  pos1 = *((unsigned int *) arg1);
  pos2 = *((unsigned int *) arg2);

  if (INTL_IS_NEXT_CONTR (pos1))
    {
      make_coll_key (&pos1_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (pos1));
    }
  else
    {
      make_coll_key (&pos1_key, COLL_KEY_TYPE_CP, pos1);
    }

  if (INTL_IS_NEXT_CONTR (pos2))
    {
      make_coll_key (&pos2_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (pos2));
    }
  else
    {
      make_coll_key (&pos2_key, COLL_KEY_TYPE_CP, pos2);
    }

  return uca_comp_func_coll_key (&pos1_key, &pos2_key);
}

/*
 * build_compressed_uca_w_l13 - builds the equivalent UCA weights for levels
 *                  1 to 3 of a collation element list
 *
 *  Note :
 *  The encoding of levels 1 to 3 is :
 *  33333332 22222222 1111111 1111111
 *  Ranges
 *  L1 = 0000-ffff
 *  L2 = 0000-01ff
 *  L3 = 0000-007f
 *
 */
static void
build_compressed_uca_w_l13 (const UCA_COLL_CE_LIST * ce_list, UCA_L13_W * uca_w_l13)
{
  int i;

  assert (ce_list != NULL);
  assert (uca_w_l13 != NULL);

  for (i = 0; i < ce_list->num; i++)
    {
      UCA_W w1 = GET_UCA_WEIGHT (ce_list, i, 0);
      UCA_W w2 = GET_UCA_WEIGHT (ce_list, i, 1);
      UCA_W w3 = GET_UCA_WEIGHT (ce_list, i, 2);
      UCA_L13_W w_l123 = w3 & 0x0000007f;

      w_l123 <<= 9;
      w_l123 |= w2 & 0x000001ff;
      w_l123 <<= 16;

      w_l123 |= w1 & 0x0000ffff;

      uca_w_l13[i] = w_l123;
    }
}

/*
 * build_uca_w_l4 - builds the equivalent UCA weights for level 4 from a
 *          collation element list
 */
static void
build_uca_w_l4 (const UCA_COLL_CE_LIST * ce_list, UCA_L4_W * uca_w_l4)
{
  int i;

  assert (ce_list != NULL);
  assert (uca_w_l4 != NULL);

  for (i = 0; i < ce_list->num; i++)
    {
      uca_w_l4[i] = GET_UCA_WEIGHT (ce_list, i, 3);
    }
}

#endif //#if defined(SA_MODE)