CUBRID Engine  latest
uca_support.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * uca_support.c : Unicode Collation Algorithm support
21  */
22 #include <assert.h>
23 
24 #include <errno.h>
25 #include "utility.h"
26 #include "environment_variable.h"
27 #include "locale_support.h"
28 #include "error_manager.h"
29 #include "porting.h"
30 
31 #include "intl_support.h"
32 #include "uca_support.h"
33 #include "unicode_support.h"
34 
35 #if defined (SUPPRESS_STRLEN_WARNING)
36 #define strlen(s1) ((int) strlen(s1))
37 #endif /* defined (SUPPRESS_STRLEN_WARNING) */
38 
39 #define DUCET_FILE "ducet.txt"
40 
41 #define MAX_WEIGHT_LEVELS 4
42 
43 #define MAX_UCA_WEIGHT 0xFFFF
44 #define MAX_UCA_CODEPOINT 0xFFFF
45 
46 #define UCA_CONTR_EXP_CNT_GROW 8
47 
48 #define MAX_LOGICAL_POS 14
49 
50 /* Collation element */
51 typedef struct uca_coll_ce UCA_COLL_CE;
53 {
55 };
56 
59 {
60  unsigned char num;
62 };
63 
/* Read weight level `w` of the `i`-th collation element stored in `ce_list` (a pointer). */
#define GET_UCA_WEIGHT(ce_list,i,w) ((ce_list)->ce[(i)].weight[(w)])

/*
 * Store `val` as weight level `w` of the `i`-th collation element of `ce_list` (a pointer).
 * The expansion is a single statement WITHOUT a trailing semicolon: the caller's own `;`
 * terminates it, so the macro can be used as the body of an unbraced if/else.
 */
#define SET_UCA_WEIGHT(ce_list,i,w,val) \
  do { \
    (ce_list)->ce[(i)].weight[(w)] = (val); \
  } while (0)
69 
70 /* UCA contraction */
71 typedef struct uca_chr_seq UCA_CHR_SEQ;
73 {
75  int cp_count;
76 
78 };
79 
82 
83 /* Contraction, ID tuple, used for reconciliation after contractions list optimization */
86 {
88  int pos_id;
89 };
90 
91 /* Collation keys : single codepoint, multiple codepoints (contractions) */
92 typedef enum
93 {
98 
99 typedef struct uca_coll_key UCA_COLL_KEY;
101 {
103  union
104  {
105  int cp;
106  int contr_id;
107  int exp_id;
108  } val;
109 };
110 
111 typedef struct uca_storage UCA_STORAGE;
112 
115 {
118 };
119 
121 {
122  /* single code-point CE */
124 
125  /* contractions CE */
129 
130  /* tailoring expansions CE */
132  int max_exp;
134 
135  /* settings for previous instance (used only by DUCET storage) */
136  char prev_file_path[PATH_MAX];
138 };
139 
140 static UCA_STORAGE ducet = {
141  NULL,
142  NULL, 0, 0,
143  NULL, 0, 0,
144  "", CONTR_IGNORE
145 };
146 
148  NULL,
149  NULL, 0, 0,
150  NULL, 0, 0,
151  "", CONTR_IGNORE
152 };
153 
155 
157 
159 
160 /* used for sorting */
162 
163 static int load_ducet (const char *file_path, const int sett_contr_policy);
164 static int init_uca_instance (LOCALE_COLLATION * lc);
165 static int destroy_uca_instance (void);
166 static int build_key_list_groups (LOCALE_COLLATION * lc);
167 static void sort_coll_key_lists (LOCALE_COLLATION * lc);
168 static void sort_one_coll_key_list (LOCALE_COLLATION * lc, int weight_index);
169 static int uca_comp_func_coll_key_fo (const void *arg1, const void *arg2);
170 static int uca_comp_func_coll_key (const void *arg1, const void *arg2);
172 static int create_opt_weights (LOCALE_COLLATION * lc);
174 static int set_next_value_for_coll_key (LOCALE_COLLATION * lc, const UCA_COLL_KEY * coll_key,
175  const UCA_COLL_KEY * next_key);
176 static int add_opt_coll_contraction (LOCALE_COLLATION * lc, const UCA_COLL_KEY * contr_key, const unsigned int wv,
177  bool use_expansions);
178 static int compare_ce_list (UCA_COLL_CE_LIST * ce_list1, UCA_COLL_CE_LIST * ce_list2, UCA_OPTIONS * uca_opt);
179 static UCA_COLL_KEY *get_key_with_ce_sublist (UCA_COLL_CE_LIST * uca_item, const int lvl);
180 static void make_coll_key (UCA_COLL_KEY * key, UCA_COLL_KEY_TYPE type, const int key_id);
181 static int find_contr_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st);
182 static int find_exp_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st);
183 static int apply_tailoring_rule (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key,
184  T_LEVEL lvl);
185 static int apply_tailoring_rule_identity (UCA_COLL_KEY * key, UCA_COLL_KEY * ref_key);
186 static int apply_tailoring_rule_w_dir (TAILOR_DIR dir, UCA_COLL_KEY * anchor_key, UCA_COLL_KEY * key,
187  UCA_COLL_KEY * ref_key, T_LEVEL lvl);
188 static int apply_tailoring_rules (LOCALE_COLLATION * lc);
189 static int compute_weights_per_level_stats (void);
190 #if 0
191 static int compact_weight_values (const int level, const UCA_W max_weight);
192 static void build_weight_remap_filter (const UCA_W * w_ocurr, const int max_weight, UCA_W * w_filter);
193 #endif
194 static int add_key_to_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv);
195 static int remove_key_from_weight_stats_list (const UCA_COLL_KEY * key, UCA_W wv);
196 static int change_key_weight_list (const UCA_COLL_KEY * key, UCA_W w_from, UCA_W w_to);
197 static int string_to_coll_ce_list (char *s, UCA_COLL_CE_LIST * ui);
199 static UCA_CONTRACTION *new_contraction (UCA_STORAGE * storage);
200 static int add_uca_contr_or_exp (LOCALE_COLLATION * lc, UCA_STORAGE * storage, const unsigned int *cp_array,
201  const int cp_count, const UCA_COLL_KEY_TYPE seq_type);
202 static int read_cp_from_tag (unsigned char *buffer, CP_BUF_TYPE type, UCA_CP * cp);
203 
204 static int comp_func_coll_contr_bin (const void *arg1, const void *arg2);
205 
206 static int create_opt_ce_w_exp (LOCALE_COLLATION * lc);
207 
208 static int uca_comp_func_coll_list_exp_fo (const void *arg1, const void *arg2);
209 static int uca_comp_func_coll_list_exp (const void *arg1, const void *arg2);
210 
211 static void build_compressed_uca_w_l13 (const UCA_COLL_CE_LIST * ce_list, UCA_L13_W * uca_w_l13);
212 static void build_uca_w_l4 (const UCA_COLL_CE_LIST * ce_list, UCA_L4_W * uca_w_l4);
213 /*
214  * load_ducet - Read the DUCET file (standardised and available at Unicode.org)
215  * into the ducet array; parse it and load all the information in
216  * the ducet storage.
217  * Returns: error status
218  *
219  * file_path(in): file path for DUCET file
220  * sett_contr_policy(in): behavior for contractions
221  *
 * Notes:
 *  - returns NO_ERROR without reading anything when file_path and
 *    sett_contr_policy equal the values recorded by the previous call;
 *    otherwise uca_free_data () is called and the table is rebuilt.
 *  - lines whose first code point, or any code point of the sequence, is
 *    above MAX_UCA_CODEPOINT are skipped.
 *  - multi code point lines (contractions) are stored only when
 *    sett_contr_policy has the CONTR_DUCET_USE bits set.
 *  - code points not present in the file receive a two-element implicit
 *    weight after the file has been read.
222  */
223 static int
224 load_ducet (const char *file_path, const int sett_contr_policy)
225 {
226  FILE *f = NULL;
227  char str[256];
228  char str_ref[256];
229  char str_orig[256];
230  char *weights[64];
231  int lines = 1;
232  int i;
233  int err_status = NO_ERROR;
234  char err_msg[ERR_MSG_SIZE];
235  unsigned int cp;
236  int w;
237
238  UCA_COLL_CE_LIST *temp_ducet = NULL;
239  UCA_COLL_CE_LIST *ducet_cp = NULL;
240
241  assert (file_path != NULL);
242
243  if (strcmp (file_path, ducet.prev_file_path) == 0 && sett_contr_policy == ducet.prev_contr_policy)
244  {
245  /* already loaded */
246  return NO_ERROR;
247  }
248
 /* different file or policy : drop whatever was loaded before */
249  uca_free_data ();
250
251  temp_ducet = (UCA_COLL_CE_LIST *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
252  if (temp_ducet == NULL)
253  {
254  err_status = ER_LOC_GEN;
255  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
256  goto exit;
257  }
258
259  ducet_cp = ducet.coll_cp = temp_ducet;
260
 /* reset every entry : no collation elements, all weights zero */
261  for (i = 0; i <= MAX_UCA_CODEPOINT; i++)
262  {
263  ducet_cp[i].num = 0;
264  for (w = 0; w < MAX_WEIGHT_LEVELS; w++)
265  {
266  int ce_pos;
267
268  for (ce_pos = 0; ce_pos < MAX_UCA_EXP_CE; ce_pos++)
269  {
270  SET_UCA_WEIGHT (&(ducet_cp[i]), ce_pos, w, 0);
271  }
272  }
273  }
274
275  f = fopen_ex (file_path, "rt");
276  if (f == NULL)
277  {
278  err_status = ER_LOC_GEN;
279  snprintf_dots_truncate (err_msg, sizeof (err_msg) - 1, "Cannot open file %s", file_path);
280  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
281  goto exit;
282  }
283
 /* mark every logical position as "not seen yet" (-1) before parsing */
284  assert (RULE_POS_LAST_TRAIL < sizeof (logical_pos_cp) / sizeof (logical_pos_cp[0]));
285  for (i = 0; i < (int) (sizeof (logical_pos_cp) / sizeof (logical_pos_cp[0])); i++)
286  {
287  logical_pos_cp[i] = -1;
288  }
289
 /* one iteration per chunk returned by fgets (at most sizeof (str) - 1 characters) */
290  while (fgets (str, sizeof (str), f))
291  {
292  char *comment;
293  char *weight;
294  char *s, *save;
295  int codenum;
296  bool is_variable = false;
297  char is_ignorable[4] = { -1, -1, -1, -1 };
298  UCA_COLL_CE_LIST *ce_list = NULL;
299  UCA_CP cp_list[LOC_MAX_UCA_CHARS_SEQ];
300  uint32 cp_int_list[LOC_MAX_UCA_CHARS_SEQ];
301  int cp_list_count = 0;
302  bool is_allowed = true;
303  int val;
304
305  lines++;
306
 /* NOTE(review): str_ref and str_orig are not read in the visible part of this function */
307  strcpy (str_ref, str);
308  strcpy (str_orig, str);
 /* leading hexadecimal number of the line = first code point; the status of str_to_int32 is not checked */
309  str_to_int32 (&val, &s, str, 16);
310  cp = (unsigned int) val;
311
312  /* Skip comment lines and unicode values above max allowed CP */
313  if (str[0] == '#' || (cp > MAX_UCA_CODEPOINT))
314  {
315  continue;
316  }
317
318  if ((comment = strchr (str, '#')))
319  {
320  /* Remove comments found after the weight list. */
321  *comment++ = '\0';
322  for (; *comment == ' '; comment++);
323  }
324  else
325  {
326  /* Skip empty lines and @version line. */
327  continue;
328  }
329
330  if ((weight = strchr (str, ';')))
331  {
332  /* Drop chars after ';' until '[' reached */
333  *weight++ = '\0';
334  for (; *weight == ' '; weight++);
335  /* At this point, we have the unicode representation stored in str and weight items stored in the weight
336  * string var */
337  }
338  else
339  {
340  /* If the "if" tests above pass, but the line does not contain ';' then the line is not valid, so skip it */
341  continue;
342  }
343
344  /* Count the number of code points in the Unicode item, e.g. codenum will be greater than 1 if a contraction is found. */
345  codenum = 1;
346  cp_int_list[cp_list_count++] = cp;
 /* parse the first code point again so that s points just after it; the remaining ones go to cp_int_list[1..] */
347  str_to_int32 (&val, &s, str, 16);
348  codenum += string_to_int_array (s, &(cp_int_list[1]), LOC_MAX_UCA_CHARS_SEQ - 1, " \t");
349
350  if (codenum > LOC_MAX_UCA_CHARS_SEQ)
351  {
352  /* If the number of codepoints found in the input string is greater than LOC_MAX_UCA_CHARS_SEQ, only the
353  * first LOC_MAX_UCA_CHARS_SEQ are used, and no error is thrown. */
354  codenum = LOC_MAX_UCA_CHARS_SEQ;
355  }
356
357  cp_list_count = codenum;
358
 /* reject the whole line if any code point of the sequence is above the supported maximum */
359  for (i = 0; i < codenum; i++)
360  {
361  if (cp_int_list[i] > MAX_UCA_CODEPOINT)
362  {
363  is_allowed = false;
364  cp_list_count = i;
365  break;
366  }
367  cp_list[i] = (UCA_CP) cp_int_list[i];
368  }
369
370  if (!is_allowed)
371  {
372  continue;
373  }
374
375  if (codenum > 1)
376  {
377  UCA_CONTRACTION *contr = NULL;
378
 /* contractions are kept only when the policy has the CONTR_DUCET_USE bits set */
379  if ((sett_contr_policy & CONTR_DUCET_USE) != CONTR_DUCET_USE)
380  {
381  continue;
382  }
383
384  assert (codenum <= LOC_MAX_UCA_CHARS_SEQ);
385
386  codenum = MIN (codenum, LOC_MAX_UCA_CHARS_SEQ);
387
388  contr = new_contraction (&ducet);
389  if (contr == NULL)
390  {
391  err_status = ER_OUT_OF_VIRTUAL_MEMORY;
392  goto exit;
393  }
394
395  contr->cp_count = cp_list_count;
396  memcpy (contr->cp_list, cp_list, cp_list_count * sizeof (UCA_CP));
397
 /* the weights parsed below are written into the new contraction */
398  ce_list = &(contr->ce);
399  }
400  else
401  {
 /* single code point : the weights are written into its slot of the table */
402  ce_list = &(ducet_cp[cp]);
403  }
404
405  assert (ce_list != NULL);
406
 /* split the weight string on ' ', '[' and ']'; each token is one collation element */
 /* NOTE(review): ce_list->num is not checked here against the 64 slots of weights[] */
407  ce_list->num = 0;
408  s = strtok_r (weight, " []", &save);
409  while (s)
410  {
411  /* Count the number of collation elements of the current char */
412  weights[ce_list->num] = s;
413  s = strtok_r (NULL, " []", &save);
414  ce_list->num++;
415  }
416
417  for (w = 0; w < ce_list->num; w++)
418  {
419  int partnum;
420
421  partnum = 0;
422  s = weights[w];
423
 /* a collation element whose text starts with '*' marks the entry as variable */
424  is_variable = ((*s) == '*') ? true : false;
425
426  /* Now, in weights[...] we have the collation elements stored as strings We decompose the N collation
427  * elements into 4 weight values and store them into uca[code].weight[1..4][1..N] */
428  while (*s)
429  {
430  char *endptr;
431  int result = 0;
432  int val;
433
 /* s + 1 skips the one-character prefix in front of each hexadecimal weight */
434  result = str_to_int32 (&val, &endptr, s + 1, 16);
435  SET_UCA_WEIGHT (ce_list, w, partnum, (int) val);
436
 /* NOTE(review): this assert runs after the weight has already been stored at index partnum */
437  assert (partnum < 4);
 /* is_ignorable[level] : -1 = no weight seen yet, 1 = set by a zero weight while still -1, 0 = any other case */
438  if (val == 0 && is_ignorable[partnum] == -1)
439  {
440  is_ignorable[partnum] = 1;
441  }
442  else
443  {
444  is_ignorable[partnum] = 0;
445  }
446
447  s = endptr;
448  partnum++;
449  }
450  }
451
452  if (is_variable)
453  {
456  {
458  }
459  }
460
461  assert (is_ignorable[0] == 0 || is_ignorable[0] == 1);
462  assert (is_ignorable[1] == 0 || is_ignorable[1] == 1);
463  assert (is_ignorable[2] == 0 || is_ignorable[2] == 1);
464
465  if (is_ignorable[0] == 1)
466  {
469  {
471  }
472  }
473
474  if (is_ignorable[1] == 1)
475  {
478  {
480  }
481  }
482
483  if (is_ignorable[2] == 1)
484  {
487  {
489  }
490  }
491
492  if (is_ignorable[0] == 0 && is_ignorable[1] == 0 && is_ignorable[2] == 0)
493  {
496  {
498  }
499  }
500  }
501
502  fclose (f);
503  f = NULL;
504
505  /* Set implicit weights for unicode values not found in the DUCET file */
506  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
507  {
508  unsigned int base, aaaa, bbbb;
509
510  /* Skip if the Unicode value was found in the DUCET file */
511  if (ducet_cp[cp].num)
512  {
513  continue;
514  }
515
516  /*
517  * 3400;<CJK Ideograph Extension A, First> 4DB5;<CJK Ideograph Extension A, Last> 4E00;<CJK Ideograph, First>
518  * 9FA5;<CJK Ideograph, Last> */
519
520  if (cp >= 0x3400 && cp <= 0x4DB5)
521  {
522  base = 0xFB80;
523  }
524  else if (cp >= 0x4E00 && cp <= 0x9FA5)
525  {
526  base = 0xFB40;
527  }
528  else
529  {
530  base = 0xFBC0;
531  }
532
 /* two collation elements : level-0 weights are aaaa then bbbb; levels 1..3 are 0x0020, 0x0002, 0x0001 then 0 */
533  aaaa = base + (cp >> 15);
534  bbbb = (cp & 0x7FFF) | 0x8000;
535  SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 0, aaaa);
536  SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 0, bbbb);
537
538  SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 1, 0x0020);
539  SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 1, 0x0000);
540
541  SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 2, 0x0002);
542  SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 2, 0x0000);
543
544  SET_UCA_WEIGHT (&(ducet_cp[cp]), 0, 3, 0x0001);
545  SET_UCA_WEIGHT (&(ducet_cp[cp]), 1, 3, 0x0000);
546
547  ducet_cp[cp].num = 2;
548  }
549
550 exit:
 /* NOTE(review): the failure paths also jump here, so path and policy are recorded even when loading failed; */
 /* a later call with the same arguments then takes the "already loaded" return at the top - confirm this is intended */
551  strncpy (ducet.prev_file_path, file_path, sizeof (ducet.prev_file_path));
552  ducet.prev_file_path[sizeof (ducet.prev_file_path) - 1] = '\0';
553
554  ducet.prev_contr_policy = sett_contr_policy;
555
 /* f is still open only when an error occurred while reading */
556  if (f != NULL)
557  {
558  fclose (f);
559  }
560
561  return err_status;
562 }
563 
564 /*
565  * init_uca_instance - Prepares one UCA instance for processing
 * Allocates the code point table of curr_uca, loads the DUCET through
 * load_ducet () and copies its code point table and contractions into
 * curr_uca, then allocates the zero-filled w_occurences[] arrays and
 * weight_key_list.
566  * Returns : ER_LOC_GEN - if a buffer allocation fails or a DUCET contraction
 * contains a codepoint >= lc->tail_coll.sett_max_cp;
 * ER_OUT_OF_VIRTUAL_MEMORY - if new_contraction () returns NULL;
 * the status of load_ducet () - if loading the DUCET fails;
567  * NO_ERROR - success if otherwise.
 * lc(in) : supplies tail_coll.uca_opt.sett_contr_policy and tail_coll.sett_max_cp
 *
 * Note : nothing is released here on the failure paths; buffers allocated
 * before the failure stay referenced by the file-level variables.
568  */
569 static int
571 {
572  int i;
573  int err_status = NO_ERROR;
574  UCA_COLL_CE_LIST *uca_cp = NULL;
575  char ducet_file_path[PATH_MAX];
576
577  assert (lc != NULL);
578
579  uca_cp = (UCA_COLL_CE_LIST *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
580  if (uca_cp == NULL)
581  {
582  err_status = ER_LOC_GEN;
583  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
584  goto exit;
585  }
586  memset (uca_cp, 0, (MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
587
 /* stored in curr_uca right away, before any later step can fail */
588  curr_uca.coll_cp = uca_cp;
589
 /* build the path of DUCET_FILE and load it with the contraction policy of this collation */
590  envvar_localedatadir_file (ducet_file_path, sizeof (ducet_file_path), DUCET_FILE);
591  err_status = load_ducet (ducet_file_path, lc->tail_coll.uca_opt.sett_contr_policy);
592
593  if (err_status != NO_ERROR)
594  {
595  goto exit;
596  }
 /* start from a full copy of the DUCET code point table */
597  memcpy (uca_cp, ducet.coll_cp, (MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_CE_LIST));
598
599  /* copy contractions */
600  for (i = 0; i < ducet.count_contr; i++)
601  {
602  UCA_CONTRACTION *ducet_contr = &(ducet.coll_contr[i]);
603  UCA_CONTRACTION *uca_contr;
604  int j;
605
 /* a contraction with a codepoint at or above sett_max_cp is an error, not skipped */
606  for (j = 0; j < ducet_contr->cp_count; j++)
607  {
608  if (ducet_contr->cp_list[j] >= lc->tail_coll.sett_max_cp)
609  {
610  err_status = ER_LOC_GEN;
611  LOG_LOCALE_ERROR ("Codepoint in DUCET contraction exceeds " "maximum allowed codepoint", ER_LOC_GEN,
612  true);
613  goto exit;
614  }
615  }
616
617  uca_contr = new_contraction (&curr_uca);
618
619  if (uca_contr == NULL)
620  {
621  err_status = ER_OUT_OF_VIRTUAL_MEMORY;
622  goto exit;
623  }
624
 /* duplicate the codepoint sequence and its collation element list */
625  uca_contr->cp_count = ducet_contr->cp_count;
626  memcpy (uca_contr->cp_list, ducet_contr->cp_list, ducet_contr->cp_count * sizeof (UCA_CP));
627  memcpy (&(uca_contr->ce), &(ducet_contr->ce), sizeof (UCA_COLL_CE_LIST));
628  }
629
 /* one zero-filled counter array of MAX_UCA_WEIGHT + 1 entries per weight level */
630  for (i = 0; i < MAX_WEIGHT_LEVELS; i++)
631  {
632  w_occurences[i] = (UCA_W *) malloc ((MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));
633
634  if (w_occurences[i] == NULL)
635  {
636  err_status = ER_LOC_GEN;
637  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
638  goto exit;
639  }
640
641  memset (w_occurences[i], 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));
642  }
643
 /* one zero-filled key list entry per possible weight value */
644  weight_key_list = (UCA_WEIGHT_KEY_LIST *) malloc ((MAX_UCA_WEIGHT + 1) * sizeof (UCA_WEIGHT_KEY_LIST));
645  if (weight_key_list == NULL)
646  {
647  err_status = ER_LOC_GEN;
648  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
649  goto exit;
650  }
651  memset (weight_key_list, 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_WEIGHT_KEY_LIST));
652
653 exit:
654  return err_status;
655 }
656 
657 /*
658  * destroy_uca_instance - Unload the UCA array and all auxiliary arrays.
 * Frees the code point table, the contraction list and the expansion list
 * of curr_uca, every w_occurences[] array, and weight_key_list together
 * with the key_list of each of its entries. Each pointer is reset to NULL
 * after being freed, and pointers that are already NULL are left alone.
659  * Returns : NO_ERROR.
660  */
661 static int
663 {
664  int i;
665
666  if (curr_uca.coll_cp != NULL)
667  {
668  free (curr_uca.coll_cp);
669  curr_uca.coll_cp = NULL;
670  }
671
672  if (curr_uca.coll_contr != NULL)
673  {
674  assert (curr_uca.max_contr > 0);
675
676  free (curr_uca.coll_contr);
677  curr_uca.coll_contr = NULL;
 /* reset both the capacity and the used count of the contraction list */
678  curr_uca.max_contr = 0;
679  curr_uca.count_contr = 0;
680  }
681
682  if (curr_uca.coll_exp != NULL)
683  {
684  assert (curr_uca.max_exp > 0);
685
686  free (curr_uca.coll_exp);
687  curr_uca.coll_exp = NULL;
 /* reset both the capacity and the used count of the expansion list */
688  curr_uca.max_exp = 0;
689  curr_uca.count_exp = 0;
690  }
691
692  for (i = 0; i < MAX_WEIGHT_LEVELS; i++)
693  {
694  if (w_occurences[i] != NULL)
695  {
696  free (w_occurences[i]);
697  w_occurences[i] = NULL;
698  }
699  }
700
701  if (weight_key_list != NULL)
702  {
 /* release the per-weight key lists before the array that holds them */
703  for (i = 0; i <= MAX_UCA_WEIGHT; i++)
704  {
705  if (weight_key_list[i].key_list != NULL)
706  {
707  free (weight_key_list[i].key_list);
708  weight_key_list[i].key_list = NULL;
709  }
710  }
711  free (weight_key_list);
712  weight_key_list = NULL;
713  }
714
715  return NO_ERROR;
716 }
717 
718 /*
719  * compare_ce_list - compares two lists of collation elements using the
720  * settings of locale
721  * Returns : -1 if ce_list1 collates before ce_list2;
722  * 0 if ce_list1 collates on the same level as ce_list2;
723  * 1 if ce_list1 collates after ce_list2.
724  * ce_list1 (in) : collation element list to compare.
725  * ce_list2 (in) : collation element list to compare.
726  * uca_opt(in) : sorting options
 *
 * Note : an empty list (num == 0) collates before a non-empty one and two
 * empty lists are equal. Otherwise weights are compared level by level
 * (level 0 first) and, inside a level, element by element; the first
 * difference decides. Levels above uca_opt->sett_strength are skipped,
 * except level index 2 when sett_caseFirst + (sett_caseLevel ? 1 : 0) is
 * positive. The sign of the result is inverted at level index 1 when
 * sett_backwards == 1 and at level index 2 when sett_caseFirst == 1.
727  */
728 static int
730 {
731  int result, i, weight_level;
732  int numCodepoints = 0;
733
734  assert (ce_list1 != NULL);
735  assert (ce_list2 != NULL);
736  assert (uca_opt != NULL);
737
 /* the minimum is used only to detect that at least one list is empty */
738  numCodepoints = MIN (ce_list1->num, ce_list2->num);
739
740  if (numCodepoints == 0)
741  {
742  /* one of the chars has no codepoints attached */
743  if (ce_list1->num > 0)
744  {
745  return 1;
746  }
747  else if (ce_list2->num > 0)
748  {
749  return -1;
750  }
751
752  return 0;
753  }
754  result = 0;
755
 /* compare up to the longer list : entries past the shorter list's num are read as well */
 /* (presumably they hold zero weights - TODO confirm against the code that fills the lists) */
756  numCodepoints = MAX (ce_list1->num, ce_list2->num);
757
758  if (uca_opt->use_only_first_ce)
759  {
760  numCodepoints = MIN (numCodepoints, 1);
761  }
762
763  for (weight_level = 0; (weight_level < MAX_WEIGHT_LEVELS) && (result == 0); weight_level++)
764  {
 /* skip levels above the configured strength, keeping level index 2 when caseFirst or caseLevel is set */
765  if (weight_level + 1 > (int) (uca_opt->sett_strength)
766  && ((uca_opt->sett_caseFirst + (uca_opt->sett_caseLevel ? 1 : 0) == 0)
767  || ((uca_opt->sett_caseFirst + (uca_opt->sett_caseLevel ? 1 : 0) > 0) && weight_level != 2)))
768  {
769  continue;
770  }
771
 /* both loops stop as soon as result becomes non-zero */
772  for (i = 0; (i < numCodepoints) && (result == 0); i++)
773  {
774  if (GET_UCA_WEIGHT (ce_list1, i, weight_level) < GET_UCA_WEIGHT (ce_list2, i, weight_level))
775  {
776  result = -1;
777  }
778  else if (GET_UCA_WEIGHT (ce_list1, i, weight_level) > GET_UCA_WEIGHT (ce_list2, i, weight_level))
779  {
780  result = 1;
781  }
782
783  /* accents level */
784  if (weight_level == 1 && uca_opt->sett_backwards == 1)
785  {
786  /* backwards */
787  result = -result;
788  }
789  else if (weight_level == 2 && uca_opt->sett_caseFirst == 1)
790  {
791  /* caseFirst = upper : revert L3 DUCET order */
792  result = -result;
793  }
794  }
795  }
796
797  return result;
798 }
799 
800 /*
801  * uca_process_collation - Calls all the functions which
802  * actually do the processing.
 * Visible steps, in order : init_uca_instance, apply_absolute_tailoring_rules,
 * compute_weights_per_level_stats, build_key_list_groups,
 * apply_tailoring_rules, then either create_opt_ce_w_exp or
 * sort_coll_key_lists followed by create_opt_weights; on success the UCA
 * options of lc->tail_coll are copied into lc->opt_coll.
803  * Returns: NO_ERROR if all steps succeed;
804  * otherwise the error status of the first step that failed
805  * (processing stops at that step).
806  * lc(in/out) : contains the collation settings and optimization results.
807  *
808  */
809 int
811 {
812  int err_status = NO_ERROR;
813
814  if (is_verbose)
815  {
816  printf ("Initializing UCA\n");
817  }
818
819  err_status = init_uca_instance (lc);
820  if (err_status != NO_ERROR)
821  {
822  goto exit;
823  }
824
825  if (is_verbose)
826  {
827  printf ("DUCET file has %d contractions\n", ducet.count_contr);
828  printf ("Applying %d CUBRID tailoring rules\n", lc->tail_coll.cub_count_rules);
829  }
830
 /* file-level pointer to the options of this collation; reset to NULL at exit */
831  uca_tailoring_options = &(lc->tail_coll.uca_opt);
832  err_status = apply_absolute_tailoring_rules (lc);
833  if (err_status != NO_ERROR)
834  {
835  goto exit;
836  }
837
838  if (is_verbose)
839  {
840  printf ("Build weight statistics\n");
841  }
842
843  err_status = compute_weights_per_level_stats ();
844  if (err_status != NO_ERROR)
845  {
846  goto exit;
847  }
848
849  err_status = build_key_list_groups (lc);
850  if (err_status != NO_ERROR)
851  {
852  goto exit;
853  }
854
855  if (is_verbose)
856  {
857  printf ("Applying %d UCA tailoring rules\n", lc->tail_coll.count_rules);
858  }
859  err_status = apply_tailoring_rules (lc);
860  if (err_status != NO_ERROR)
861  {
862  goto exit;
863  }
864
 /* NOTE(review): the condition selecting between the two branches below is not visible in this listing */
866  {
867  if (is_verbose)
868  {
869  printf ("Building optimized weights with expansions\n");
870  }
871
872  err_status = create_opt_ce_w_exp (lc);
873  if (err_status != NO_ERROR)
874  {
875  goto exit;
876  }
877  }
878  else
879  {
880  if (is_verbose)
881  {
882  printf ("Sorting weight keys lists\n");
883  }
884  sort_coll_key_lists (lc);
885
886  if (is_verbose)
887  {
888  printf ("Building optimized weights\n");
889  }
890
891  err_status = create_opt_weights (lc);
892  if (err_status != NO_ERROR)
893  {
894  goto exit;
895  }
896  }
897
 /* reached only on success : publish the tailoring options as the options of the optimized collation */
898  memcpy (&(lc->opt_coll.uca_opt), &(lc->tail_coll.uca_opt), sizeof (UCA_OPTIONS));
899 exit:
901  uca_tailoring_options = NULL;
902  if (is_verbose)
903  {
904  printf ("UCA finished\n");
905  }
906  return err_status;
907 }
908 
909 /*
910  * apply_tailoring_rules - Loop through the tailoring rules in
911  * LOCALE_COLLATION::coll.rules, parse the composed
912  * rules if any, and call the function which does
913  * the tailoring e.g. execute the rule on the
914  * already processed data.
 * For each rule three keys are built : the anchor
 * key, the reference key and the tailored key. A
 * key is a single codepoint, a contraction or an
 * expansion of curr_uca. When t_rule->multiple_chars
 * is set, every codepoint of t_buf is tailored in
 * turn, each one becoming the reference of the next.
915  * Returns: error status
 * (NO_ERROR, or non-zero as soon as one rule cannot be built or applied;
 * the remaining rules are then not processed)
916  * lc (in/out) : collation settings and optimization results.
917  */
918 static int
920 {
921  int i;
922  UCA_COLL_KEY anchor_key;
923  UCA_COLL_KEY ref_key;
924  UCA_COLL_KEY tailor_key;
925  unsigned char *ptr_uchar;
926  int err_status = NO_ERROR;
927  TAILOR_RULE *t_rule = NULL;
928  char er_msg[ERR_MSG_SIZE];
929
930  for (i = 0; i < lc->tail_coll.count_rules; i++)
931  {
932  unsigned int anchor_cp_list[LOC_MAX_UCA_CHARS_SEQ];
933  unsigned int ref_cp_list[LOC_MAX_UCA_CHARS_SEQ];
934  int anchor_cp_count = 0;
935  int ref_cp_count = 0;
936  int buf_size;
937  int cp_found;
938  int contr_id;
939
940  t_rule = &(lc->tail_coll.rules[i]);
941
942  /* anchor key : */
943  if (t_rule->r_pos_type != RULE_POS_BUFFER)
944  {
 /* logical position : the anchor codepoint comes from logical_pos_cp[] */
945  int anchor_cp;
946
947  assert (t_rule->r_pos_type > 0);
948  assert (t_rule->r_pos_type < MAX_LOGICAL_POS);
949
950  anchor_cp = logical_pos_cp[t_rule->r_pos_type];
951
952  assert (anchor_cp >= 0 && anchor_cp <= MAX_UCA_CODEPOINT);
953  if (anchor_cp >= lc->tail_coll.sett_max_cp)
954  {
955  err_status = ER_LOC_GEN;
956  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d. Codepoint value too big", i);
957  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
958  goto exit;
959  }
960
961  assert (anchor_cp > 0);
962  make_coll_key (&anchor_key, COLL_KEY_TYPE_CP, anchor_cp);
 /* for a logical position the reference key is the anchor key itself */
963  memcpy (&ref_key, &anchor_key, sizeof (UCA_COLL_KEY));
964  }
965  else
966  {
 /* explicit anchor : decode the UTF-8 text of anchor_buf into a list of codepoints */
967  buf_size = strlen (t_rule->anchor_buf);
968  ptr_uchar = (unsigned char *) t_rule->anchor_buf;
969
970  cp_found =
971  intl_utf8_to_cp_list (ptr_uchar, buf_size, anchor_cp_list, LOC_MAX_UCA_CHARS_SEQ, &anchor_cp_count);
972
973  if (cp_found <= 0 || cp_found > anchor_cp_count)
974  {
975  err_status = ER_LOC_GEN;
976  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d", i);
977  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
978  goto exit;
979  }
980
981  if (anchor_cp_count == 1)
982  {
983  assert (*anchor_cp_list > 0);
984  make_coll_key (&anchor_key, COLL_KEY_TYPE_CP, *anchor_cp_list);
985  }
986  else
987  {
988  assert (anchor_cp_count > 1);
989
990  /* contraction or expansion */
993  {
994  continue;
995  }
996
 /* a known contraction is used first; otherwise the sequence is handled as an expansion */
997  contr_id = find_contr_id (anchor_cp_list, anchor_cp_count, &curr_uca);
998
999  if (contr_id != -1)
1000  {
1001  make_coll_key (&anchor_key, COLL_KEY_TYPE_CONTR, contr_id);
1002  }
1003  else
1004  {
1005  int exp_id;
1006  assert (contr_id == -1);
1007
1008  /* this is an expansion */
1010  {
1011  /* ignore expansions */
1012  continue;
1013  }
1014
 /* reuse an existing expansion or create it on the fly */
1015  exp_id = find_exp_id (anchor_cp_list, anchor_cp_count, &curr_uca);
1016
1017  if (exp_id == -1)
1018  {
1019  exp_id = add_uca_contr_or_exp (lc, &curr_uca, anchor_cp_list, anchor_cp_count, COLL_KEY_TYPE_EXP);
1020  }
1021
1022  if (exp_id == -1)
1023  {
1024  err_status = ER_LOC_GEN;
1025  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid anchor in rule :%d." "Cannot create expansion",
1026  i);
1027  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1028  goto exit;
1029  }
1030
1031  make_coll_key (&anchor_key, COLL_KEY_TYPE_EXP, exp_id);
1032  }
1033  }
1034
1035
1036  /* reference key */
1037  buf_size = t_rule->r_buf_size;
1038  ptr_uchar = (unsigned char *) t_rule->r_buf;
1039  cp_found = intl_utf8_to_cp_list (ptr_uchar, buf_size, ref_cp_list, LOC_MAX_UCA_CHARS_SEQ, &ref_cp_count);
1040
1041  if (cp_found <= 0 || cp_found > ref_cp_count)
1042  {
1043  err_status = ER_LOC_GEN;
1044  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule :%d", i);
1045  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1046  goto exit;
1047  }
1048
1049  if (ref_cp_count == 1)
1050  {
1051  if ((int) (*ref_cp_list) >= lc->tail_coll.sett_max_cp)
1052  {
1053  err_status = ER_LOC_GEN;
1054  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule :%d." " Codepoint value too big",
1055  i);
1056  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1057  goto exit;
1058  }
1059
1060  assert (*ref_cp_list > 0);
1061  make_coll_key (&ref_key, COLL_KEY_TYPE_CP, *ref_cp_list);
1062  }
1063  else
1064  {
1065  assert (ref_cp_count > 1);
1066
1067  /* contraction or expansion */
1070  {
1071  continue;
1072  }
1073
1074  contr_id = find_contr_id (ref_cp_list, ref_cp_count, &curr_uca);
1075
1076  if (contr_id != -1)
1077  {
1078  make_coll_key (&ref_key, COLL_KEY_TYPE_CONTR, contr_id);
1079  }
1080  else
1081  {
1082  int exp_id;
1083
1084  /* expansion */
1085  assert (contr_id == -1);
1086
1088  {
1089  continue;
1090  }
1091
 /* reuse an existing expansion or create it on the fly */
1092  exp_id = find_exp_id (ref_cp_list, ref_cp_count, &curr_uca);
1093  if (exp_id == -1)
1094  {
1095  exp_id = add_uca_contr_or_exp (lc, &curr_uca, ref_cp_list, ref_cp_count, COLL_KEY_TYPE_EXP);
1096  }
1097
1098  if (exp_id == -1)
1099  {
1100  err_status = ER_LOC_GEN;
1101  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid reference in rule: %d" "Cannot create expansion",
1102  i);
1103  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1104  goto exit;
1105  }
1106
1107  make_coll_key (&ref_key, COLL_KEY_TYPE_EXP, exp_id);
1108  }
1109  }
1110  }
1111
1112  if (t_rule->multiple_chars)
1113  {
 /* t_buf holds several codepoints : each one is tailored separately, in order */
1114  unsigned char *tailor_curr;
1115  unsigned char *tailor_next;
1116  unsigned char *tailor_end;
1117
1118  tailor_curr = (unsigned char *) (t_rule->t_buf);
1119  tailor_next = tailor_curr;
1120  tailor_end = tailor_curr + t_rule->t_buf_size;
1121
1122  while (tailor_next < tailor_end)
1123  {
1124  unsigned int tailor_cp =
1125  intl_utf8_to_cp (tailor_curr, CAST_STRLEN (tailor_end - tailor_curr), &tailor_next);
1126
1127  assert (lc->tail_coll.sett_max_cp >= 0);
1128  if (tailor_cp >= (unsigned int) lc->tail_coll.sett_max_cp)
1129  {
1130  err_status = ER_LOC_GEN;
1131  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid tailoring in rule :%d." "Codepoint : %4X too big", i,
1132  tailor_cp);
1133  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1134  goto exit;
1135  }
1136
1137  assert (tailor_cp > 0);
1138  make_coll_key (&tailor_key, COLL_KEY_TYPE_CP, tailor_cp);
1139
1140  err_status = apply_tailoring_rule (t_rule->direction, &anchor_key, &tailor_key, &ref_key, t_rule->level);
1141  if (err_status != NO_ERROR)
1142  {
1143  err_status = ER_LOC_GEN;
1144  snprintf (er_msg, sizeof (er_msg) - 1, "Cannot apply :%d", i);
1145  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1146  goto exit;
1147  }
1148
1149  tailor_curr = tailor_next;
1150
 /* the codepoint just tailored becomes the reference for the next one */
1151  memcpy (&ref_key, &tailor_key, sizeof (UCA_COLL_KEY));
1152  }
1153  }
1154  else
1155  {
 /* t_buf is one key : a single codepoint, or a contraction when it decodes to several codepoints */
1156  unsigned int tailor_cp_list[LOC_MAX_UCA_CHARS_SEQ];
1157  int tailor_cp_count;
1158
1159  buf_size = t_rule->t_buf_size;
1160  ptr_uchar = (unsigned char *) t_rule->t_buf;
1161  cp_found =
1162  intl_utf8_to_cp_list (ptr_uchar, buf_size, tailor_cp_list, LOC_MAX_UCA_CHARS_SEQ, &tailor_cp_count);
1163
1164  if (cp_found <= 0 || cp_found > tailor_cp_count)
1165  {
1166  err_status = ER_LOC_GEN;
1167  snprintf (er_msg, sizeof (er_msg) - 1,
1168  "Invalid tailoring in rule :%d." "Invalid number of codepoints: %d", i, cp_found);
1169  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1170  goto exit;
1171  }
1172
1173  if (tailor_cp_count > 1)
1174  {
1175  /* contraction */
1177  {
1178  continue;
1179  }
1180
 /* reuse an existing contraction or create it on the fly */
1181  contr_id = find_contr_id (tailor_cp_list, tailor_cp_count, &curr_uca);
1182
1183  if (contr_id == -1)
1184  {
1185  contr_id = add_uca_contr_or_exp (lc, &curr_uca, tailor_cp_list, tailor_cp_count, COLL_KEY_TYPE_CONTR);
1186  }
1187
1188  if (contr_id == -1)
1189  {
1190  err_status = ER_LOC_GEN;
1191  snprintf (er_msg, sizeof (er_msg) - 1, "Rule :%d. Cannot create contraction.", i);
1192  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1193  goto exit;
1194  }
1195
1196  make_coll_key (&tailor_key, COLL_KEY_TYPE_CONTR, contr_id);
1197  }
1198  else
1199  {
1200  if ((int) (*tailor_cp_list) >= lc->tail_coll.sett_max_cp)
1201  {
1202  err_status = ER_LOC_GEN;
1203  snprintf (er_msg, sizeof (er_msg) - 1, "Invalid tailoring in rule :%d." " Codepoint value too big",
1204  i);
1205  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1206  goto exit;
1207  }
1208
1209  assert (*tailor_cp_list > 0);
1210  make_coll_key (&tailor_key, COLL_KEY_TYPE_CP, *tailor_cp_list);
1211  }
1212
1213  err_status = apply_tailoring_rule (t_rule->direction, &anchor_key, &tailor_key, &ref_key, t_rule->level);
1214  if (err_status != NO_ERROR)
1215  {
 /* unlike the multiple_chars branch, err_status keeps the value returned by apply_tailoring_rule */
1216  snprintf (er_msg, sizeof (er_msg) - 1, "Rule :%d", i);
1217  LOG_LOCALE_ERROR (er_msg, ER_LOC_GEN, true);
1218  goto exit;
1219  }
1220  }
1221  }
1222
1223 exit:
1224  return err_status;
1225 }
1226 
1227 /*
1228  * compute_weights_per_level_stats - Build statistics regarding the number of
1229  * occurrences of each weight on each level
1230  * in each collation element.
 * Counts, in w_occurences[level][weight], the weights of every collation
 * element of the code point table and of the contractions of curr_uca,
 * then determines the highest weight value in use on each level.
 * Returns : ER_LOC_GEN if the highest weight in use on level index 1 is
 * above 0x1ff or on level index 2 is above 0x7f + 1; NO_ERROR otherwise.
 */
1232
1233 static int
1235 {
1236  int depth, weight_level;
1237  int cp, i;
1238  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;
1239  int used_weights[MAX_WEIGHT_LEVELS];
1240  UCA_W max_weight_val[MAX_WEIGHT_LEVELS];
1241  int err_status = NO_ERROR;
1242
 /* single code points : count the weights of every collation element actually in use (depth < num) */
1243  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
1244  {
1245  for (depth = 0; depth < uca_cp[cp].num; depth++)
1246  {
1247  for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
1248  {
1249  UCA_W w = GET_UCA_WEIGHT (&(uca_cp[cp]), depth, weight_level);
1250  w_occurences[weight_level][w]++;
1251  }
1252  }
1253  }
1254
 /* contractions : same counting over each contraction's collation element list */
1255  for (i = 0; i < curr_uca.count_contr; i++)
1256  {
1257  UCA_CONTRACTION *contr = &(curr_uca.coll_contr[i]);
1258
1259  for (depth = 0; depth < contr->ce.num; depth++)
1260  {
1261  for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
1262  {
1263  UCA_W w = GET_UCA_WEIGHT (&(contr->ce), depth, weight_level);
1264  w_occurences[weight_level][w]++;
1265  }
1266  }
1267  }
1268
1269  /* how many weight values are really used */
 /* NOTE(review): used_weights is filled here but not read anywhere else in this function */
1270  for (weight_level = 0; weight_level < MAX_WEIGHT_LEVELS; weight_level++)
1271  {
1272  used_weights[weight_level] = 0;
1273  max_weight_val[weight_level] = 0;
1274  for (i = 0; i < MAX_UCA_WEIGHT + 1; i++)
1275  {
1276  if (w_occurences[weight_level][i] != 0)
1277  {
1278  used_weights[weight_level]++;
 /* i only grows, so the last assignment leaves the highest weight in use */
1279  max_weight_val[weight_level] = i;
1280  }
1281  }
1282  }
1283
 /* upper limits accepted for the weights of level index 1 and level index 2 */
1284  if (max_weight_val[1] > 0x1ff || max_weight_val[2] > 0x7f + 1)
1285  {
1286  err_status = ER_LOC_GEN;
1287  LOG_LOCALE_ERROR ("Cannot store weights. Max weight values exceeded", ER_LOC_GEN, true);
1288  goto exit;
1289  }
1290
1291 exit:
1292  return err_status;
1293 }
1294 
1295 #if 0
1296 /*
1297  * compact_weight_values - rewrites the weight values for one level in UCA
1298  * storage so that the weights are compact values.
1299  *
1300  * return : error code
1301  * level(in): level to compact weights
1302  * max_weight(in): max weight value at this level
1303  *
1304  * Note : this function uses the global 'w_occurences' array
1305  */
1306 static int
1307 compact_weight_values (const int level, const UCA_W max_weight)
1308 {
1309  UCA_W *w_filter = NULL;
1310  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;
1311  int err_status = NO_ERROR;
1312  int cp, i, depth;
1313 
1314  assert (level >= 0 && level <= 3);
1315 
1316  w_filter = (UCA_W *) malloc ((max_weight + 1) * sizeof (UCA_W));
1317  if (w_filter == NULL)
1318  {
1319  err_status = ER_LOC_GEN;
1320  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1321  goto exit;
1322  }
1323 
1324  memset (w_filter, 0xffff, (max_weight + 1) * sizeof (UCA_W));
1325 
1326  build_weight_remap_filter (w_occurences[level], max_weight, w_filter);
1327 
1328  /* reassign new weights and reset weight statistics for this level */
1329  memset (w_occurences[level], 0, (MAX_UCA_WEIGHT + 1) * sizeof (UCA_W));
1330 
1331  for (cp = 0; cp <= MAX_UCA_CODEPOINT; cp++)
1332  {
1333  for (depth = 0; depth < uca_cp[cp].num; depth++)
1334  {
1335  UCA_W w = GET_UCA_WEIGHT (&(uca_cp[cp]), depth, level);
1336 
1337  w = w_filter[w];
1338  assert (w != 0xffff);
1339  SET_UCA_WEIGHT (&(uca_cp[cp]), depth, level, w);
1340 
1341  w_occurences[level][w]++;
1342  }
1343  }
1344 
1345  for (i = 0; i < curr_uca.count_contr; i++)
1346  {
1347  UCA_CONTRACTION *contr = &(curr_uca.coll_contr[i]);
1348 
1349  for (depth = 0; depth < contr->ce.num; depth++)
1350  {
1351  UCA_W w = GET_UCA_WEIGHT (&(contr->ce), depth, level);
1352 
1353  w = w_filter[w];
1354  assert (w != 0xffff);
1355  SET_UCA_WEIGHT (&(contr->ce), depth, level, w);
1356 
1357  w_occurences[level][w]++;
1358  }
1359  }
1360 
1361 exit:
1362  if (w_filter != NULL)
1363  {
1364  free (w_filter);
1365  w_filter = NULL;
1366  }
1367 
1368  return err_status;
1369 }
1370 
1371 /*
1372  * build_weight_remap_filter - builds a filter for remapping weights to a
1373  * compact value range.
1374  *
1375  * return :
1376  * w_ocurr(in): occurences array (number of ocurrences for each weight value)
1377  * max_weight(in): maximum weight before compacting
1378  * w_filter(in/out): weight filter
1379  *
1380  */
1381 static void
1382 build_weight_remap_filter (const UCA_W * w_ocurr, const int max_weight, UCA_W * w_filter)
1383 {
1384  int w;
1385  int last_used_w;
1386 
1387  assert (max_weight > 0 && max_weight <= 0xffff);
1388  assert (w_ocurr != NULL);
1389 
1390  last_used_w = 0;
1391 
1392  for (w = 0; w <= max_weight; w++)
1393  {
1394  if (w_ocurr[w] > 0)
1395  {
1396  w_filter[w] = last_used_w++;
1397  }
1398  }
1399 
1400  assert (last_used_w < max_weight);
1401 }
1402 #endif
1403 
1404 /*
1405  * build_key_list_groups - builds the collation key lists for each L1 weight
1406  * value
1407  * Returns: error status
1408  * lc (in): the collation settings.
 *
 * Note : each list is sized from w_occurences[0][wv]; a key is filed under
 *        the level-1 weight of its first collation element.
 * NOTE(review): when an allocation fails the lists allocated so far are not
 *               released here and the remaining entries are left untouched.
 * NOTE(review): the line holding the function name and parameter list is
 *               absent from this copy.
1409  */
1410 static int
1412 {
1413  int cp, wv, i;
1414  int err_status = NO_ERROR;
1415 
  /* Step 1: one (possibly empty) key list per level-1 weight value. */
1416  for (wv = 0; wv <= MAX_UCA_WEIGHT; wv++)
1417  {
1418  weight_key_list[wv].list_count = 0;
1419  if (w_occurences[0][wv] == 0)
1420  {
1421  weight_key_list[wv].key_list = NULL;
1422  }
1423  else
1424  {
1425  weight_key_list[wv].key_list = (UCA_COLL_KEY *) malloc (w_occurences[0][wv] * sizeof (UCA_COLL_KEY));
1426 
1427  if (weight_key_list[wv].key_list == NULL)
1428  {
1429  err_status = ER_LOC_GEN;
1430  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1431  goto exit;
1432  }
1433  }
1434  }
1435 
  /* Step 2: append every code point below sett_max_cp to the list of its weight.
   * NOTE(review): key_list is dereferenced without a NULL check, so this relies on
   * w_occurences[0][wv] being non-zero for every weight read here - confirm. */
1436  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
1437  {
1438  wv = GET_UCA_WEIGHT (&(curr_uca.coll_cp[cp]), 0, 0);
1439  weight_key_list[wv].key_list[weight_key_list[wv].list_count].val.cp = cp;
1440  weight_key_list[wv].key_list[weight_key_list[wv].list_count].type = COLL_KEY_TYPE_CP;
1441  weight_key_list[wv].list_count++;
1442  }
1443 
  /* Step 3: append every contraction the same way, keyed by its index. */
1444  for (i = 0; i < curr_uca.count_contr; i++)
1445  {
1446  wv = GET_UCA_WEIGHT (&(curr_uca.coll_contr[i].ce), 0, 0);
1447  weight_key_list[wv].key_list[weight_key_list[wv].list_count].val.contr_id = i;
1448  weight_key_list[wv].key_list[weight_key_list[wv].list_count].type = COLL_KEY_TYPE_CONTR;
1449  weight_key_list[wv].list_count++;
1450  }
1451 
1452 exit:
1453  return err_status;
1454 }
1455 
1456 /*
1457  * sort_coll_key_lists - Sorts all the collation keys lists grouped
1458  * by weight values of level 1 weight
1459  * lc(in) : the collation settings.
 *
 * Note : simply calls sort_one_coll_key_list for every weight value from 0
 *        to MAX_UCA_WEIGHT inclusive.
 * NOTE(review): the line holding the function name and parameter list is
 *               absent from this copy.
1460  */
1461 static void
1463 {
1464  int wv;
1465 
1466  for (wv = 0; wv <= MAX_UCA_WEIGHT; wv++)
1467  {
1468  sort_one_coll_key_list (lc, wv);
1469  }
1470 }
1471 
1472 /*
1473  * sort_one_coll_key_list - Sorts the collation keys grouped in a list
1474  * having the same weight value (= weight index)
1475  * at level 1 in UCA
1476  * lc(in) : the collation settings.
1477  * weight_index(in) : the index of the collation keys list to be sorted.
1478  *
1479  * Note : The collation keys list located at weight_key_list[weight_index] was
1480  * built from collation keys (codepoints or contractions) which have on
1481  * level 1 weight the same value
 *
 * NOTE(review): the line holding the function name and parameter list, and
 *               the continuation of the qsort call (its comparator argument),
 *               are absent from this copy.
1482  */
1483 static void
1485 {
1486  assert (weight_index <= MAX_UCA_WEIGHT);
1487 
  /* nothing to sort for an empty list */
1488  if (weight_key_list[weight_index].list_count <= 0)
1489  {
1490  return;
1491  }
1492 
1493  qsort (weight_key_list[weight_index].key_list, weight_key_list[weight_index].list_count, sizeof (UCA_COLL_KEY),
1495 }
1496 
1497 /*
1498  * uca_comp_func_coll_key_fo - compare function for sorting collatable
1499  * elements according to UCA algorithm, with
1500  * full order
1501  *
1502  * Note: This function is used in the first step of computing 'next' sequence
1503  * If result of 'uca_comp_func_coll_key' is zero, the comparison
1504  * is performed on codepooints values. The purpose is to provide a
1505  * 'deterministic comparison' in order to eliminate unpredictable
1506  * results of sort algorithm (qsort) when computing 'next' fields.
1507  *
1508  */
1509 static int
1510 uca_comp_func_coll_key_fo (const void *arg1, const void *arg2)
1511 {
1512  UCA_COLL_KEY *pos1_key;
1513  UCA_COLL_KEY *pos2_key;
1514  int cmp;
1515 
1516  pos1_key = (UCA_COLL_KEY *) arg1;
1517  pos2_key = (UCA_COLL_KEY *) arg2;
1518 
1519  cmp = uca_comp_func_coll_key (arg1, arg2);
1520 
1521  if (cmp == 0)
1522  {
1523  if (pos1_key->type == pos2_key->type)
1524  {
1525  return pos1_key->val.cp - pos2_key->val.cp;
1526  }
1527  else if (pos1_key->type == COLL_KEY_TYPE_CONTR)
1528  {
1529  return 1;
1530  }
1531  else
1532  {
1533  return -1;
1534  }
1535  }
1536 
1537  return cmp;
1538 }
1539 
1540 /*
1541  * uca_comp_func_coll_key - compare function for sorting collatable elements
1542  * according to UCA algorithm
1543  *
1544  * Note: this function is used to sort collatable elements according to
1545  * CEs tables and UCA settings (sorting options)
1546  * The elements in array are of UCA_COLL_KEY type, keys which
1547  * may be Unicode points or contractions
1548  *
 * Returns: the result of compare_ce_list on the collation element lists of
 *          the two keys.
 * NOTE(review): 'uca_opt' is used below but its declaration line is absent
 *               from this copy.
 * NOTE(review): get_ce_list_from_coll_key may return NULL; the results are
 *               passed on to compare_ce_list without a check here.
1549  */
1550 static int
1551 uca_comp_func_coll_key (const void *arg1, const void *arg2)
1552 {
1553  UCA_COLL_KEY *pos1_key;
1554  UCA_COLL_KEY *pos2_key;
1555  UCA_COLL_CE_LIST *ce_list1;
1556  UCA_COLL_CE_LIST *ce_list2;
1558 
1559  assert (uca_opt != NULL);
1560 
1561  pos1_key = (UCA_COLL_KEY *) arg1;
1562  pos2_key = (UCA_COLL_KEY *) arg2;
1563 
  /* resolve each key (code point / contraction / expansion) to its CE list */
1564  ce_list1 = get_ce_list_from_coll_key (pos1_key);
1565  ce_list2 = get_ce_list_from_coll_key (pos2_key);
1566 
1567  return compare_ce_list (ce_list1, ce_list2, uca_opt);
1568 }
1569 
1570 /*
1571  * get_ce_list_from_coll_key - get collation element list associated to a
1572  * collation key using current UCA storage
1573  *
1574  * Returns: collation element list, or NULL when the key's value is beyond
 *          the upper bound checked for its type
1575  * key(in) : UCA collation key
 *
 * NOTE(review): only upper bounds are checked; whether the value fields can
 *               be negative depends on their types, which are not visible here.
 * NOTE(review): the line holding the function name and parameter list is
 *               absent from this copy.
1576  */
1577 static UCA_COLL_CE_LIST *
1579 {
1580  assert (key != NULL);
1581 
1582  if (key->type == COLL_KEY_TYPE_CP)
1583  {
  /* single code point: index into the per-code-point table */
1584  if (key->val.cp <= MAX_UCA_CODEPOINT)
1585  {
1586  return &(curr_uca.coll_cp[key->val.cp]);
1587  }
1588  }
1589  else if (key->type == COLL_KEY_TYPE_CONTR)
1590  {
  /* contraction: index into the contraction table */
1591  if (key->val.contr_id < curr_uca.count_contr)
1592  {
1593  return &(curr_uca.coll_contr[key->val.contr_id].ce);
1594  }
1595  }
1596  else
1597  {
  /* any other type is expected to be an expansion */
1598  assert (key->type == COLL_KEY_TYPE_EXP);
1599 
1600  if (key->val.exp_id < curr_uca.count_exp)
1601  {
1602  return &(curr_uca.coll_exp[key->val.exp_id].ce);
1603  }
1604  }
1605 
1606  return NULL;
1607 }
1608 
1609 /*
1610  * create_opt_weights - Analyze the weight_key_list, compare collation
1611  * elements of weight keys based on locale settings,
1612  * set the new weights for the keys, create the
1613  * next_key array containing the relationship
1614  * key -> next key in collation.
1615  * Returns: error status
1616  * lc(in/out) : contains the collation settings and optimization results.
 *
 * Note : lc->opt_coll.weights and lc->opt_coll.next_cp are allocated here and
 *        are not released by this function, not even on the error paths.
 * NOTE(review): the line holding the function name and parameter list, and
 *               the statements of original lines 1632 and 1754, are absent
 *               from this copy; lc->opt_coll.w_count is read below but is
 *               not assigned in the visible code.
1617  */
1618 static int
1620 {
1621  UCA_COLL_KEY *equal_key_list = NULL;
1622  int weight_index;
1623  int equal_key_count, i;
1624  unsigned int current_weight;
1625  int err_status = NO_ERROR;
1626  UCA_COLL_KEY *prev_key = NULL;
1627  UCA_COLL_KEY max_cp_key;
1628  UCA_COLL_CE_LIST *prev_ce_list = NULL;
1629 
1630  weight_index = 0;
1631 
1633 
  /* scratch list of the keys that currently compare equal (second pass below).
   * NOTE(review): sized for MAX_UCA_CODEPOINT + 1 keys although contractions are
   * stored in it too - confirm a run of equal keys can never be longer. */
1634  equal_key_list = (UCA_COLL_KEY *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (UCA_COLL_KEY));
1635  if (equal_key_list == NULL)
1636  {
1637  err_status = ER_LOC_GEN;
1638  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1639  goto exit;
1640  }
1641 
1642  lc->opt_coll.weights = (unsigned int *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
1643  if (lc->opt_coll.weights == NULL)
1644  {
1645  err_status = ER_LOC_GEN;
1646  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1647  goto exit;
1648  }
1649 
1650  lc->opt_coll.next_cp = (unsigned int *) malloc ((MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
1651  if (lc->opt_coll.next_cp == NULL)
1652  {
1653  err_status = ER_LOC_GEN;
1654  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1655  goto exit;
1656  }
1657 
  /* fill both arrays with 0xff bytes; the validation loop near the end treats
   * 0xffffffff as "never assigned" */
1658  memset (lc->opt_coll.weights, 0xff, (MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
1659 
1660  memset (lc->opt_coll.next_cp, 0xff, (MAX_UCA_CODEPOINT + 1) * sizeof (unsigned int));
1661 
1662  /* weights */
  /* Walk all keys in list order; the weight counter advances each time a key's CE
   * list compares different from the previous key's, so equal keys share a weight. */
1663  current_weight = 0;
1664  for (weight_index = 0; weight_index <= MAX_UCA_WEIGHT; weight_index++)
1665  {
1666  UCA_W key_cursor;
1667 
1668  for (key_cursor = 0; key_cursor < weight_key_list[weight_index].list_count; key_cursor++)
1669  {
1670  UCA_COLL_CE_LIST *curr_ce_list = NULL;
1671  UCA_COLL_KEY *curr_key = &(weight_key_list[weight_index].key_list[key_cursor]);
1672 
1673  curr_ce_list = get_ce_list_from_coll_key (curr_key);
1674  if (curr_ce_list == NULL)
1675  {
1676  err_status = ER_LOC_GEN;
1677  LOG_LOCALE_ERROR ("Invalid collation element for key", ER_LOC_GEN, true);
1678  goto exit;
1679  }
1680 
1681  if ((prev_key != NULL) && (compare_ce_list (prev_ce_list, curr_ce_list, &(lc->tail_coll.uca_opt)) != 0))
1682  {
1683  /* keys compare differently */
1684  current_weight++;
1685  }
1686 
1687  if (curr_key->type == COLL_KEY_TYPE_CP)
1688  {
1689  assert (curr_key->val.cp >= 0 && curr_key->val.cp < lc->tail_coll.sett_max_cp);
1690  lc->opt_coll.weights[curr_key->val.cp] = current_weight;
1691  }
1692  else
1693  {
1694  assert (curr_key->type == COLL_KEY_TYPE_CONTR);
1695 
  /* contractions get their own entry in lc->opt_coll.contr_list */
1696  err_status = add_opt_coll_contraction (lc, curr_key, current_weight, false);
1697  if (err_status != NO_ERROR)
1698  {
1699  goto exit;
1700  }
1701  }
1702 
1703  prev_key = curr_key;
1704  prev_ce_list = curr_ce_list;
1705  }
1706  }
1707 
1708  /* set 'next' values */
  /* prev_key / prev_ce_list still refer to the last key of the pass above; the
   * first comparison of this pass therefore may report a difference, which only
   * flushes an empty equal_key_list. */
1709  equal_key_count = 0;
1710  for (weight_index = 0; weight_index <= MAX_UCA_WEIGHT; weight_index++)
1711  {
1712  UCA_W key_cursor;
1713 
1714  for (key_cursor = 0; key_cursor < weight_key_list[weight_index].list_count; key_cursor++)
1715  {
1716  UCA_COLL_CE_LIST *curr_ce_list = NULL;
1717  UCA_COLL_KEY *curr_key = &(weight_key_list[weight_index].key_list[key_cursor]);
1718 
1719  curr_ce_list = get_ce_list_from_coll_key (curr_key);
1720  if (curr_ce_list == NULL)
1721  {
1722  err_status = ER_LOC_GEN;
1723  LOG_LOCALE_ERROR ("Invalid collation element for key", ER_LOC_GEN, true);
1724  goto exit;
1725  }
1726 
1727  if ((prev_key != NULL) && (compare_ce_list (prev_ce_list, curr_ce_list, &(lc->tail_coll.uca_opt)) != 0))
1728  {
1729  /* keys compare differently */
1730  /* set next key for all previous equal keys */
1731  for (i = 0; i < equal_key_count; i++)
1732  {
1733  set_next_value_for_coll_key (lc, &(equal_key_list[i]), curr_key);
1734  }
1735 
1736  equal_key_count = 0;
1737  }
1738 
  /* remember this key until a key that compares different shows up */
1739  memcpy (&(equal_key_list[equal_key_count++]), curr_key, sizeof (UCA_COLL_KEY));
1740 
1741  prev_key = curr_key;
1742  prev_ce_list = curr_ce_list;
1743  }
1744  }
1745 
1746  /* set 'next' for remaining collation key to max codepoint */
1747  make_coll_key (&max_cp_key, COLL_KEY_TYPE_CP, lc->tail_coll.sett_max_cp - 1);
1748 
1749  for (i = 0; i < equal_key_count; i++)
1750  {
1751  set_next_value_for_coll_key (lc, &(equal_key_list[i]), &max_cp_key);
1752  }
1753 
1755 
  /* sanity check: every entry below w_count must have been assigned in both arrays */
1756  for (i = 0; i < lc->opt_coll.w_count; i++)
1757  {
1758  if (lc->opt_coll.weights[i] == 0xffffffff || lc->opt_coll.next_cp[i] == 0xffffffff)
1759  {
1760  err_status = ER_LOC_GEN;
1761  LOG_LOCALE_ERROR ("Internal error. Generated " "weight value or next CP value is invalid", ER_LOC_GEN, true);
1762  goto exit;
1763  }
1764  }
1765 
1766  /* optimize contractions */
1767  if (lc->opt_coll.count_contr > 0)
1768  {
1769  err_status = optimize_coll_contractions (lc);
1770  }
1771 
1772 exit:
1773  if (equal_key_list != NULL)
1774  {
1775  free (equal_key_list);
1776  equal_key_list = NULL;
1777  }
1778 
1779  return err_status;
1780 }
1781 
1782 /*
1783  * optimize_coll_contractions - optimizes collation contractions list so that
1784  * contractions are stored in binary ascending
1785  * order
1786  * Returns: error status
1787  *
1788  * lc(in/out) : contains the collation settings and optimization results
 *
 * Note : the contractions are copied together with their original position,
 *        sorted with comp_func_coll_contr_bin, then every 'next' reference to
 *        a contraction (in next_cp and in the contractions themselves) is
 *        rewritten from the old position to the new one before the sorted
 *        copies are written back.
 * NOTE(review): several lines are absent from this copy (the function
 *               name/parameter line and statements at original lines 1916,
 *               1925, 1940, 1945, 1950, 1956, 1961 and 1967-1968), so the
 *               last part of the body is incomplete as shown.
1789  */
1790 static int
1792 {
1793  UCA_COLL_CONTR_ID *initial_coll_tag = NULL;
1794  int i;
1795  int err_status = NO_ERROR;
1796  int cp;
1797 
1798  assert (lc != NULL);
1799  assert (lc->opt_coll.count_contr > 0);
1800 
1801  initial_coll_tag = (UCA_COLL_CONTR_ID *) malloc (lc->opt_coll.count_contr * sizeof (UCA_COLL_CONTR_ID));
1802  if (initial_coll_tag == NULL)
1803  {
1804  err_status = ER_LOC_GEN;
1805  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1806  goto exit;
1807  }
1808 
  /* tag each contraction copy with its position before sorting */
1809  for (i = 0; i < lc->opt_coll.count_contr; i++)
1810  {
1811  memcpy (&(initial_coll_tag[i].contr_ref), &(lc->opt_coll.contr_list[i]), sizeof (COLL_CONTRACTION));
1812  initial_coll_tag[i].pos_id = i;
1813  }
1814 
1815  /* sort contractions (binary order) */
1816  qsort (initial_coll_tag, lc->opt_coll.count_contr, sizeof (UCA_COLL_CONTR_ID), comp_func_coll_contr_bin);
1817 
1818  /* adjust 'next' contractions values for all codepoints */
1819  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
1820  {
1821  unsigned int next_seq = lc->opt_coll.next_cp[cp];
1822  int curr_idx = -1;
1823  int opt_idx;
1824  bool found = false;
1825 
  /* only 'next' values that designate a contraction need translating */
1826  if (!INTL_IS_NEXT_CONTR (next_seq))
1827  {
1828  continue;
1829  }
1830 
1831  curr_idx = INTL_GET_NEXT_CONTR_ID (next_seq);
1832 
1833  assert (curr_idx < lc->opt_coll.count_contr);
1834 
1835  /* find index in sorted contractions */
  /* linear scan for the entry whose original position is curr_idx */
1836  for (opt_idx = 0; opt_idx < lc->opt_coll.count_contr; opt_idx++)
1837  {
1838  if (initial_coll_tag[opt_idx].pos_id == curr_idx)
1839  {
1840  found = true;
1841  break;
1842  }
1843  }
1844 
1845  if (!found)
1846  {
1847  err_status = ER_LOC_GEN;
1848  LOG_LOCALE_ERROR ("Internal error. Cannot adjust " "contraction id after optimization", ER_LOC_GEN, true);
1849  goto exit;
1850  }
1851 
1852  assert (found == true);
1853 
1854  lc->opt_coll.next_cp[cp] = opt_idx | INTL_MASK_CONTR;
1855  }
1856 
1857  /* adjust 'next' contractions values for all contractions */
  /* same translation as above, applied to the 'next' field of the sorted copies */
1858  for (i = 0; i < lc->opt_coll.count_contr; i++)
1859  {
1860  unsigned int next_seq = initial_coll_tag[i].contr_ref.next;
1861  int curr_idx = -1;
1862  int opt_idx;
1863  bool found = false;
1864 
1865  if (!INTL_IS_NEXT_CONTR (next_seq))
1866  {
1867  continue;
1868  }
1869 
1870  curr_idx = INTL_GET_NEXT_CONTR_ID (next_seq);
1871 
1872  assert (curr_idx < lc->opt_coll.count_contr);
1873 
1874  /* find index in sorted contractions */
1875  for (opt_idx = 0; opt_idx < lc->opt_coll.count_contr; opt_idx++)
1876  {
1877  if (initial_coll_tag[opt_idx].pos_id == curr_idx)
1878  {
1879  found = true;
1880  break;
1881  }
1882  }
1883 
1884  if (!found)
1885  {
1886  err_status = ER_LOC_GEN;
1887  LOG_LOCALE_ERROR ("Internal error. Cannot adjust " "contraction id after optimization", ER_LOC_GEN, true);
1888  goto exit;
1889  }
1890 
1891  assert (found == true);
1892 
1893  initial_coll_tag[i].contr_ref.next = opt_idx | INTL_MASK_CONTR;
1894  }
1895 
1896  /* overwrite contractions in sorted order */
1897  for (i = 0; i < lc->opt_coll.count_contr; i++)
1898  {
1899  memcpy (&(lc->opt_coll.contr_list[i]), &(initial_coll_tag[i].contr_ref), sizeof (COLL_CONTRACTION));
1900  }
1901 
1902  /* first contraction index array */
  /* one slot per code point below w_count; -1 means no contraction starts with it */
1903  lc->opt_coll.cp_first_contr_array = (int *) malloc (lc->opt_coll.w_count * sizeof (int));
1904  if (lc->opt_coll.cp_first_contr_array == NULL)
1905  {
1906  err_status = ER_LOC_GEN;
1907  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
1908  goto exit;
1909  }
1910 
1911  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
1912  {
1913  lc->opt_coll.cp_first_contr_array[cp] = -1;
1914  }
1915 
1917 
1918  for (i = 0; i < lc->opt_coll.count_contr; i++)
1919  {
1920  unsigned char *c_buf = (unsigned char *) lc->opt_coll.contr_list[i].c_buf;
1921  unsigned char *dummy;
1922  int c_buf_size = strlen ((char *) c_buf);
  /* note: this unsigned 'cp' shadows the function-level 'int cp' inside this loop */
1923  unsigned int cp;
1924 
1926  lc->opt_coll.contr_list[i].size = (unsigned char) c_buf_size;
1927 
1928  lc->opt_coll.contr_min_size = MIN (lc->opt_coll.contr_min_size, c_buf_size);
1929 
1930  /* get first code-point */
1931  cp = intl_utf8_to_cp (c_buf, c_buf_size, &dummy);
1932 
  /* contractions are visited in sorted order, so the first one seen for a
   * leading code point is the one recorded */
1933  if (cp < (unsigned int) lc->opt_coll.w_count && lc->opt_coll.cp_first_contr_array[cp] == -1)
1934  {
1935  lc->opt_coll.cp_first_contr_array[cp] = i;
1936  }
1937  }
1938 
1939  /* compute interval of codepoints with contractions */
  /* lowest code point that starts a contraction (the statement recording it is
   * absent from this copy) */
1941  for (cp = 0; cp < lc->opt_coll.w_count; cp++)
1942  {
1943  if (lc->opt_coll.cp_first_contr_array[cp] != -1)
1944  {
1946  break;
1947  }
1948  }
1949 
1951 
  /* highest code point that starts a contraction (recording statement absent too) */
1952  for (cp = lc->opt_coll.w_count - 1; cp >= 0; cp--)
1953  {
1954  if (lc->opt_coll.cp_first_contr_array[cp] != -1)
1955  {
1957  break;
1958  }
1959  }
1960 
1962 
  /* NOTE(review): the loop body is absent from this copy; presumably it shifts the
   * used part of cp_first_contr_array down by cp_first_contr_offset - confirm. */
1963  if (lc->opt_coll.cp_first_contr_offset > 0)
1964  {
1965  for (i = 0; i < (int) lc->opt_coll.cp_first_contr_count; i++)
1966  {
1969  }
1970  }
1971 
1972 exit:
1973  if (initial_coll_tag != NULL)
1974  {
1975  free (initial_coll_tag);
1976  initial_coll_tag = NULL;
1977  }
1978 
1979  return err_status;
1980 }
1981 
1982 /*
1983  * set_next_value_for_coll_key - sets the next collation key
1984  * Returns: error status
1985  *
1986  * lc(in/out) : contains the collation settings and optimization results
1987  * coll_key(in) : collation key
1988  * next_key(in) : next key value to set
1989  */
1990 static int
1991 set_next_value_for_coll_key (LOCALE_COLLATION * lc, const UCA_COLL_KEY * coll_key, const UCA_COLL_KEY * next_key)
1992 {
1993  /* set next key for all previous equal keys */
1994  if (next_key->type == COLL_KEY_TYPE_CP)
1995  {
1996  if (coll_key->type == COLL_KEY_TYPE_CP)
1997  {
1998  lc->opt_coll.next_cp[coll_key->val.cp] = next_key->val.cp;
1999  }
2000  else
2001  {
2002  assert (coll_key->type == COLL_KEY_TYPE_CONTR);
2003  assert (coll_key->val.contr_id < lc->opt_coll.count_contr);
2004  lc->opt_coll.contr_list[coll_key->val.contr_id].next = next_key->val.cp;
2005  }
2006  }
2007  else
2008  {
2009  assert (next_key->type == COLL_KEY_TYPE_CONTR);
2010  assert (next_key->val.contr_id < lc->opt_coll.count_contr);
2011 
2012  if (coll_key->type == COLL_KEY_TYPE_CP)
2013  {
2014  lc->opt_coll.next_cp[coll_key->val.cp] = next_key->val.contr_id | INTL_MASK_CONTR;
2015  }
2016  else
2017  {
2018  assert (coll_key->type == COLL_KEY_TYPE_CONTR);
2019  assert (coll_key->val.contr_id < lc->opt_coll.count_contr);
2020 
2021  lc->opt_coll.contr_list[coll_key->val.contr_id].next = next_key->val.contr_id | INTL_MASK_CONTR;
2022  }
2023  }
2024 
2025  return NO_ERROR;
2026 }
2027 
2028 /*
2029  * add_opt_coll_contraction - adds a contraction item to optimized collation
2030  * Returns: error status
2031  *
2032  * lc(in/out) : contains the collation settings and optimization results
2033  * contr_key(in) : collation key
2034  * wv(in) : weight value to assign for optimized contraction
2035  * use_expansions(in) : when true, the UCA weight fields of the new item
 *                      (uca_num, uca_w_l13, uca_w_l4) are filled in as well
 *
 * Note : the list lc->opt_coll.contr_list grows by one element per call and
 *        lc->opt_coll.count_contr is incremented.
 * NOTE(review): the result of realloc is stored directly in
 *               lc->opt_coll.contr_list, so on failure the previous list
 *               pointer is lost while count_contr keeps its value.
 * NOTE(review): original lines 2088 and 2101 are absent from this copy; each
 *               precedes a brace block below and presumably held the
 *               condition guarding it - confirm against the full file.
2036  */
2037 static int
2038 add_opt_coll_contraction (LOCALE_COLLATION * lc, const UCA_COLL_KEY * contr_key, const unsigned int wv,
2039  bool use_expansions)
2040 {
2041  COLL_CONTRACTION *opt_contr = NULL;
2042  UCA_CONTRACTION *uca_contr;
2043  char *p_buf = NULL;
2044  int err_status = NO_ERROR;
2045  int i;
2046 
2047  assert (contr_key != NULL);
2048  assert (contr_key->type == COLL_KEY_TYPE_CONTR);
2049 
2050  assert (contr_key->val.contr_id < curr_uca.count_contr);
2051  uca_contr = &(curr_uca.coll_contr[contr_key->val.contr_id]);
2052 
2053  lc->opt_coll.contr_list =
2054  (COLL_CONTRACTION *) realloc (lc->opt_coll.contr_list, (lc->opt_coll.count_contr + 1) * sizeof (COLL_CONTRACTION));
2055 
2056  if (lc->opt_coll.contr_list == NULL)
2057  {
2058  err_status = ER_LOC_GEN;
2059  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
2060  goto exit;
2061  }
2062 
  /* claim the new last slot and start from an all-zero item */
2063  opt_contr = &(lc->opt_coll.contr_list[lc->opt_coll.count_contr++]);
2064  memset (opt_contr, 0, sizeof (COLL_CONTRACTION));
2065 
2066  p_buf = opt_contr->c_buf;
2067 
2068  assert (uca_contr->cp_count > 1);
2069 
  /* store the contraction's code points back to back as UTF-8 in c_buf.
   * NOTE(review): the size assert further down runs only after these writes. */
2070  for (i = 0; i < uca_contr->cp_count; i++)
2071  {
2072  int utf8_size = intl_cp_to_utf8 (uca_contr->cp_list[i],
2073  (unsigned char *) p_buf);
2074  p_buf += utf8_size;
2075  }
2076  opt_contr->cp_count = uca_contr->cp_count;
2077  *p_buf = '\0';
2078 
2079  opt_contr->wv = wv;
2080  assert (p_buf - opt_contr->c_buf < (int) sizeof (opt_contr->c_buf));
  /* size = number of bytes written, terminator excluded */
2081  opt_contr->size = (unsigned char) (p_buf - opt_contr->c_buf);
2082 
2083  if (use_expansions)
2084  {
2085  UCA_COLL_CE_LIST *ce_list;
2086 
2087  memset (opt_contr->uca_w_l13, 0, sizeof (opt_contr->uca_w_l13));
2089  {
2090  memset (opt_contr->uca_w_l4, 0, sizeof (opt_contr->uca_w_l4));
2091  }
2092 
2093  ce_list = get_ce_list_from_coll_key (contr_key);
2094 
2095  assert (ce_list != NULL);
2096  opt_contr->uca_num = ce_list->num;
2097 
2098  assert (opt_contr->uca_num > 0);
2099 
2100  build_compressed_uca_w_l13 (ce_list, opt_contr->uca_w_l13);
2102  {
  /* copy the weight at level index 3 of each collation element */
2103  for (i = 0; i < MAX_UCA_EXP_CE; i++)
2104  {
2105  opt_contr->uca_w_l4[i] = GET_UCA_WEIGHT (ce_list, i, 3);
2106  }
2107  }
2108  }
2109 
2110 exit:
2111  return err_status;
2112 }
2113 
2114 /*
2115  * uca_free_data - Unloads data used by UCA
2116  * Returns:
2117  */
2118 void
2120 {
2121  if (ducet.coll_cp != NULL)
2122  {
2123  free (ducet.coll_cp);
2124  ducet.coll_cp = NULL;
2125  }
2126 
2127  if (ducet.coll_contr)
2128  {
2129  free (ducet.coll_contr);
2130  ducet.coll_contr = NULL;
2131  }
2132 
2133  assert (ducet.coll_exp == NULL);
2134 
2135  ducet.max_contr = 0;
2136  ducet.count_contr = 0;
2137 
2138  *(ducet.prev_file_path) = '0';
2139 }
2140 
2141 /*
2142  * apply_tailoring_rule_cp - Calls the functions which apply the rules
2143  * based on their type.
2144  * Returns: ER_LOC_GEN if cannot apply rule, or the rule has an invalid type;
2145  * ER_OUT_OF_VIRTUAL_MEMORY if moving the tailored codepoint inside
2146  * the weight stats array fails;
2147  * NO_ERROR if tailoring is successful.
2148  * dir(in) : 0 = after, 1 = before.
2149  * anchor_key(in) : the anchor for which the rule is applied.
2150  * key(in) : key to be tailored.
2151  * ref_key(in) : the key previously tailored (or anchor if this is the
2152  * first rule having "anchor" as the anchor key).
2153  * lvl(in) : weight level used for tailoring (see T_LEVEL for values).
2154  *
2155  * Note : Alter the collation elements of the collated element in order for
2156  * the keys to comply with the specified rule after collation and
2157  * optimizations.
2158  * If the rule is a "before" rule, checks if it can be applied.
 *
 * NOTE(review): the first line of the parameter list (with the function
 *               name) is absent from this copy. A caller elsewhere in this
 *               file invokes 'apply_tailoring_rule' with five matching
 *               arguments, so the '_cp' suffix above is presumably stale -
 *               confirm.
2159  */
2160 static int
2162  T_LEVEL lvl)
2163 {
  /* identity rules ignore direction and anchor; everything else is directional */
2164  if (lvl == TAILOR_IDENTITY)
2165  {
2166  return apply_tailoring_rule_identity (key, ref_key);
2167  }
2168 
2169  return apply_tailoring_rule_w_dir (dir, anchor_key, key, ref_key, lvl);
2170 }
2171 
2172 /*
2173  * apply_tailoring_rule_identity - Apply an identity tailoring rule.
2174  *
2175  * Returns: error status (ER_LOC_GEN when either key has no collation element
 *          list, or the status of change_key_weight_list)
2176  * key(in) : collation key to be tailored.
2177  * ref_key(in) : collation key to which we tailor key as identical.
 *
 * Note : on success the collation element list of 'key' becomes a copy of the
 *        list of 'ref_key'.
 * NOTE(review): the line holding the function name and parameter list is
 *               absent from this copy.
2178  */
2179 static int
2181 {
2182  UCA_W current_weight;
2183  int err_status = NO_ERROR;
2184  UCA_COLL_CE_LIST *ce_list_key = NULL;
2185  UCA_COLL_CE_LIST *ce_list_ref_key = NULL;
2186 
2187  assert (key != NULL);
2188  assert (ref_key != NULL);
2189 
2190  ce_list_key = get_ce_list_from_coll_key (key);
2191  ce_list_ref_key = get_ce_list_from_coll_key (ref_key);
2192 
2193  if (ce_list_key == NULL || ce_list_ref_key == NULL)
2194  {
2195  err_status = ER_LOC_GEN;
2196  LOG_LOCALE_ERROR ("Cannot apply identity rule. Collation key not found", ER_LOC_GEN, true);
2197  goto exit;
2198  }
2199 
2200  /* Make sure the reference is represented in DUCET. */
2201  assert (ce_list_ref_key->num > 0);
2202 
  /* if the level-1 weight of the first CE changes, the per-weight key lists are
   * updated first through change_key_weight_list */
2203  current_weight = GET_UCA_WEIGHT (ce_list_key, 0, 0);
2204  if (current_weight != GET_UCA_WEIGHT (ce_list_ref_key, 0, 0))
2205  {
2206  err_status = change_key_weight_list (key, current_weight, GET_UCA_WEIGHT (ce_list_ref_key, 0, 0));
2207  if (err_status != NO_ERROR)
2208  {
2209  goto exit;
2210  }
2211  }
2212 
  /* memcpy requires distinct objects, hence the assert */
2213  assert (ce_list_key != ce_list_ref_key);
2214  memcpy (ce_list_key, ce_list_ref_key, sizeof (UCA_COLL_CE_LIST));
2215 
2216 exit:
2217  return err_status;
2218 }
2219 
2220 /*
2221  * apply_tailoring_rule_w_dir - Applies an After/Before -rule
2222  * Returns: error status
2223  *
2224  * dir(in) : direction : 0 after, 1 before
2225  * anchor_key(in): the anchor for which the rule is applied.
2226  * key(in) : key to be tailored.
2227  * ref_key(in) : the key previously tailored (or anchor if this is the
2228  * first rule having "anchor" as the anchor key).
2229  * lvl(in) : weight level used for tailoring (see T_LEVEL for values).
2230  *
2231  * Note : Alter the collation elements of the collated element in order for
2232  * the symbols to comply with the specified rule after collation and
2233  * optimizations.
2234  * Collisions are avoided.
 *
 * Note : a candidate list is built from the anchor's list, overwritten with
 *        the reference's weights for the levels below 'lvl', then its last
 *        element is nudged up (after) or down (before) at level 'lvl' until
 *        get_key_with_ce_sublist finds no existing key matching it.
 * NOTE(review): the message logged when a key has no collation element list
 *               says "identity rule" although this is the directional rule.
 * NOTE(review): the first line of the parameter list (with the function
 *               name) is absent from this copy.
2235  */
2236 static int
2238  T_LEVEL lvl)
2239 {
2240  int i, j;
2241  bool collation_finished, overflow = false;
2242  UCA_W current_weight;
2243  UCA_COLL_CE_LIST new_ce;
2244  int err_status = NO_ERROR;
2245  UCA_COLL_CE_LIST *ce_list_key = NULL;
2246  UCA_COLL_CE_LIST *ce_list_anchor_key = NULL;
2247  UCA_COLL_CE_LIST *ce_list_ref_key = NULL;
2248 
2249  assert (key != NULL);
2250  assert (ref_key != NULL);
2251 
2252  ce_list_key = get_ce_list_from_coll_key (key);
2253  ce_list_ref_key = get_ce_list_from_coll_key (ref_key);
2254  ce_list_anchor_key = get_ce_list_from_coll_key (anchor_key);
2255 
2256  if (ce_list_key == NULL || ce_list_ref_key == NULL || ce_list_anchor_key == NULL)
2257  {
2258  err_status = ER_LOC_GEN;
2259  LOG_LOCALE_ERROR ("Cannot apply identity rule. Collation key not found", ER_LOC_GEN, true);
2260  goto exit;
2261  }
2262 
2263  /* Make sure the anchor and ref codepoint are represented in DUCET. */
2264  assert (ce_list_anchor_key->num > 0);
2265  assert (ce_list_ref_key->num > 0);
2266 
2267  /* Clone anchor weights (uca element full clone) */
2268  memcpy (&new_ce, ce_list_anchor_key, sizeof (UCA_COLL_CE_LIST));
2269 
2270  new_ce.num = MAX (new_ce.num, ce_list_ref_key->num);
2271 
2272  /* overwrite with reference weights up to level */
2273  for (j = 0; j < (int) lvl; j++)
2274  {
2275  for (i = 0; i < (int) (new_ce.num); i++)
2276  {
2277  SET_UCA_WEIGHT (&(new_ce), i, j, GET_UCA_WEIGHT (ce_list_ref_key, i, j));
2278  }
2279  }
2280 
2281  collation_finished = false;
2282 
  /* each iteration adjusts the candidate once and tests it for a collision */
2283  while (!collation_finished)
2284  {
2285  if (new_ce.num > MAX_UCA_EXP_CE)
2286  {
2287  err_status = ER_LOC_GEN;
2288  LOG_LOCALE_ERROR ("Error when applying after-rule. " "Collation element list overflow.", ER_LOC_GEN, true);
2289  goto exit;
2290  }
2291 
2292  assert (new_ce.num >= 1);
2293  assert (lvl >= 1);
2294 
2295  if (dir == TAILOR_AFTER)
2296  {
2297  UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
2298  /* AFTER */
2299  overflow = false;
2300 
2301  /* Try increasing the weight value on the last collation element. */
  /* the last weight is already at its maximum: extend the list by one element.
   * NOTE(review): num is not re-checked against MAX_UCA_EXP_CE before the new last
   * element is read and written below - confirm the array has room for it. */
2302  if (w_val >= MAX_UCA_WEIGHT)
2303  {
2304  new_ce.num++;
2305  overflow = true;
2306  }
2307 
2308  w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
2309  SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val + 1);
2310  }
2311  else
2312  {
2313  UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
2314  /* BEFORE */
2315  assert (dir == TAILOR_BEFORE);
2316 
2317  /* Try decreasing the weight value on the last collation element. */
2318  if (w_val > 0)
2319  {
2320  SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val - 1);
2321  }
2322  else
2323  {
2324  int ce_index = (int) (new_ce.num);
2325 
  /* skip backwards over trailing elements whose weight at this level is 0 */
2326  while (ce_index > 0 && GET_UCA_WEIGHT (&new_ce, ce_index - 1, lvl - 1) == 0)
2327  {
2328  ce_index--;
2329  }
2330 
2331  if (ce_index <= 0)
2332  {
2333  err_status = ER_LOC_GEN;
2334  LOG_LOCALE_ERROR ("Applying before-rule. Collation element" " list underflow.", ER_LOC_GEN, true);
2335  goto exit;
2336  }
  /* NOTE(review): this stores MAX_UCA_WEIGHT into the last element with a non-zero
   * weight rather than decrementing it - confirm this is the intended behaviour. */
2337  SET_UCA_WEIGHT (&new_ce, ce_index - 1, lvl - 1, MAX_UCA_WEIGHT);
2338  }
2339  }
2340 
2341  if (get_key_with_ce_sublist (&new_ce, lvl) != NULL)
2342  {
2343  /* If a collision occurs, go further into the collation element list */
2344 
2345  if (dir == TAILOR_AFTER)
2346  {
2347  /* AFTER */
2348  if (!overflow)
2349  {
2350  UCA_W w_val = GET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1);
2351  /* Revert weight increase */
2352  SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, w_val - 1);
2353  }
2354  /* Add a new collation element. */
2355  new_ce.num++;
2356  }
2357  else
2358  {
2359  /* BEFORE */
2360  assert (dir == TAILOR_BEFORE);
2361 
2362  /* Add a new collation element. */
2363  new_ce.num++;
2364  /* Maximize the weight on the new collation element. */
2365  SET_UCA_WEIGHT (&new_ce, new_ce.num - 1, lvl - 1, MAX_UCA_WEIGHT);
2366  }
2367  }
2368  else
2369  {
2370  collation_finished = true;
2371  }
2372  }
2373 
  /* if the level-1 weight of the first CE changes, the per-weight key lists are
   * updated through change_key_weight_list before the new list is stored */
2374  current_weight = GET_UCA_WEIGHT (ce_list_key, 0, 0);
2375  if (GET_UCA_WEIGHT (&new_ce, 0, 0) != current_weight)
2376  {
2377  err_status = change_key_weight_list (key, current_weight, GET_UCA_WEIGHT (&new_ce, 0, 0));
2378  if (err_status != NO_ERROR)
2379  {
2380  goto exit;
2381  }
2382  }
2383 
2384  memcpy (ce_list_key, &new_ce, sizeof (UCA_COLL_CE_LIST));
2385 
2386 exit:
2387  return err_status;
2388 }
2389 
2390 /*
2391  * get_key_with_ce_sublist - Tests if a collation key's collation element list
2392  * matches uca_item up to lvl(level) and
2393  * uca_item.num.
2394  * Returns : collation key if found, or
2395  * NULL if no partial match was found.
2396  * uca_item(in) : an object of type UCA_COLL_CE_LIST, containing the list of
2397  * collation elements to search for.
2398  * lvl(in) : the level up to which the matching should be done.
2399  */
2400 static UCA_COLL_KEY *
2401 get_key_with_ce_sublist (UCA_COLL_CE_LIST * uca_item, const int lvl)
2402 {
2403  UCA_W weight_index;
2404  int i, ce_index, level_index;
2405  bool found;
2406 
2407  assert (uca_item != NULL);
2408 
2409  weight_index = GET_UCA_WEIGHT (uca_item, 0, 0);
2410 
2411  if (weight_key_list[weight_index].list_count == 0)
2412  {
2413  return NULL;
2414  }
2415 
2416  for (i = 0; i < weight_key_list[weight_index].list_count; i++)
2417  {
2418  UCA_COLL_KEY *key = &(weight_key_list[weight_index].key_list[i]);
2420 
2421  if (ce_list == NULL)
2422  {
2423  continue;
2424  }
2425 
2426  found = true;
2427  for (level_index = 0; level_index < lvl && found; level_index++)
2428  {
2429  for (ce_index = 0; ce_index < uca_item->num; ce_index++)
2430  {
2431  if (GET_UCA_WEIGHT (ce_list, ce_index, level_index) != GET_UCA_WEIGHT (uca_item, ce_index, level_index))
2432  {
2433  found = false;
2434  break;
2435  }
2436  }
2437  }
2438 
2439  if (found)
2440  {
2441  return key;
2442  }
2443  }
2444 
2445  return NULL;
2446 }
2447 
2448 /*
2449  * make_coll_key - creates a collation key
2450  * Returns :
2451  * key(in/out) : key to create
2452  * type(in) : key type
2453  * key_id(in) : collation key identifier (codepoint or cotraction key)
2454  */
2455 static void
2456 make_coll_key (UCA_COLL_KEY * key, UCA_COLL_KEY_TYPE type, const int key_id)
2457 {
2458  assert (key != NULL);
2459 
2460  assert (key_id >= 0);
2461 
2462  memset (key, 0, sizeof (UCA_COLL_KEY));
2463 
2464  if (type == COLL_KEY_TYPE_CP)
2465  {
2466  key->type = COLL_KEY_TYPE_CP;
2467  key->val.cp = key_id;
2468  }
2469  else if (type == COLL_KEY_TYPE_CONTR)
2470  {
2471  key->type = COLL_KEY_TYPE_CONTR;
2472  key->val.contr_id = key_id;
2473  }
2474  else
2475  {
2476  assert (type == COLL_KEY_TYPE_EXP);
2477 
2478  key->type = COLL_KEY_TYPE_EXP;
2479  key->val.exp_id = key_id;
2480  }
2481 }
2482 
2483 /*
2484  * find_contr_id - searches for a contraction
2485  * Returns : id of contraction or -1 if not found
2486  * cp_array(in) : codepoints array to create key
2487  * cp_count(in) : # of codepoints in array
2488  * st(in) : storage to check
2489  */
2490 static int
2491 find_contr_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st)
2492 {
2493  bool found;
2494  int i;
2495 
2496  assert (cp_array != NULL);
2497  assert (cp_count > 0);
2498  assert (st != NULL);
2499 
2500  if (cp_count > LOC_MAX_UCA_CHARS_SEQ)
2501  {
2502  return -1;
2503  }
2504 
2505  found = false;
2506  for (i = 0; i < st->count_contr; i++)
2507  {
2508  int j;
2509  UCA_CP *st_cp_list;
2510 
2511  if (cp_count != st->coll_contr[i].cp_count)
2512  {
2513  continue;
2514  }
2515 
2516  st_cp_list = st->coll_contr[i].cp_list;
2517 
2518  found = true;
2519  for (j = 0; j < cp_count; j++)
2520  {
2521  assert (cp_array[j] < MAX_UNICODE_CHARS);
2522 
2523  if (cp_array[j] != (unsigned int) (st_cp_list[j]))
2524  {
2525  found = false;
2526  break;
2527  }
2528  }
2529 
2530  if (found)
2531  {
2532  return i;
2533  }
2534  }
2535 
2536  return -1;
2537 }
2538 
2539 /*
2540  * find_exp_id - searches for an expansion
2541  * Returns : id of expansion or -1 if not found
2542  * cp_array(in) : codepoints array to create key
2543  * cp_count(in) : # of codepoints in array
2544  * st(in) : storage to check
2545  */
2546 static int
2547 find_exp_id (const unsigned int *cp_array, const int cp_count, UCA_STORAGE * st)
2548 {
2549  bool found;
2550  int i;
2551 
2552  assert (cp_array != NULL);
2553  assert (cp_count > 0);
2554  assert (st != NULL);
2555 
2556  if (cp_count > LOC_MAX_UCA_CHARS_SEQ)
2557  {
2558  return -1;
2559  }
2560 
2561  found = false;
2562  for (i = 0; i < st->count_exp; i++)
2563  {
2564  int j;
2565  UCA_CP *st_cp_list;
2566 
2567  if (cp_count != st->coll_exp[i].cp_count)
2568  {
2569  continue;
2570  }
2571 
2572  st_cp_list = st->coll_exp[i].cp_list;
2573 
2574  found = true;
2575  for (j = 0; j < cp_count; j++)
2576  {
2577  assert (cp_array[j] < MAX_UNICODE_CHARS);
2578 
2579  if (cp_array[j] != (unsigned int) (st_cp_list[j]))
2580  {
2581  found = false;
2582  break;
2583  }
2584  }
2585 
2586  if (found)
2587  {
2588  return i;
2589  }
2590  }
2591 
2592  return -1;
2593 }
2594 
2595 /*
2596  * string_to_coll_ce_list - Parse a string to a collation element list
2597  * Retuns : NO_ERROR(0) if parsing is successful;
2598  * ER_LOC_GEN if parsing fails.
2599  * s (in) : NULL terminated string to parse.
2600  * ui (in/out) : parsed collation element list.
2601  */
2602 static int
2604 {
2605  UCA_COLL_CE_LIST uca_item;
2606  int weight_index, ce_index, state;
2607  UCA_W weight;
2608  bool error_found;
2609  char *str;
2610  char *end_ptr;
2611  char c;
2612  int err_status = NO_ERROR;
2613 
2614  memset (&uca_item, 0, sizeof (UCA_COLL_CE_LIST));
2615 
2616  str = s;
2617  weight_index = 0;
2618  ce_index = 0;
2619  state = 0;
2620  error_found = false;
2621  while (strlen (str) > 0 && !error_found)
2622  {
2623  int result = 0;
2624  int val;
2625 
2626  switch (state)
2627  {
2628  case 0: /* read a '[' (first char from the standard string representation * of a collation
2629  * element) */
2630  c = str[0];
2631  if (c != '[')
2632  {
2633  error_found = true;
2634  break;
2635  }
2636  str++;
2637  state = 1;
2638  break;
2639  case 1: /* read a weight value in string format, in hex */
2640  if (weight_index == MAX_WEIGHT_LEVELS)
2641  {
2642  state = 3;
2643  break;
2644  }
2645  /* validate weight, to be below 0xFFFF */
2646  result = str_to_int32 (&val, &end_ptr, str, 16);
2647  if (result != 0 || val > MAX_UCA_WEIGHT)
2648  {
2649  error_found = true;
2650  break;
2651  }
2652  weight = (UCA_W) val;
2653  SET_UCA_WEIGHT (&uca_item, ce_index, weight_index, weight);
2654  str = end_ptr;
2655  if (weight_index != 3)
2656  {
2657  state = 2;
2658  }
2659  else
2660  {
2661  state = 3;
2662  }
2663  weight_index++;
2664  break;
2665  case 2: /* Read a dot '.' = the weight separator inside the * string representation of a
2666  * collation element */
2667  c = str[0];
2668  if (c != '.')
2669  {
2670  error_found = true;
2671  break;
2672  }
2673  str++;
2674  state = 1;
2675  break;
2676  case 3: /* Read a ']' (last char from the standard string representation * of a collation
2677  * element) */
2678  c = str[0];
2679  if (c != ']')
2680  {
2681  error_found = true;
2682  break;
2683  }
2684  str++;
2685  state = 0;
2686  weight_index = 0;
2687  ce_index++;
2688  break;
2689  }
2690  }
2691 
2692  if (!error_found)
2693  {
2694  uca_item.num = ce_index;
2695  memcpy (ui, &uca_item, sizeof (UCA_COLL_CE_LIST));
2696  }
2697 
2698  if (error_found)
2699  {
2700  switch (state)
2701  {
2702  case 0:
2703  LOG_LOCALE_ERROR ("Invalid collation element list. '[' expected.", ER_LOC_GEN, true);
2704  break;
2705  case 1:
2706  LOG_LOCALE_ERROR ("Invalid collation element list." "Weight out of 0x0000 - 0xFFFF range.", ER_LOC_GEN, true);
2707  break;
2708  case 2:
2709  LOG_LOCALE_ERROR ("Invalid collation element list. '.' expected.", ER_LOC_GEN, true);
2710  break;
2711  case 3:
2712  LOG_LOCALE_ERROR ("Invalid collation element list. ']' expected.", ER_LOC_GEN, true);
2713  break;
2714  }
2715  err_status = ER_LOC_GEN;
2716  }
2717 
2718  return err_status;
2719 }
2720 
2721 /*
2722  * apply_absolute_tailoring_rules - Function for applying the tailor
2723  * rules strored in lc->tail_coll.cub_rules.
2724  * Retuns : ER_LOC_GEN if an invalid rule is found;
2725  * NO_ERROR(0) if parsing is successful.
2726  * lc (in/out) : locale settings and optimization results.
2727  */
2728 static int
2730 {
2731  int rule_index, weight_index;
2732  int ce_index;
2734  UCA_COLL_CE_LIST step;
2735  UCA_COLL_CE_LIST weight_offset;
2736  UCA_CP cp_index;
2737  bool is_overflow;
2738  bool is_ce_empty;
2739  UCA_CP start_cp;
2740  UCA_CP end_cp;
2741  int err_status = NO_ERROR;
2742  CUBRID_TAILOR_RULE *ct_rule;
2743  UCA_COLL_CE_LIST *uca_cp = curr_uca.coll_cp;
2744 
2745  for (rule_index = 0; rule_index < lc->tail_coll.cub_count_rules; rule_index++)
2746  {
2747  ct_rule = &(lc->tail_coll.cub_rules[rule_index]);
2748  if (strlen (ct_rule->step) == 0)
2749  {
2750  strcpy (ct_rule->step, "[0001.0000.0000.0000]\0");
2751  }
2752  if (string_to_coll_ce_list (ct_rule->step, &step) != 0)
2753  {
2754  err_status = ER_LOC_GEN;
2755  LOG_LOCALE_ERROR ("Invalid collation element list for range step.", ER_LOC_GEN, true);
2756  goto exit;
2757  }
2758  if (string_to_coll_ce_list (ct_rule->start_weight, &weight) != 0)
2759  {
2760  err_status = ER_LOC_GEN;
2761  LOG_LOCALE_ERROR ("Invalid collation element list for starting weight.", ER_LOC_GEN, true);
2762  goto exit;
2763  }
2764 
2765  /* Parse the char buffers for start and end codepoint */
2766  if (read_cp_from_tag ((unsigned char *) (ct_rule->start_cp_buf), ct_rule->start_cp_buf_type, &start_cp) !=
2767  NO_ERROR)
2768  {
2769  goto exit;
2770  }
2771  if (read_cp_from_tag ((unsigned char *) (ct_rule->end_cp_buf), ct_rule->end_cp_buf_type, &end_cp) != NO_ERROR)
2772  {
2773  goto exit;
2774  }
2775 
2776  /* Validate starting weight, step and the number of codepoints in the range to be tailored, so that there are no
2777  * overflows above MAX_WEIGHT (0xFFFF). */
2778  is_overflow = false;
2779  for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
2780  {
2781  for (ce_index = 0; ce_index < step.num; ce_index++)
2782  {
2783  if (GET_UCA_WEIGHT (&step, ce_index, weight_index) * (end_cp - start_cp) > MAX_UCA_WEIGHT)
2784  {
2785  is_overflow = true;
2786  break;
2787  }
2788  }
2789  if (is_overflow)
2790  {
2791  break;
2792  }
2793  }
2794 
2795  if (is_overflow)
2796  {
2797  err_status = ER_LOC_GEN;
2798  LOG_LOCALE_ERROR ("Weight range overflow" "Weight or step too big.", ER_LOC_GEN, true);
2799  goto exit;
2800  }
2801 
2802  memcpy (&weight_offset, &weight, sizeof (UCA_COLL_CE_LIST));
2803  weight_offset.num = (weight.num > step.num) ? weight.num : step.num;
2804 
2805  for (cp_index = start_cp; cp_index <= end_cp; cp_index++)
2806  {
2807  memcpy (&uca_cp[cp_index], &weight_offset, sizeof (UCA_COLL_CE_LIST));
2808  for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
2809  {
2810  for (ce_index = 0; ce_index < weight_offset.num; ce_index++)
2811  {
2812  SET_UCA_WEIGHT (&weight_offset, ce_index, weight_index,
2813  GET_UCA_WEIGHT (&step, ce_index, weight_index));
2814  }
2815  }
2816  /* Remove any collation elements with all weight values zero. */
2817  is_ce_empty = true;
2818  while (is_ce_empty)
2819  {
2820  for (weight_index = 0; weight_index < MAX_WEIGHT_LEVELS; weight_index++)
2821  {
2822  if (GET_UCA_WEIGHT (&(uca_cp[cp_index]), uca_cp[cp_index].num - 1, weight_index) != 0)
2823  {
2824  is_ce_empty = false;
2825  }
2826  }
2827  if (is_ce_empty && uca_cp[cp_index].num > 1)
2828  {
2829  uca_cp[cp_index].num--;
2830  }
2831  if (uca_cp[cp_index].num == 1)
2832  {
2833  break;
2834  }
2835  }
2836  }
2837  }
2838 
2839 exit:
2840  return err_status;
2841 }
2842 
2843 /*
2844  * add_key_to_weight_stats_list - Adds a collation key to the list of
2845  * keys having the selected weight inside the first
2846  * collation element on level 1
2847  * Returns : error code
2848  * key(in): the key to add in a list.
2849  * wv(in) : the weight value corresponding to the list where the key will be
2850  * added.
2851  */
2852 static int
2854 {
2855  int err_status = NO_ERROR;
2856 
2857  assert (key != NULL);
2858 
2859  if ((weight_key_list[wv].key_list =
2860  (UCA_COLL_KEY *) realloc (weight_key_list[wv].key_list,
2861  (weight_key_list[wv].list_count + 1) * sizeof (UCA_COLL_KEY))) == NULL)
2862  {
2863  err_status = ER_LOC_GEN;
2864  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
2865  goto exit;
2866  }
2867  memcpy (&(weight_key_list[wv].key_list[weight_key_list[wv].list_count]), key, sizeof (UCA_COLL_KEY));
2868  weight_key_list[wv].list_count++;
2869 
2870 exit:
2871  return err_status;
2872 }
2873 
2874 /*
2875  * remove_key_from_weight_stats_list - Remove a collation key from the list of
2876  * keys having a selected weight inside the first
2877  * collation element on level 1
2878  * Returns : error status
2879  * cp(in) : the codepoint to remove from the list.
2880  * weight_index(in) : the identifying weight value for the list where
2881  * the codepoint will be removed from.
2882  */
2883 static int
2885 {
2886  int found_at, i;
2887  int err_status = NO_ERROR;
2888 
2889  if (weight_key_list[wv].list_count == 0)
2890  {
2891  goto exit;
2892  }
2893  else if (weight_key_list[wv].list_count == 1)
2894  {
2895  if (memcmp (&(weight_key_list[wv].key_list[0]), key, sizeof (UCA_COLL_KEY)) == 0)
2896  {
2897  weight_key_list[wv].list_count = 0;
2898  memset (&(weight_key_list[wv].key_list[0]), 0, sizeof (UCA_COLL_KEY));
2899  goto exit;
2900  }
2901  }
2902 
2903  assert (weight_key_list[wv].list_count > 1);
2904 
2905  found_at = -1;
2906  for (i = 0; i < weight_key_list[wv].list_count; i++)
2907  {
2908  if (memcmp (&(weight_key_list[wv].key_list[i]), key, sizeof (UCA_COLL_KEY)) == 0)
2909  {
2910  found_at = i;
2911  break;
2912  }
2913  }
2914 
2915  assert (found_at != -1);
2916 
2917  for (i = found_at; i < weight_key_list[wv].list_count - 1; i++)
2918  {
2919  memcpy (&(weight_key_list[wv].key_list[i]), &(weight_key_list[wv].key_list[i + 1]), sizeof (UCA_COLL_KEY));
2920  }
2921 
2922  weight_key_list[wv].list_count--;
2923 
2924  /* zero last element */
2925  i = weight_key_list[wv].list_count;
2926  memset (&(weight_key_list[wv].key_list[i]), 0, sizeof (UCA_COLL_KEY));
2927 
2928 exit:
2929  return err_status;
2930 }
2931 
2932 /*
2933  * change_key_weight_list - Removes a collation key from the list of
2934  * keys having a selected weight inside the first
2935  * collation element on level 1, and moving it to another
2936  * simmilar list.
2937  * Returns : error status
2938  * key(in) : the key to move
2939  * w_from(in) : the identifying weight value for the list from where
2940  * the key will be removed.
2941  * w_to(in) : the identifying weight value for the list where the key will be
2942  * added.
2943  */
2944 static int
2945 change_key_weight_list (const UCA_COLL_KEY * key, UCA_W w_from, UCA_W w_to)
2946 {
2947  int err_status = NO_ERROR;
2948 
2949  assert (key != NULL);
2950 
2951  err_status = remove_key_from_weight_stats_list (key, w_from);
2952  if (err_status != NO_ERROR)
2953  {
2954  goto exit;
2955  }
2956  err_status = add_key_to_weight_stats_list (key, w_to);
2957 
2958 exit:
2959  return err_status;
2960 }
2961 
2962 /*
2963  * new_contraction - creates new UCA contraction element
2964  * Returns : pointer to newly created contraction, or NULL if memory cannot be
2965  * allocated
2966  *
2967  * storage(in/out) : storage for UCA contraction
2968  *
2969  */
2970 static UCA_CONTRACTION *
2972 {
2973  UCA_CONTRACTION *contr = NULL;
2974 
2975  assert (storage != NULL);
2976 
2977  if (storage->count_contr >= storage->max_contr)
2978  {
2979  storage->coll_contr =
2980  (UCA_CONTRACTION *) realloc (storage->coll_contr,
2981  sizeof (UCA_CONTRACTION) * (storage->max_contr + UCA_CONTR_EXP_CNT_GROW));
2982 
2983  if (storage->coll_contr == NULL)
2984  {
2985  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
2986  return NULL;
2987  }
2988 
2989  storage->max_contr += UCA_CONTR_EXP_CNT_GROW;
2990  }
2991 
2992  assert (storage->coll_contr != NULL);
2993 
2994  contr = &(storage->coll_contr[storage->count_contr++]);
2995  memset (contr, 0, sizeof (UCA_CONTRACTION));
2996 
2997  return contr;
2998 }
2999 
3000 /*
3001  * new_expansion - creates new UCA expansion element
3002  * Returns : pointer to newly created expansion, or NULL if memory cannot be
3003  * allocated
3004  *
3005  * storage(in/out) : storage for UCA expansion
3006  *
3007  */
3008 static UCA_EXPANSION *
3010 {
3011  UCA_EXPANSION *exp = NULL;
3012 
3013  assert (storage != NULL);
3014 
3015  if (storage->count_exp >= storage->max_exp)
3016  {
3017  storage->coll_exp =
3018  (UCA_EXPANSION *) realloc (storage->coll_exp,
3019  sizeof (UCA_EXPANSION) * (storage->max_exp + UCA_CONTR_EXP_CNT_GROW));
3020 
3021  if (storage->coll_exp == NULL)
3022  {
3023  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
3024  return NULL;
3025  }
3026 
3027  storage->max_exp += UCA_CONTR_EXP_CNT_GROW;
3028  }
3029 
3030  assert (storage->coll_exp != NULL);
3031 
3032  exp = &(storage->coll_exp[storage->count_exp++]);
3033  memset (exp, 0, sizeof (UCA_EXPANSION));
3034 
3035  return exp;
3036 }
3037 
3038 /*
3039  * add_uca_contr_or_exp - creates an UCA contraction or expansion from
3040  * codepoints
3041  * Returns : id of uca contraction/expansion or -1 if not be created
3042  *
3043  * lc(in): locale data
3044  * storage(in/out): storage to create in
3045  * cp_array(in): list of codepoints
3046  * cp_count(in): number of codepoints
3047  *
3048  */
3049 static int
3050 add_uca_contr_or_exp (LOCALE_COLLATION * lc, UCA_STORAGE * storage, const unsigned int *cp_array, const int cp_count,
3051  const UCA_COLL_KEY_TYPE seq_type)
3052 {
3053  UCA_CHR_SEQ *uca_seq = NULL;
3054  UCA_COLL_KEY contr_key;
3055  int ce_count;
3056  int i;
3057 
3058  assert (storage != NULL);
3059  assert (cp_array != NULL);
3060  assert (cp_count > 1);
3061  assert (cp_count <= LOC_MAX_UCA_CHARS_SEQ);
3062 
3063  if (seq_type == COLL_KEY_TYPE_CONTR)
3064  {
3065  uca_seq = new_contraction (storage);
3066  }
3067  else
3068  {
3069  assert (seq_type == COLL_KEY_TYPE_EXP);
3070  uca_seq = new_expansion (storage);
3071  }
3072 
3073  if (uca_seq == NULL)
3074  {
3075  return -1;
3076  }
3077 
3078  uca_seq->cp_count = cp_count;
3079  for (i = 0; i < cp_count; i++)
3080  {
3081  if ((int) cp_array[i] >= lc->tail_coll.sett_max_cp)
3082  {
3083  LOG_LOCALE_ERROR ("Codepoint value in sequence exceeds " "maximum allowed codepoint", ER_LOC_GEN, true);
3084  return -1;
3085  }
3086 
3087  uca_seq->cp_list[i] = (UCA_CP) cp_array[i];
3088  }
3089 
3090  /* build collation element list from CE of codepoints */
3091  ce_count = 0;
3092  for (i = 0; i < cp_count; i++)
3093  {
3094  UCA_COLL_KEY key_cp;
3095  UCA_COLL_CE_LIST *cp_ce_list;
3096  int lev;
3097  int ce_index;
3098 
3099  make_coll_key (&key_cp, COLL_KEY_TYPE_CP, cp_array[i]);
3100 
3101  cp_ce_list = get_ce_list_from_coll_key (&key_cp);
3102  if (cp_ce_list == NULL)
3103  {
3104  LOG_LOCALE_ERROR ("Cannot find CE list for key", ER_LOC_GEN, true);
3105  return -1;
3106  }
3107 
3108  assert (cp_ce_list->num < MAX_UCA_EXP_CE);
3109 
3110  if (ce_count + cp_ce_list->num >= MAX_UCA_EXP_CE)
3111  {
3112  LOG_LOCALE_ERROR ("Cannot create contraction." "Too many collation elements", ER_LOC_GEN, true);
3113  return -1;
3114  }
3115 
3116  /* copy all CE of codepoint */
3117  for (ce_index = 0; ce_index < cp_ce_list->num; ce_index++)
3118  {
3119  for (lev = 0; lev < MAX_WEIGHT_LEVELS; lev++)
3120  {
3121  SET_UCA_WEIGHT (&(uca_seq->ce), ce_index + ce_count, lev, GET_UCA_WEIGHT (cp_ce_list, ce_index, lev));
3122  }
3123  }
3124 
3125  ce_count += cp_ce_list->num;
3126  }
3127 
3128  uca_seq->ce.num = ce_count;
3129 
3130  if (seq_type == COLL_KEY_TYPE_EXP)
3131  {
3132  assert (storage->count_exp > 0);
3133  return storage->count_exp - 1;
3134  }
3135 
3136  assert (seq_type == COLL_KEY_TYPE_CONTR);
3137 
3138  assert (storage->count_contr > 0);
3139 
3140  /* contraction key to statistics */
3141  make_coll_key (&contr_key, COLL_KEY_TYPE_CONTR, storage->count_contr - 1);
3142  if (add_key_to_weight_stats_list (&contr_key, GET_UCA_WEIGHT (&(uca_seq->ce), 0, 0)) != NO_ERROR)
3143  {
3144  return -1;
3145  }
3146 
3147  w_occurences[0][GET_UCA_WEIGHT (&(uca_seq->ce), 0, 0)]++;
3148 
3149  return storage->count_contr - 1;
3150 }
3151 
3152 /*
3153  * read_cp_from_tag - reads a codepoint value from a string tag
3154  * Returns : error status
3155  *
3156  * buffer(in): string buffer (nul terminated)
3157  * type(in): type of stored codepoint
3158  * cp(out): codepoint value
3159  *
3160  */
3161 static int
3162 read_cp_from_tag (unsigned char *buffer, CP_BUF_TYPE type, UCA_CP * cp)
3163 {
3164  int temp_cp = 0;
3165  int result = 0;
3166  int err_status = NO_ERROR;
3167  char *chr_ptr;
3168  unsigned char *dummy;
3169  char err_msg[ERR_MSG_SIZE];
3170 
3171  assert (buffer != NULL);
3172  assert (cp != NULL);
3173 
3174  if (*buffer == '\0')
3175  {
3176  err_status = ER_LOC_GEN;
3177  snprintf (err_msg, sizeof (err_msg) - 1, "Tag has no content");
3178  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3179  goto exit;
3180  }
3181  else if (type == BUF_TYPE_CHAR)
3182  {
3183  dummy = buffer;
3184  if (intl_count_utf8_chars (dummy, strlen ((char *) dummy)) > 1)
3185  {
3186  err_status = ER_LOC_GEN;
3187  snprintf (err_msg, sizeof (err_msg) - 1, "Multiple chars found in codepoint tag." "Tag content: %s", buffer);
3188  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3189  goto exit;
3190  }
3191 
3192  dummy = buffer;
3193  temp_cp = intl_utf8_to_cp (buffer, strlen ((char *) buffer), &dummy);
3194 
3195  if (temp_cp > 0xFFFF || temp_cp < 0)
3196  {
3197  err_status = ER_LOC_GEN;
3198  snprintf (err_msg, sizeof (err_msg) - 1, "Codepoint found in tag was out of range." "Tag content: %s",
3199  buffer);
3200  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3201  goto exit;
3202  }
3203  }
3204  else if (type == BUF_TYPE_CODE)
3205  {
3206  chr_ptr = NULL;
3207 
3208  result = str_to_int32 (&temp_cp, &chr_ptr, (const char *) buffer, 16);
3209  if (result != 0 || temp_cp > 0xFFFF || temp_cp < 0)
3210  {
3211  err_status = ER_LOC_GEN;
3212  snprintf (err_msg, sizeof (err_msg) - 1, "Codepoint found in tag was out of range." "Tag content: %s",
3213  buffer);
3214  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3215  goto exit;
3216  }
3217  else if (temp_cp == 0 && (chr_ptr == (char *) buffer))
3218  {
3219  /* If tag content does not start with a hex number. */
3220  err_status = ER_LOC_GEN;
3221  snprintf (err_msg, sizeof (err_msg) - 1, "No valid codepoint could be found in tag." "Tag content: %s",
3222  buffer);
3223  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3224  goto exit;
3225  }
3226  else if (temp_cp > 0 && strlen (chr_ptr) != 0)
3227  {
3228  /* If tag content looks like "1234ZZZZ". */
3229  err_status = ER_LOC_GEN;
3230  snprintf (err_msg, sizeof (err_msg) - 1, "Encountered codepoint tag with invalid content." "Tag content: %s",
3231  buffer);
3232  LOG_LOCALE_ERROR (err_msg, ER_LOC_GEN, true);
3233  goto exit;
3234  }
3235  }
3236  *cp = (UCA_CP) temp_cp;
3237 exit:
3238  return err_status;
3239 }
3240 
3241 
3242 /*
3243  * comp_func_coll_contr_bin - compare function for sorting contractions list
3244  *
3245  * Note : Contractions are sorted in binary order
3246  * Elements in array to be sorted are of COLL_CONTRACTION type
3247  */
3248 static int
3249 comp_func_coll_contr_bin (const void *arg1, const void *arg2)
3250 {
3251  UCA_COLL_CONTR_ID *c1 = (UCA_COLL_CONTR_ID *) arg1;
3252  UCA_COLL_CONTR_ID *c2 = (UCA_COLL_CONTR_ID *) arg2;
3253 
3254  return strcmp (c1->contr_ref.c_buf, c2->contr_ref.c_buf);
3255 }
3256 
3257 /*
3258  * UCA with expansion support
3259  */
3260 
3261 /*
3262  * create_opt_ce_w_exp - creates weight tables and next seq array for UCA
3263  * sorting with expansions
3264  * lc(in) : locale struct
 *
 * Returns : NO_ERROR on success; ER_LOC_GEN when a memory allocation fails;
 *	     otherwise the error status returned by add_opt_coll_contraction
 *	     or optimize_coll_contractions.
 *
 * Note : the weight arrays, the per codepoint CE count array and the
 *	  'next' codepoint array are stored in lc->opt_coll. The temporary
 *	  list of sortable keys (coll_key_list) is released before return.
 *	  NOTE(review): uca_w_l13, uca_exp_num and uca_w_l4 are not released
 *	  in this function when an error exit is taken before they are
 *	  stored in lc->opt_coll - confirm whether the caller handles that.
3265  */
3266 static int
3268 {
3269  UCA_COLL_KEY key;
3270  UCA_COLL_CE_LIST *ce_list;
3271  UCA_L13_W *uca_w_l13 = NULL;
3272  UCA_L4_W *uca_w_l4 = NULL;
3273  char *uca_exp_num = NULL;
3274  int uca_w_array_size_l13;
3275  int uca_w_array_size_l4;
3276  int err_status = NO_ERROR;
3277  int i;
3278  int cp;
3279  int max_num = 0;
3280  unsigned int *coll_key_list = NULL;
3281  int coll_key_list_cnt = 0;
3282  bool use_level_4;
/* options installed as uca_tailoring_options only while the 'next' values are computed */
3283  UCA_OPTIONS uca_exp_next_opt = { TAILOR_PRIMARY, false, false, 0, true, CONTR_IGNORE, true,
3285  };
3286  UCA_OPTIONS *saved_uca_opt = NULL;
3287 
/* one sortable entry for every codepoint and for every contraction */
3288  coll_key_list = (unsigned int *) malloc ((curr_uca.count_contr + lc->tail_coll.sett_max_cp) * sizeof (unsigned int));
3289  if (coll_key_list == NULL)
3290  {
3291  err_status = ER_LOC_GEN;
3292  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
3293  goto exit;
3294  }
3295 
/* first pass : find the largest number of collation elements of a codepoint; it becomes the row size of the
 * weight arrays. NOTE(review): ce_list is used without a NULL check. */
3296  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
3297  {
3298  make_coll_key (&key, COLL_KEY_TYPE_CP, cp);
3299  ce_list = get_ce_list_from_coll_key (&key);
3300 
3301  max_num = MAX (max_num, ce_list->num);
3302  }
3303 
3304  use_level_4 = (lc->tail_coll.uca_opt.sett_strength >= TAILOR_QUATERNARY) ? true : false;
3305 
3306  uca_w_array_size_l13 = lc->tail_coll.sett_max_cp * max_num * sizeof (UCA_L13_W);
3307  uca_w_array_size_l4 = lc->tail_coll.sett_max_cp * max_num * sizeof (UCA_L4_W);
3308  uca_w_l13 = (UCA_L13_W *) malloc (uca_w_array_size_l13);
3309  uca_exp_num = (char *) malloc (lc->tail_coll.sett_max_cp);
3310 
3311  if (uca_w_l13 == NULL || uca_exp_num == NULL)
3312  {
3313  err_status = ER_LOC_GEN;
3314  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
3315  goto exit;
3316  }
3317 
3318  memset (uca_w_l13, 0, uca_w_array_size_l13);
3319  memset (uca_exp_num, 0, lc->tail_coll.sett_max_cp);
3320 
3321  /* do not generate L4 if tailoring level doesn't require it */
3322  if (use_level_4)
3323  {
3324  uca_w_l4 = (UCA_L4_W *) malloc (uca_w_array_size_l4);
3325  if (uca_w_l4 == NULL)
3326  {
3327  err_status = ER_LOC_GEN;
3328  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
3329  goto exit;
3330  }
3331 
3332  memset (uca_w_l4, 0, uca_w_array_size_l4);
3333  }
3334 
3335  lc->opt_coll.next_cp = (unsigned int *) malloc (lc->tail_coll.sett_max_cp * sizeof (unsigned int));
3336  if (lc->opt_coll.next_cp == NULL)
3337  {
3338  err_status = ER_LOC_GEN;
3339  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_GEN, true);
3340  goto exit;
3341  }
3342 
/* every byte of the 'next' array starts as 0xff */
3343  memset (lc->opt_coll.next_cp, 0xff, lc->tail_coll.sett_max_cp * sizeof (unsigned int));
3344 
3345  lc->opt_coll.uca_w_l13 = uca_w_l13;
3346  lc->opt_coll.uca_w_l4 = uca_w_l4;
3347  lc->opt_coll.uca_num = uca_exp_num;
3348  lc->opt_coll.uca_exp_num = max_num;
3350 
/* second pass : store the CE count and the packed weights of every codepoint (row cp starts at cp * max_num)
 * and add the codepoint to the list of keys to sort */
3351  for (cp = 0; cp < lc->tail_coll.sett_max_cp; cp++)
3352  {
3353  make_coll_key (&key, COLL_KEY_TYPE_CP, cp);
3354  ce_list = get_ce_list_from_coll_key (&key);
3355 
3356  uca_exp_num[cp] = ce_list->num;
3357 
3358  assert (uca_exp_num[cp] > 0);
3359 
3360  build_compressed_uca_w_l13 (ce_list, &(uca_w_l13[cp * max_num]));
3361  if (use_level_4)
3362  {
3363  build_uca_w_l4 (ce_list, &(uca_w_l4[cp * max_num]));
3364  }
3365 
3366  coll_key_list[coll_key_list_cnt++] = cp;
3367  }
3368 
3369  assert (lc->opt_coll.count_contr == 0);
3370 
/* contractions are added to the sortable list with the INTL_MASK_CONTR bits set on their index */
3371  for (i = 0; i < curr_uca.count_contr; i++)
3372  {
/* NOTE(review): no statement building 'key' for contraction i is visible before this call - confirm
 * against the original source. */
3374  ce_list = get_ce_list_from_coll_key (&key);
3375 
3376  coll_key_list[coll_key_list_cnt++] = i | INTL_MASK_CONTR;
3377 
3378  err_status = add_opt_coll_contraction (lc, &key, 0, true);
3379  if (err_status != NO_ERROR)
3380  {
3381  goto exit;
3382  }
3383  }
3384 
3385  qsort (coll_key_list, coll_key_list_cnt, sizeof (unsigned int), uca_comp_func_coll_list_exp_fo);
3386 
/* the comparisons below run with uca_exp_next_opt; the previous options are restored after the loop */
3387  saved_uca_opt = uca_tailoring_options;
3388  uca_tailoring_options = &uca_exp_next_opt;
3389 
3390  /* TODO : optimize this to speed up 'next' computing */
3391  for (i = 0; i < coll_key_list_cnt - 1; i++)
3392  {
3393  UCA_COLL_KEY curr_key;
3394  UCA_COLL_KEY next_key;
3395  unsigned int curr_pos = coll_key_list[i];
3396  unsigned int next_pos = 0;
3397  int j;
3398 
3399  if (INTL_IS_NEXT_CONTR (curr_pos))
3400  {
3401  make_coll_key (&curr_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (curr_pos));
3402  }
3403  else
3404  {
3405  make_coll_key (&curr_key, COLL_KEY_TYPE_CP, curr_pos);
3406  }
3407 
3408  /* 'next' item is the first element which has weight level 1 greater then current item */
/* when no later element compares greater, next_pos keeps the last element of the list */
3409  for (j = i + 1; j < coll_key_list_cnt; j++)
3410  {
3411  next_pos = coll_key_list[j];
3412 
3413  if (uca_comp_func_coll_list_exp (&curr_pos, &next_pos) < 0)
3414  {
3415  break;
3416  }
3417  }
3418 
3419  if (INTL_IS_NEXT_CONTR (next_pos))
3420  {
3421  make_coll_key (&next_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (next_pos));
3422  }
3423  else
3424  {
3425  make_coll_key (&next_key, COLL_KEY_TYPE_CP, next_pos);
3426  }
3427 
3428  set_next_value_for_coll_key (lc, &curr_key, &next_key);
3429  }
3430 
3431  uca_tailoring_options = saved_uca_opt;
3432 
3433  /* set next for last key to itself */
3434  {
3435  UCA_COLL_KEY curr_key;
3436  unsigned int curr_pos = coll_key_list[coll_key_list_cnt - 1];
3437  if (INTL_IS_NEXT_CONTR (curr_pos))
3438  {
3439  make_coll_key (&curr_key, COLL_KEY_TYPE_CONTR, INTL_GET_NEXT_CONTR_ID (curr_pos));
3440  }
3441  else
3442  {
3443  make_coll_key (&curr_key, COLL_KEY_TYPE_CP, curr_pos);
3444  }
3445  set_next_value_for_coll_key (lc, &curr_key, &curr_key);
3446  }
3447 
3448  if (lc->opt_coll.count_contr > 0)
3449  {
3450  err_status = optimize_coll_contractions (lc);
3451  }
3452 
3453 exit:
/* only the temporary key list is released here */
3454  if (coll_key_list != NULL)
3455  {
3456  free (coll_key_list);
3457  }
3458 
3459  return err_status;
3460 }
3461 
/*
 * uca_comp_func_coll_list_exp_fo - compare function for sorting collatable
 *				    elements according to UCA algorithm,
 *				    with full order
 *
 * Note: This function is used in the first step of computing 'next' sequence
 *	 If result of 'uca_comp_func_coll_list_exp' is zero, the comparison
 *	 is performed on the key values (codepoint, or contraction index
 *	 with the contraction bit set), as unsigned integers. The purpose is
 *	 to provide a 'deterministic comparison' in order to eliminate
 *	 unpredictable results of sort algorithm (qsort) when computing
 *	 'next' fields.
 *	 The tie is decided with explicit comparisons and not with a
 *	 subtraction : the difference of two unsigned keys converted to int
 *	 does not keep its sign when one key has the highest bit set, and
 *	 the resulting order would not be consistent.
 */
static int
uca_comp_func_coll_list_exp_fo (const void *arg1, const void *arg2)
{
  unsigned int pos1;
  unsigned int pos2;
  int cmp;

  pos1 = *((unsigned int *) arg1);
  pos2 = *((unsigned int *) arg2);

  cmp = uca_comp_func_coll_list_exp (arg1, arg2);
  if (cmp != 0)
    {
      return cmp;
    }

  if (pos1 < pos2)
    {
      return -1;
    }

  return (pos1 > pos2) ? 1 : 0;
}
3487 
3488 /*
3489  * uca_comp_func_coll_list_exp - compare function for sorting collatable
3490  * elements according to UCA algorithm
3491  *
3492  * Note: this function is used to sort collatable elements according to
3493  * CEs tables and UCA settins (sorting options)
3494  * The elements in array are 32 bit unsigned integers, keys which
3495  * may be Unicode points or contractions (when highest bit is set)
3496  *
3497  */
3498 static int
3499 uca_comp_func_coll_list_exp (const void *arg1, const void *arg2)
3500 {
3501  UCA_COLL_KEY pos1_key;
3502  UCA_COLL_KEY pos2_key;
3503  unsigned int pos1;
3504  unsigned int pos2;
3505 
3506  pos1 = *((unsigned int *) arg1);
3507  pos2 = *((unsigned int *) arg2);
3508 
3509  if (INTL_IS_NEXT_CONTR (pos1))
3510  {
3512  }
3513  else
3514  {
3515  make_coll_key (&pos1_key, COLL_KEY_TYPE_CP, pos1);
3516  }
3517 
3518  if (INTL_IS_NEXT_CONTR (pos2))
3519  {
3521  }
3522  else
3523  {
3524  make_coll_key (&pos2_key, COLL_KEY_TYPE_CP, pos2);
3525  }
3526 
3527  return uca_comp_func_coll_key (&pos1_key, &pos2_key);
3528 }
3529 
3530 /*
3531  * build_compressed_uca_w_l13 - builds the equivalent UCA weights for levels
3532  * 1 to 3 of a collation element list
3533  *
3534  * Note :
3535  * The encoding of levels 1 to 3 is :
3536  * 33333332 22222222 1111111 1111111
3537  * Ranges
3538  * L1 = 0000-ffff
3539  * L2 = 0000-01ff
3540  * L3 = 0000-007f
3541  *
3542  */
3543 static void
3545 {
3546  int i;
3547 
3548  assert (ce_list != NULL);
3549  assert (uca_w_l13 != NULL);
3550 
3551  for (i = 0; i < ce_list->num; i++)
3552  {
3553  UCA_W w1 = GET_UCA_WEIGHT (ce_list, i, 0);
3554  UCA_W w2 = GET_UCA_WEIGHT (ce_list, i, 1);
3555  UCA_W w3 = GET_UCA_WEIGHT (ce_list, i, 2);
3556  UCA_L13_W w_l123 = w3 & 0x0000007f;
3557 
3558  w_l123 <<= 9;
3559  w_l123 |= w2 & 0x000001ff;
3560  w_l123 <<= 16;
3561 
3562  w_l123 |= w1 & 0x0000ffff;
3563 
3564  uca_w_l13[i] = w_l123;
3565  }
3566 }
3567 
3568 /*
3569  * build_uca_w_l4 - builds the equivalent UCA weights for level 4 from a
3570  * collation element list
3571  */
3572 static void
3573 build_uca_w_l4 (const UCA_COLL_CE_LIST * ce_list, UCA_L4_W * uca_w_l4)
3574 {
3575  int i;
3576 
3577  assert (ce_list != NULL);
3578  assert (uca_w_l4 != NULL);
3579 
3580  for (i = 0; i < ce_list->num; i++)
3581  {
3582  uca_w_l4[i] = GET_UCA_WEIGHT (ce_list, i, 3);
3583  }
3584 }
static int find_exp_id(const unsigned int *cp_array, const int cp_count, UCA_STORAGE *st)
Definition: uca_support.c:2547
int uca_process_collation(LOCALE_COLLATION *lc, bool is_verbose)
Definition: uca_support.c:810
TAILOR_DIR
#define INTL_IS_NEXT_CONTR(v)
#define MAX_UCA_CODEPOINT
Definition: uca_support.c:44
#define NO_ERROR
Definition: error_code.h:46
static int apply_tailoring_rule_w_dir(TAILOR_DIR dir, UCA_COLL_KEY *anchor_key, UCA_COLL_KEY *key, UCA_COLL_KEY *ref_key, T_LEVEL lvl)
Definition: uca_support.c:2237
#define MAX_WEIGHT_LEVELS
Definition: uca_support.c:41
#define ER_LOC_GEN
Definition: error_code.h:1371
static int load_ducet(const char *file_path, const int sett_contr_policy)
Definition: uca_support.c:224
UCA_OPTIONS uca_opt
bool sett_expansions
static int create_opt_weights(LOCALE_COLLATION *lc)
Definition: uca_support.c:1619
void uca_free_data(void)
Definition: uca_support.c:2119
CP_BUF_TYPE start_cp_buf_type
char end_cp_buf[LOC_DATA_BUFF_SIZE]
unsigned char size
UCA_EXPANSION * coll_exp
Definition: uca_support.c:131
static void sort_coll_key_lists(LOCALE_COLLATION *lc)
Definition: uca_support.c:1462
UCA_CP cp_list[LOC_MAX_UCA_CHARS_SEQ]
Definition: uca_support.c:74
#define SET_UCA_WEIGHT(ce_list, i, w, val)
Definition: uca_support.c:65
#define LOC_MAX_UCA_CHARS_SEQ
static int build_key_list_groups(LOCALE_COLLATION *lc)
Definition: uca_support.c:1411
union uca_coll_key::@37 val
static UCA_OPTIONS * uca_tailoring_options
Definition: uca_support.c:161
static UCA_COLL_CE_LIST * get_ce_list_from_coll_key(const UCA_COLL_KEY *key)
Definition: uca_support.c:1578
UCA_L13_W uca_w_l13[MAX_UCA_EXP_CE]
char start_cp_buf[LOC_DATA_BUFF_SIZE]
static int apply_tailoring_rule(TAILOR_DIR dir, UCA_COLL_KEY *anchor_key, UCA_COLL_KEY *key, UCA_COLL_KEY *ref_key, T_LEVEL lvl)
Definition: uca_support.c:2161
static int remove_key_from_weight_stats_list(const UCA_COLL_KEY *key, UCA_W wv)
Definition: uca_support.c:2884
UCA_W weight[MAX_WEIGHT_LEVELS]
Definition: uca_support.c:54
T_LEVEL
#define CAST_STRLEN
Definition: porting.h:470
#define GET_UCA_WEIGHT(ce_list, i, w)
Definition: uca_support.c:64
static UCA_STORAGE curr_uca
Definition: uca_support.c:147
RULE_POS_TYPE r_pos_type
static int compare_ce_list(UCA_COLL_CE_LIST *ce_list1, UCA_COLL_CE_LIST *ce_list2, UCA_OPTIONS *uca_opt)
Definition: uca_support.c:729
static int logical_pos_cp[MAX_LOGICAL_POS]
Definition: uca_support.c:158
int intl_cp_to_utf8(const unsigned int codepoint, unsigned char *utf8_seq)
char prev_file_path[PATH_MAX]
Definition: uca_support.c:136
int intl_count_utf8_chars(const unsigned char *s, int length_in_bytes)
static int compute_weights_per_level_stats(void)
Definition: uca_support.c:1234
UCA_CHR_SEQ UCA_EXPANSION
Definition: uca_support.c:81
UCA_OPTIONS uca_opt
static int uca_comp_func_coll_list_exp_fo(const void *arg1, const void *arg2)
Definition: uca_support.c:3474
static void build_compressed_uca_w_l13(const UCA_COLL_CE_LIST *ce_list, UCA_L13_W *uca_w_l13)
Definition: uca_support.c:3544
static int change_key_weight_list(const UCA_COLL_KEY *key, UCA_W w_from, UCA_W w_to)
Definition: uca_support.c:2945
static UCA_EXPANSION * new_expansion(UCA_STORAGE *storage)
Definition: uca_support.c:3009
bool use_only_first_ce
#define MAX_UNICODE_CHARS
unsigned char uca_num
static void build_uca_w_l4(const UCA_COLL_CE_LIST *ce_list, UCA_L4_W *uca_w_l4)
Definition: uca_support.c:3573
static int destroy_uca_instance(void)
Definition: uca_support.c:662
char * envvar_localedatadir_file(char *path, size_t size, const char *filename)
#define assert(x)
CP_BUF_TYPE
#define strlen(s1)
Definition: uca_support.c:36
struct uca_weight_key_list UCA_WEIGHT_KEY_LIST
Definition: uca_support.c:113
unsigned short UCA_W
static int create_opt_ce_w_exp(LOCALE_COLLATION *lc)
Definition: uca_support.c:3267
static UCA_CONTRACTION * new_contraction(UCA_STORAGE *storage)
Definition: uca_support.c:2971
UCA_COLL_KEY_TYPE type
Definition: uca_support.c:102
#define ER_OUT_OF_VIRTUAL_MEMORY
Definition: error_code.h:50
static int apply_tailoring_rules(LOCALE_COLLATION *lc)
Definition: uca_support.c:919
static int find_contr_id(const unsigned int *cp_array, const int cp_count, UCA_STORAGE *st)
Definition: uca_support.c:2491
UCA_L4_W uca_w_l4[MAX_UCA_EXP_CE]
int prev_contr_policy
Definition: uca_support.c:137
static UCA_COLL_KEY * get_key_with_ce_sublist(UCA_COLL_CE_LIST *uca_item, const int lvl)
Definition: uca_support.c:2401
COLL_TAILORING tail_coll
CUBRID_TAILOR_RULE * cub_rules
unsigned int cp_first_contr_count
#define DUCET_FILE
Definition: uca_support.c:39
struct coll_contraction COLL_CONTRACTION
#define NULL
Definition: freelistheap.h:34
int str_to_int32(int *ret_p, char **end_p, const char *str_p, int base)
Definition: porting.c:2346
UCA_COLL_KEY * key_list
Definition: uca_support.c:116
unsigned int intl_utf8_to_cp(const unsigned char *utf8, const int size, unsigned char **next_char)
const char * er_msg(void)
static int init_uca_instance(LOCALE_COLLATION *lc)
Definition: uca_support.c:570
CP_BUF_TYPE end_cp_buf_type
static UCA_WEIGHT_KEY_LIST * weight_key_list
Definition: uca_support.c:156
#define MAX_UCA_EXP_CE
int * cp_first_contr_array
TAILOR_RULE * rules
char anchor_buf[LOC_DATA_COLL_TWO_CHARS]
static UCA_STORAGE ducet
Definition: uca_support.c:140
static int apply_absolute_tailoring_rules(LOCALE_COLLATION *lc)
Definition: uca_support.c:2729
UCA_CHR_SEQ UCA_CONTRACTION
Definition: uca_support.c:80
static int uca_comp_func_coll_list_exp(const void *arg1, const void *arg2)
Definition: uca_support.c:3499
#define cmp
Definition: mprec.h:351
char c_buf[LOC_MAX_UCA_CHARS_SEQ *INTL_UTF8_MAX_CHAR_SIZE]
FILE * fopen_ex(const char *filename, const char *type)
Definition: util_common.c:322
UCA_COLL_KEY_TYPE
Definition: uca_support.c:92
static void sort_one_coll_key_list(LOCALE_COLLATION *lc, int weight_index)
Definition: uca_support.c:1484
UCA_CONTRACTION * coll_contr
Definition: uca_support.c:126
static int uca_comp_func_coll_key_fo(const void *arg1, const void *arg2)
Definition: uca_support.c:1510
#define INTL_UTF8_MAX_CHAR_SIZE
int string_to_int_array(char *s, uint32 *cp_list, const int cp_list_size, const char *delims)
int intl_utf8_to_cp_list(const unsigned char *utf8, const int size, unsigned int *cp_array, const int max_array_size, int *array_count)
static void make_coll_key(UCA_COLL_KEY *key, UCA_COLL_KEY_TYPE type, const int key_id)
Definition: uca_support.c:2456
static int read_cp_from_tag(unsigned char *buffer, CP_BUF_TYPE type, UCA_CP *cp)
Definition: uca_support.c:3162
COLL_CONTRACTION * contr_list
unsigned char num
Definition: uca_support.c:60
#define snprintf_dots_truncate(dest, max_len,...)
Definition: porting.h:323
UCA_L13_W * uca_w_l13
static int comp_func_coll_contr_bin(const void *arg1, const void *arg2)
Definition: uca_support.c:3249
UCA_COLL_CE_LIST * coll_cp
Definition: uca_support.c:123
UCA_L4_W * uca_w_l4
struct uca_coll_key UCA_COLL_KEY
Definition: uca_support.c:99
#define INTL_MASK_CONTR
static int apply_tailoring_rule_identity(UCA_COLL_KEY *key, UCA_COLL_KEY *ref_key)
Definition: uca_support.c:2180
UCA_COLL_CE_LIST ce
Definition: uca_support.c:77
TAILOR_DIR direction
char step[MAX_STRLEN_FOR_COLLATION_ELEMENT]
static int string_to_coll_ce_list(char *s, UCA_COLL_CE_LIST *ui)
Definition: uca_support.c:2603
char start_weight[MAX_STRLEN_FOR_COLLATION_ELEMENT]
unsigned int * next_cp
unsigned int uint32
static UCA_W * w_occurences[MAX_WEIGHT_LEVELS]
Definition: uca_support.c:154
T_LEVEL sett_strength
int i
Definition: dynamic_load.c:954
#define LOG_LOCALE_ERROR(msg, er_status, do_print)
unsigned int UCA_L13_W
for(p=libs;*p;p++)
Definition: dynamic_load.c:968
unsigned short int UCA_L4_W
unsigned short UCA_CP
COLL_CONTRACTION contr_ref
Definition: uca_support.c:87
#define UCA_CONTR_EXP_CNT_GROW
Definition: uca_support.c:46
static int optimize_coll_contractions(LOCALE_COLLATION *lc)
Definition: uca_support.c:1791
unsigned int * weights
static int uca_comp_func_coll_key(const void *arg1, const void *arg2)
Definition: uca_support.c:1551
#define INTL_GET_NEXT_CONTR_ID(v)
#define MAX_LOGICAL_POS
Definition: uca_support.c:48
static int add_opt_coll_contraction(LOCALE_COLLATION *lc, const UCA_COLL_KEY *contr_key, const unsigned int wv, bool use_expansions)
Definition: uca_support.c:2038
static int set_next_value_for_coll_key(LOCALE_COLLATION *lc, const UCA_COLL_KEY *coll_key, const UCA_COLL_KEY *next_key)
Definition: uca_support.c:1991
static int add_key_to_weight_stats_list(const UCA_COLL_KEY *key, UCA_W wv)
Definition: uca_support.c:2853
int contr_min_size
char * uca_num
#define ERR_MSG_SIZE
#define MAX_UCA_WEIGHT
Definition: uca_support.c:43
unsigned int cp_first_contr_offset
unsigned char cp_count
static int add_uca_contr_or_exp(LOCALE_COLLATION *lc, UCA_STORAGE *storage, const unsigned int *cp_array, const int cp_count, const UCA_COLL_KEY_TYPE seq_type)
Definition: uca_support.c:3050
struct uca_coll_ce_list UCA_COLL_CE_LIST
Definition: uca_support.c:57