CUBRID Engine  latest
language_support.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * language_support.c : Multi-language and character set support
21  */
22 
23 #ident "$Id$"
24 
25 #include "config.h"
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <assert.h>
30 #if !defined(WINDOWS)
31 #include <langinfo.h>
32 #endif
33 
34 #include "language_support.h"
35 
36 #include "chartype.h"
37 #include "environment_variable.h"
38 #include "memory_hash.h"
39 #include "object_primitive.h"
40 #include "util_func.h"
41 #if !defined(WINDOWS)
42 #include <dlfcn.h>
43 #endif /* !defined (WINDOWS) */
44 #include "tz_support.h"
45 #include "db_date.h"
46 #include "string_opfunc.h"
47 
48 #if !defined (SERVER_MODE)
49 #include "authenticate.h"
50 #include "db.h"
51 #endif /* !defined (SERVER_MODE) */
52 #include "dbtype.h"
53 
54 #define PAD ' ' /* str_pad_char(INTL_CODESET_ISO88591, pad, &pad_size) */
55 #define SPACE PAD /* smallest character in the collation sequence */
56 #define ZERO '\0' /* space is treated as zero */
57 
58 #define EUC_SPACE 0xa1 /* for euckr */
59 #define ASCII_SPACE 0x20
60 
68 
69 /* built-in collations */
70 /* number of characters in the (extended) alphabet per language */
71 #define LANG_CHAR_COUNT_EN 256
72 #define LANG_CHAR_COUNT_TR 352
73 
74 #define LANG_COLL_GENERIC_SORT_OPT \
75  {TAILOR_UNDEFINED, false, false, 1, false, CONTR_IGNORE, false, \
76  MATCH_CONTR_BOUND_ALLOW}
77 #define LANG_COLL_NO_EXP 0, NULL, NULL, NULL
78 #define LANG_COLL_NO_CONTR NULL, 0, 0, NULL, 0, 0
79 
80 #define LANG_NO_NORMALIZATION {NULL, 0, NULL, NULL, 0}
81 
84 
87 
90 
93 
94 static unsigned int lang_Weight_TR[LANG_CHAR_COUNT_TR];
96 
99 
100 #define DEFAULT_COLL_OPTIONS {true, true, true}
101 #define CI_COLL_OPTIONS {false, false, true}
102 
103 
104 static bool lang_Builtin_initialized = false;
105 static bool lang_Initialized = false;
106 static bool lang_Init_w_error = false;
107 static bool lang_Charset_initialized = false;
108 static bool lang_Language_initialized = false;
109 static bool lang_Msg_env_initialized = false;
110 
113 {
114  const char *lang_name;
117 };
118 
119 /* Order of language/charset pair is important: first encountered charset is
120  * the default for a language */
122  /* English - ISO-8859-1 - default lang and charset */
124  /* English - UTF-8 */
126  /* Korean - UTF-8 */
127  {LANG_NAME_KOREAN, INTL_LANG_KOREAN, INTL_CODESET_UTF8},
128  /* Korean - EUC-KR */
130  /* Korean - ISO-8859-1 : contains romanized names for months, days */
131  {LANG_NAME_KOREAN, INTL_LANG_KOREAN, INTL_CODESET_ISO88591},
132  /* Turkish - UTF-8 */
133  {LANG_NAME_TURKISH, INTL_LANG_TURKISH, INTL_CODESET_UTF8},
134  /* Turkish - ISO-8859-1 : contains romanized names for months, days */
135  {LANG_NAME_TURKISH, INTL_LANG_TURKISH, INTL_CODESET_ISO88591}
136 };
137 
138 /* Turkish collation */
139 static unsigned int lang_upper_TR[LANG_CHAR_COUNT_TR];
140 static unsigned int lang_lower_TR[LANG_CHAR_COUNT_TR];
141 static unsigned int lang_upper_i_TR[LANG_CHAR_COUNT_TR];
142 static unsigned int lang_lower_i_TR[LANG_CHAR_COUNT_TR];
143 
144 static char lang_time_format_TR[] = "HH24:MI:SS";
145 static char lang_date_format_TR[] = "DD.MM.YYYY";
146 static char lang_datetime_format_TR[] = "HH24:MI:SS.FF DD.MM.YYYY";
147 static char lang_timestamp_format_TR[] = "HH24:MI:SS DD.MM.YYYY";
148 static char lang_datetimetz_format_TR[] = "HH24:MI:SS.FF DD.MM.YYYY TZR";
149 static char lang_timestamptz_format_TR[] = "HH24:MI:SS DD.MM.YYYY TZR";
150 
151 static void **loclib_Handle = NULL;
152 static int loclib_Handle_size = 0;
153 static int loclib_Handle_count = 0;
154 
158 
159 /* all loaded locales */
161 
162 static int lang_Count_locales = 0;
163 
164 static int lang_Count_collations = 0;
165 
166 /* normalization data */
168 
/*
 * lang_Db_charsets - table of the character sets handled by this module,
 * one DB_CHARSET initializer per codeset.
 *
 * NOTE(review): the DB_CHARSET declaration is not visible here; the five
 * strings look like name, description, space character, introducer and short
 * name, followed by the INTL_CODESET id and the size in bytes of the space
 * character - confirm against the struct declaration.
 *
 * The KSC-EUC entry is the only one whose third string is two bytes long
 * (octal 241 twice, i.e. the EUC_SPACE byte value 0xa1) and whose last
 * field is 2.
 *
 * The last entry (empty strings, INTL_CODESET_NONE, 0) presumably terminates
 * the table - verify against the code that scans it.
 */
169 static const DB_CHARSET lang_Db_charsets[] = {
170  {"ascii", "US English charset - ASCII encoding", " ", "",
171  "", INTL_CODESET_ASCII, 1},
172  {"raw-bits", "Uninterpreted bits - Raw encoding", "", "",
173  "", INTL_CODESET_RAW_BITS, 1},
174  {"raw-bytes", "Uninterpreted bytes - Raw encoding", "", "_binary",
175  "binary", INTL_CODESET_BINARY, 1},
176  {"iso8859-1", "Latin 1 charset - ISO 8859 encoding", " ", "_iso88591",
177  "iso88591", INTL_CODESET_ISO88591, 1},
178  {"ksc-euc", "KSC 5601 1990 charset - EUC encoding", "\241\241", "_euckr",
179  "euckr", INTL_CODESET_KSC5601_EUC, 2},
180  {"utf-8", "UNICODE charset - UTF-8 encoding", " ", "_utf8",
181  "utf8", INTL_CODESET_UTF8, 1},
182  {"", "", "", "", "", INTL_CODESET_NONE, 0}
183 };
184 
185 
186 /*
187  * Locales data
188  */
189 
190 #define LOCALE_DUMMY_ALPHABET(codeset) \
191  {ALPHABET_TAILORED, (codeset), 0, 0, NULL, 0, NULL, false}
192 
193 #define LOCALE_NULL_DATE_FORMATS NULL, NULL, NULL, NULL, NULL, NULL
194 
195 /* Calendar names and parsing order of these names */
196 #define LOCALE_NULL_CALENDAR_NAMES \
197  {NULL}, {NULL}, {NULL}, {NULL}, {NULL}, \
198  NULL, NULL, NULL, NULL, NULL
199 
200 static int set_current_locale (void);
201 static int set_msg_lang_from_env (void);
202 static int check_env_lang_val (char *env_val, char *lang_name, char **charset_ptr, INTL_CODESET * codeset);
203 static void set_default_lang (void);
204 static void lang_unload_libraries (void);
205 static void destroy_user_locales (void);
206 static int init_user_locales (void);
207 static LANG_LOCALE_DATA *find_lang_locale_data (const char *name, const INTL_CODESET codeset,
208  LANG_LOCALE_DATA ** last_lang_locale);
210 static void free_lang_locale_data (LANG_LOCALE_DATA * lld);
211 static int register_collation (LANG_COLLATION * coll);
212 
213 static bool lang_is_codeset_allowed (const INTL_LANG intl_id, const INTL_CODESET codeset);
215 static INTL_CODESET lang_get_default_codeset (const INTL_LANG intl_id);
216 
217 static int lang_strmatch_byte (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
218  int size1, const unsigned char *str2, int size2, const unsigned char *escape,
219  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
220 static int lang_fastcmp_byte (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
221  const unsigned char *string2, const int size2, bool ignore_trailing_space);
222 static int lang_fastcmp_binary (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
223  const unsigned char *string2, const int size2, bool ignore_trailing_space);
224 static int lang_strmatch_binary (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
225  const unsigned char *str2, int size2, const unsigned char *escape,
226  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
227 static int lang_next_alpha_char_iso88591 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
228  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
229 static int lang_next_coll_byte (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
230  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
231 static int lang_strcmp_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
232  const unsigned char *str2, const int size2, bool ignore_trailing_space);
233 static int lang_strmatch_utf8 (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
234  const unsigned char *str2, int size2, const unsigned char *escape,
235  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
236 static int lang_strcmp_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
237  const unsigned char *str2, const int size2, bool ignore_trailing_space);
238 static unsigned int lang_get_w_first_el (const COLL_DATA * coll, const unsigned char *str, const int str_size,
239  unsigned char **next_char, bool ignore_trailing_space);
240 static int lang_strmatch_utf8_w_contr (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
241  int size1, const unsigned char *str2, int size2, const unsigned char *escape,
242  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
243 static COLL_CONTRACTION *lang_get_contr_for_string (const COLL_DATA * coll_data, const unsigned char *str,
244  const int str_size, unsigned int cp);
245 static void lang_get_uca_w_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str,
246  const int size, UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_next,
247  unsigned int *cp_out);
248 static void lang_get_uca_back_weight_l13 (const COLL_DATA * coll_data, const bool use_contractions,
249  const unsigned char *str_start, const unsigned char *str_last,
250  UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_prev,
251  unsigned int *cp_out);
252 static void lang_get_uca_w_l4 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str,
253  const int size, UCA_L4_W ** uca_w_l4, int *num_ce, unsigned char **str_next,
254  unsigned int *cp_out);
255 static int lang_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, const int level, bool is_match,
256  const unsigned char *str1, const int size1, const unsigned char *str2,
257  const int size2, const unsigned char *escape, const bool has_last_escape,
258  int *offset_next_level, int *str1_match_size, bool ignore_trailing_space);
259 static int lang_back_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
260  const int size1, const unsigned char *str2, const int size2,
261  const unsigned char *escape, const bool has_last_escape,
262  int *offset_next_level, int *str1_match_size,
263  bool ignore_trailing_space);
264 static int lang_strcmp_utf8_uca (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
265  const unsigned char *str2, const int size2, bool ignore_trailing_space);
266 static int lang_strmatch_utf8_uca (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1,
267  const int size1, const unsigned char *str2, const int size2,
268  const unsigned char *escape, const bool has_last_escape, int *str1_match_size,
269  bool ignore_trailing_space);
270 static int lang_str_utf8_trail_zero_weights (const LANG_COLLATION * lang_coll, const unsigned char *str, int size);
271 static int lang_str_utf8_trail_zero_weights_w_exp (const COLL_DATA * coll_data, const int level,
272  const unsigned char *str, int size);
273 static int lang_next_coll_char_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
274  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
275 static int lang_next_coll_seq_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
276  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
277 static int lang_split_key_iso (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
278  const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
279  int *byte_size, bool ignore_trailing_space);
280 static int lang_split_key_byte (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
281  const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
282  int *byte_size, bool ignore_trailing_space);
283 static int lang_split_key_binary (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
284  const int size1, const unsigned char *str2, const int size2,
285  const unsigned char **key, int *byte_size, bool ignore_trailing_space);
286 static int lang_split_key_utf8 (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
287  const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
288  int *byte_size, bool ignore_trailing_space);
289 static int lang_split_key_w_exp (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
290  const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
291  int *byte_size, bool ignore_trailing_space);
292 static int lang_split_key_euckr (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1,
293  const int size1, const unsigned char *str2, const int size2, const unsigned char **key,
294  int *byte_size, bool ignore_trailing_space);
295 static unsigned int lang_mht2str_byte (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
296 static unsigned int lang_mht2str_default (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
297 static unsigned int lang_mht2str_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
298 static unsigned int lang_mht2str_utf8_exp (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
299 static unsigned int lang_mht2str_ko (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
300 static void lang_init_coll_en_ci (LANG_COLLATION * lang_coll);
301 static void lang_init_coll_en_cs (LANG_COLLATION * lang_coll);
302 static void lang_init_coll_Utf8_tr_cs (LANG_COLLATION * lang_coll);
303 static int lang_fastcmp_ko (const LANG_COLLATION * lang_coll, const unsigned char *string1, int size1,
304  const unsigned char *string2, int size2, bool ignore_trailing_space);
305 static int lang_strmatch_ko (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
306  const unsigned char *str2, int size2, const unsigned char *escape,
307  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space);
308 static int lang_next_alpha_char_ko (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
309  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
310 static int lang_locale_load_alpha_from_lib (ALPHABET_DATA * a, bool load_w_identifier_name, const char *alpha_suffix,
311  void *lib_handle, const LOCALE_FILE * lf);
312 static int lang_locale_load_normalization_from_lib (UNICODE_NORMALIZATION * norm, void *lib_handle,
313  const LOCALE_FILE * lf);
314 static void lang_free_collations (void);
315 
316 /* English collation */
317 static unsigned int lang_upper_EN[LANG_CHAR_COUNT_EN];
318 static unsigned int lang_lower_EN[LANG_CHAR_COUNT_EN];
319 
320 #if !defined(LANG_W_MAP_COUNT_EN)
321 #define LANG_W_MAP_COUNT_EN 256
322 #endif
324 
325 
326 static void lang_initloc_en_iso88591 (LANG_LOCALE_DATA * ld);
327 
328 static void lang_initloc_en_binary (LANG_LOCALE_DATA * ld);
329 
331 
332 
335  /* collation data */
336  {LANG_COLL_UTF8_EN_CS, "utf8_en_cs",
343  "1bdb1b1f630edc508be37f66dfdce7b0"},
350 };
351 
353 
354 static void lang_initloc_en_utf8 (LANG_LOCALE_DATA * ld);
355 
356 static void lang_initloc_tr_iso (LANG_LOCALE_DATA * ld);
357 
358 static void lang_initloc_ko_iso (LANG_LOCALE_DATA * ld);
359 
360 static void lang_initloc_ko_utf8 (LANG_LOCALE_DATA * ld);
361 
362 static void lang_initloc_ko_euc (LANG_LOCALE_DATA * ld);
363 
364 static void lang_initloc_tr_utf8 (LANG_LOCALE_DATA * ld);
365 
368  /* collation data */
369  {LANG_COLL_ISO_EN_CS, "iso88591_en_cs",
376  "707cef004e58be204d999d8a2abb4cc3"},
382  NULL
383 };
384 
385 /* locale data */
387  NULL,
391  /* alphabet for user strings */
392  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
393  /* alphabet for identifiers strings */
394  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
396  NULL, /* console text conversion */
397  false,
398  NULL, /* time, date, date-time, timestamp */
399  NULL, /* datetimetz, timestamptz format */
400  NULL,
401  NULL,
402  NULL,
403  NULL,
404  {NULL},
405  {NULL},
406  {NULL},
407  {NULL},
408  {NULL},
409  NULL,
410  NULL,
411  NULL,
412  NULL,
413  NULL,
414  '.',
415  ',',
418  (char *) "6ae1bf7f15e6f132c4361761d203c1b4",
420  false
421 };
422 
423 /* locale data */
425  NULL,
431  false},
434  false},
436  &con_Iso_8859_1_conv, /* text conversion */
437  false,
438  NULL, /* time, date, date-time, timestamp */
439  NULL, /* datetimetz, timestamptz format */
440  NULL,
441  NULL,
442  NULL,
443  NULL,
444  {NULL},
445  {NULL},
446  {NULL},
447  {NULL},
448  {NULL},
449  NULL,
450  NULL,
451  NULL,
452  NULL,
453  NULL,
454  '.',
455  ',',
458  (char *) "945bead220ece6f4d020403835308785",
460  false
461 };
462 
463 /* Turkish in ISO-8859-1 charset : limited support (only date - formats) */
465  NULL,
469  /* user alphabet : same as English ISO */
470  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
471  /* identifiers alphabet : same as English ISO */
472  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
473  &coll_Iso88591_en_cs, /* collation : same as English ISO */
474  NULL, /* console text conversion */
475  false,
482  {NULL},
483  {NULL},
484  {NULL},
485  {NULL},
486  {NULL},
487  NULL,
488  NULL,
489  NULL,
490  NULL,
491  NULL,
492  ',',
493  '.',
496  (char *) "b9ac135bdf8100b205ebb6b7e0e9c3df",
498  false
499 };
500 
501 
503  NULL,
507  /* alphabet : same as English ISO */
508  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
509  /* identifiers alphabet : same as English ISO */
510  {ALPHABET_TAILORED, INTL_CODESET_ISO88591, 0, 0, NULL, 0, NULL, false},
511  &coll_Iso88591_en_cs, /* collation : same as English ISO */
512  NULL, /* console text conversion */
513  false,
514  NULL, /* time, date, date-time, timestamp */
515  NULL, /* datetimetz, timestamptz format */
516  NULL,
517  NULL,
518  NULL,
519  NULL,
520  {NULL},
521  {NULL},
522  {NULL},
523  {NULL},
524  {NULL},
525  NULL,
526  NULL,
527  NULL,
528  NULL,
529  NULL,
530  '.',
531  ',',
534  (char *) "8710ffb79b191c2158d4c498e8bc7dea",
536  false
537 };
538 
541  /* collation data - same as en_US.utf8 */
542  {LANG_COLL_UTF8_KO_CS, "utf8_ko_cs",
549  "422c85ede1e265a761078763d2240c81"},
556 };
557 
558 /* built-in support of Korean in UTF-8 : date-time conversions as in English
559  * collation : by codepoints
560  * this needs to be overridden by user defined locale */
562  NULL,
567  lang_upper_EN, false},
569  lang_upper_EN, false},
570  &coll_Utf8_ko_cs, /* collation */
571  NULL, /* console text conversion */
572  false,
573  NULL, /* time, date, date-time, timestamp */
574  NULL, /* datetimetz, timestamptz format */
575  NULL,
576  NULL,
577  NULL,
578  NULL,
579  {NULL},
580  {NULL},
581  {NULL},
582  {NULL},
583  {NULL},
584  NULL,
585  NULL,
586  NULL,
587  NULL,
588  NULL,
589  '.',
590  ',',
593  (char *) "802cff8e10d857952241d19b50a13a27",
595  false
596 };
597 
598 
601  /* collation data */
602  {LANG_COLL_EUCKR_BINARY, "euckr_bin",
609  "18fb633e87f0a3a785ef38cf2a6a7789"},
616 };
617 
618 /* built-in support of Korean in EUC-KR : date-time conversions as in English
619  * collation : binary */
621  NULL,
625  /* alphabet */
627  /* identifiers alphabet */
629  &coll_Euckr_bin, /* collation */
630  NULL, /* console text conversion */
631  false,
632  NULL, /* time, date, date-time, timestamp */
633  NULL, /* datetimetz, timestamptz */
634  NULL,
635  NULL,
636  NULL,
637  NULL,
638  {NULL},
639  {NULL},
640  {NULL},
641  {NULL},
642  {NULL},
643  NULL,
644  NULL,
645  NULL,
646  NULL,
647  NULL,
648  '.',
649  ',',
652  (char *) "c46ff948b4147323edfba0c51f96fe47",
654  false
655 };
656 
659  /* collation data */
660  {LANG_COLL_BINARY, "binary",
662  NULL, NULL,
663  NULL, NULL,
664  0,
667  "93fbdcc87193d2783b2396c6bec068bb"},
673  NULL
674 };
675 
677  NULL,
681  LOCALE_DUMMY_ALPHABET (INTL_CODESET_BINARY),
682  LOCALE_DUMMY_ALPHABET (INTL_CODESET_BINARY),
683  &coll_Binary, /* collation */
684  NULL, /* console text conversion */
685  false,
686  LOCALE_NULL_DATE_FORMATS, /* time, date, date-time, timestamp format */
688  '.',
689  ',',
692  (char *) "390462b716493cbd74c77f545a77a2bf",
694  false
695 };
696 
698 
701  /* collation data */
702  {LANG_COLL_ISO_BINARY, "iso88591_bin",
709  "54735f231842c3a673161fc90670989b"},
715  NULL
716 };
717 
720  /* collation data */
721  {LANG_COLL_UTF8_BINARY, "utf8_bin",
728  "d16a9a3825e263f76028c1e8c3cd043d"},
729  /* compare functions handles bytes, no need to handle UTF-8 chars */
732  /* 'next' and 'split_point' functions must handle UTF-8 chars */
736  NULL
737 };
738 
741  /* collation data */
742  {LANG_COLL_ISO_EN_CI, "iso88591_en_ci",
749  "b3fb4c073fbc76c5ec302da9128d9542"},
756 };
757 
760  /* collation data */
761  {LANG_COLL_UTF8_EN_CI, "utf8_en_ci",
768  "3050bc8e9814b196f4bbb84759aab77c"},
775 };
776 
779  /* collation data */
780  {LANG_COLL_UTF8_TR_CS, "utf8_tr_cs",
787  "52f12f045d2fc90c3a818d0b334485d7"},
794 };
795 
797  NULL,
802  lang_upper_TR, false},
804  lang_lower_i_TR, 1, lang_upper_i_TR, false},
806  &con_Iso_8859_9_conv, /* console text conversion */
807  false,
814  {NULL},
815  {NULL},
816  {NULL},
817  {NULL},
818  {NULL},
819  NULL,
820  NULL,
821  NULL,
822  NULL,
823  NULL,
824  ',',
825  '.',
828  (char *) "a6c90a844ad44f78d0b1a3a9a87ddb2f",
830  false
831 };
832 
843  &coll_Binary
844 };
845 
846 /*
847  * lang_init_builtin - Initializes the built-in available languages and sets
848  * message catalog language according to env
849  *
850  * return: none (the function is declared void; the status values returned
     *         by set_msg_lang_from_env, register_collation and
     *         register_lang_locale_data are explicitly discarded)
851  *
852  */
853 void
855 {
856  int i;
857 
859  {
860  return;
861  }
862 
     /* a failure to resolve the message language is ignored here (void cast) */
863  (void) set_msg_lang_from_env ();
864 
865  /* init all collation placeholders with ISO binary collation */
866  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
867  {
868  lang_Collations[i] = &coll_Iso_binary;
869  }
870 
871  /* built-in collations : order of registration should match collation ID */
872  for (i = 0; i < (int) (sizeof (built_In_collations) / sizeof (built_In_collations[0])); i++)
873  {
874  (void) register_collation (built_In_collations[i]);
875  }
876 
877  /* register all built-in locales allowed in current charset. Support for multiple locales is required for switching
878  * function context: string - date/time, string - number conversions */
879 
880  /* built-in locales with ISO codeset */
881  (void) register_lang_locale_data (&lc_English_iso88591);
882  (void) register_lang_locale_data (&lc_Korean_iso88591);
883  (void) register_lang_locale_data (&lc_Turkish_iso88591);
884 
     /* the only built-in locale with EUC-KR codeset */
885  (void) register_lang_locale_data (&lc_Korean_euckr);
886 
887  /* built-in locales with UTF-8 codeset : should be loaded last */
888  (void) register_lang_locale_data (&lc_English_utf8);
889  (void) register_lang_locale_data (&lc_Korean_utf8);
890  (void) register_lang_locale_data (&lc_Turkish_utf8);
     /* NOTE(review): the binary English locale is registered after the UTF-8 ones, although the comment above says the
      * UTF-8 locales should be loaded last - confirm that this order is intended */
891  (void) register_lang_locale_data (&lc_English_binary);
892 
894 }
895 
896 /*
897  * lang_init - Initializes the multi-language module
898  *
899  * return: error code; when the user locales are loaded by this call, it is
     *         the value returned by init_user_locales ()
900  *
901  * Note : Initializes available built-in and LDML locales.
902  * System charset and language information is not available and is not
903  * set here.
     * A failure of init_user_locales () is remembered in lang_Init_w_error,
     * and the module is flagged as initialized in either case.
904  */
905 int
906 lang_init (void)
907 {
908  int error = NO_ERROR;
909 
     /* repeated calls do not load the user locales again */
910  if (lang_Initialized)
911  {
913  }
914 
916 
918 
919  /* load & register user locales (no matter the default DB codeset) */
920  error = init_user_locales ();
921  if (error != NO_ERROR)
922  {
     /* keep the failure in a module-level flag; the error code is still returned below */
923  lang_Init_w_error = true;
924  }
925 
     /* set even when loading failed, so the load is attempted only once */
926  lang_Initialized = true;
927 
928  return error;
929 }
930 
931 /*
932  * lang_init_console_txt_conv - Initializes console text conversion
933  *
     * Note : When the current locale (lang_Loc_data) has a text conversion,
     * the identifier of the system console charset (console code page on
     * Windows, nl_langinfo (CODESET) elsewhere) is searched in the list of
     * identifiers supported by that conversion; on a match the conversion
     * is initialized and stored in console_Conv.
934  */
935 void
937 {
938  char *sys_id = NULL;
939  char *conv_sys_ids = NULL;
940 #if defined(WINDOWS)
941  UINT cp;
942  char win_codepage_str[32];
943 #endif
944 
946  assert (lang_Loc_data != NULL);
947 
     /* nothing to convert : only apply the environment locale to LC_CTYPE (non-Windows) and leave */
948  if (lang_Loc_data == NULL || lang_Loc_data->txt_conv == NULL)
949  {
950 #if !defined(WINDOWS)
951  (void) setlocale (LC_CTYPE, "");
952 #endif
953  return;
954  }
955 
956 #if defined(WINDOWS)
     /* the console code page number, printed as decimal text, is the system identifier */
957  cp = GetConsoleCP ();
     /* NOTE(review): 'cp' is declared UINT but is formatted with "%d" - "%u" would presumably match the type; confirm */
958  snprintf (win_codepage_str, sizeof (win_codepage_str) - 1, "%d", cp);
959 
960  sys_id = win_codepage_str;
961  conv_sys_ids = lang_Loc_data->txt_conv->win_codepages;
962 #else
963  /* setlocale with empty string forces the current locale : this is required to retrieve codepage id, but as a
964  * side-effect modifies the behavior of string utility functions such as 'snprintf' to support current locale charset
965  */
966  if (setlocale (LC_CTYPE, "") != NULL)
967  {
968  sys_id = nl_langinfo (CODESET);
969  conv_sys_ids = lang_Loc_data->txt_conv->nl_lang_str;
970  }
971 #endif
972 
     /* both stay NULL when setlocale failed above, in which case no conversion is installed */
973  if (sys_id != NULL && conv_sys_ids != NULL)
974  {
975  char *conv_sys_end = conv_sys_ids + strlen (conv_sys_ids);
976  char *found_token;
977 
978  /* supported system identifiers for conversion are separated by comma */
979  do
980  {
981  found_token = strstr (conv_sys_ids, sys_id);
982  if (found_token == NULL)
983  {
984  break;
985  }
986 
     /* accept the occurrence only if it ends at the end of the list, or is followed by ',' or ' ';
      * NOTE(review): the start of the occurrence is not checked against a separator - confirm this is intended */
987  if (found_token + strlen (sys_id) >= conv_sys_end || *(found_token + strlen (sys_id)) == ','
988  || *(found_token + strlen (sys_id)) == ' ')
989  {
990  if (lang_Loc_data->txt_conv->init_conv_func != NULL)
991  {
992  lang_Loc_data->txt_conv->init_conv_func ();
993  }
994  console_Conv = lang_Loc_data->txt_conv;
995  break;
996  }
997  else
998  {
     /* NOTE(review): the cursor advances from the previous search start, not from found_token, so the same
      * occurrence may be found again until the cursor has moved past it */
999  conv_sys_ids = conv_sys_ids + strlen (sys_id);
1000  }
1001  }
1002  while (conv_sys_ids < conv_sys_end);
1003  }
1004 }
1005 
1006 /*
1007  * set_current_locale - Initializes current locale from global variables
1008  * 'lang_Lang_name' and 'lang_Loc_charset';
1009  * if these are invalid current locale is initialized
1010  * with default locale (en_US.iso88591), and error is
1011  * returned.
1012  *
1013  * return : error code
      *
      * Note : The lookup walks the list lang_Loaded_locales[lang_Lang_id] and
      * leaves lang_Loc_data on the first entry whose codeset equals
      * lang_Loc_charset and whose name equals lang_Lang_name (case
      * insensitive). lang_Loc_currency is then taken from the locale.
1014  */
1015 static int
1017 {
1018  bool found = false;
1019 
1021 
      /* the global lang_Loc_data itself is the iterator : it is NULL after the loop when nothing matched */
1022  for (lang_Loc_data = lang_Loaded_locales[lang_Lang_id]; lang_Loc_data != NULL;
1023  lang_Loc_data = lang_Loc_data->next_lld)
1024  {
      /* already guaranteed by the loop condition */
1025  assert (lang_Loc_data != NULL);
1026 
1027  if (lang_Loc_data->codeset == lang_Loc_charset && strcasecmp (lang_Lang_name, lang_Loc_data->lang_name) == 0)
1028  {
1029  found = true;
1030  break;
1031  }
1032  }
1033 
1034  if (!found)
1035  {
1036  char err_msg[ERR_MSG_SIZE];
1037 
      /* remember the failure, log it, then fall back; set_default_lang () presumably makes lang_Loc_data point to
       * the default locale (the assert below relies on it) - confirm */
1038  lang_Init_w_error = true;
1039  snprintf_dots_truncate (err_msg, sizeof (err_msg) - 1, "Locale %s.%s was not loaded.\n"
1040  " %s not found in cubrid_locales.txt", lang_Lang_name,
1042  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
1043  set_default_lang ();
1044  }
1045 
1046  /* at this point we have locale : either the user selected or default one */
1047  assert (lang_Loc_data != NULL);
1048  lang_Loc_currency = lang_Loc_data->default_currency_code;
1049 
1050  /* static globals in db_date.c should also be initialized with the current locale (for parsing local am/pm strings
1051  * for times) */
1053 
1055 }
1056 
1057 /*
1058  * set_msg_lang_from_env - Initializes language for catalog messages from
1059  * environment
1060  *
1061  * return: NO_ERROR if success, ER_LOC_INIT if the value read from the
      *         environment is rejected by check_env_lang_val ()
1062  *
1063  * Note : This function sets the following global variable according to
      * the environment:
1064  * - lang_Msg_loc_name : <lang>.<charset>; en_US.utf8;
1065  * if $CUBRID_MSG_LANG is not set, then en_US is used
1066  */
1067 static int
1069 {
1070  const char *env;
1071  char *charset = NULL;
1072  char err_msg[ERR_MSG_SIZE];
1073  int status = NO_ERROR;
1074 
1076  {
1077  return status;
1078  }
1079 
1080  /* set flag as set; this function will set the messages language either to environment or leave it default value */
1081  lang_Msg_env_initialized = true;
1082 
1083  /*
1084  * Determines the messages language by examining environment variables.
1085  * We check the optional variable CUBRID_MSG_LANG, which decides the
1086  * locale for catalog messages; if not set, en_US is used for catalog
1087  * messages
1088  */
1089 
1090  env = envvar_get ("MSG_LANG");
1091  if (env != NULL)
1092  {
1093  INTL_CODESET dummy_cs;
1094  char msg_lang[LANG_MAX_LANGNAME];
1095 
1097 
      /* normalizes lang_Msg_loc_name in place; the codeset result is not needed here */
1098  status = check_env_lang_val (lang_Msg_loc_name, msg_lang, &charset, &dummy_cs);
1099  if (status != NO_ERROR)
1100  {
      /* NOTE(review): unbounded sprintf into err_msg[ERR_MSG_SIZE] - confirm that lang_Msg_loc_name always fits */
1101  sprintf (err_msg, "invalid value '%s' for CUBRID_MSG_LANG", lang_Msg_loc_name);
1104  return ER_LOC_INIT;
1105  }
1106  else
1107  {
1108  if (charset == NULL && strcasecmp (msg_lang, "en_US") != 0)
1109  {
1110  /* by default all catalog message folders are in .utf8, unless otherwise specified */
1111  assert (strlen (lang_Msg_loc_name) == 5);
1112  strcat (lang_Msg_loc_name, ".utf8");
1113  }
1114  }
1115  }
1116 
      /* the flag was already set before the environment was read; this second store is redundant */
1117  lang_Msg_env_initialized = true;
1118 
1119  return NO_ERROR;
1120 }
1121 
1122 /*
1123  * lang_set_charset_lang - Initializes language and charset from a locale
1124  * string
1125  *
1126  * return: NO_ERROR if success; ER_LOC_INIT when the input string is NULL,
      *         when the status check reports an invalid value, or when the
      *         codeset is not supported for the language (error_codeset);
      *         otherwise the status returned by set_current_locale ()
1127  *
1128  * Note : This function sets the following global variables according to
1129  * input:
1130  * - lang_Loc_name : resolved locale string: <lang>.<charset>
1131  * - lang_Lang_name : <lang> string part (without <charset>)
1132  * - lang_Lang_id: id of language
1133  * - lang_Loc_charset : charset id : ISO-8859-1, UTF-8 or EUC-KR
1134  * - lang_Loc_data: pointer to locale (struct) used by system
1135  */
1136 int
1138 {
1139  char *charset = NULL;
1140  char err_msg[ERR_MSG_SIZE];
1141  int status = NO_ERROR;
1142 
1145 
      /* the flag is set before the input is validated, so it stays set on the error returns shown below */
1146  lang_Charset_initialized = true;
1148 
1149  if (lang_charset != NULL)
1150  {
      /* keep a private copy of the input string in the global lang_Loc_name */
1151  strncpy_bufsize (lang_Loc_name, lang_charset);
1152  }
1153  else
1154  {
1155  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, "Invalid language initialization string");
1156  return ER_LOC_INIT;
1157  }
1158 
1161  if (status != NO_ERROR)
1162  {
1163  sprintf (err_msg, "invalid value %s for charset", lang_Loc_name);
1165  return ER_LOC_INIT;
1166  }
1167 
1169  {
1170  /* no charset provided in $CUBRID_MSG_LANG */
1174  {
1175  set_default_lang ();
1176  goto error_codeset;
1177  }
1178  }
1179  else if (lang_Loc_charset != INTL_CODESET_UTF8)
1180  {
1181  /* not UTF-8 charset, it has to be a built-in language */
1184  {
1185  goto error_codeset;
1186  }
1187  }
1188 
1189  status = set_current_locale ();
1191 
1192  return status;
1193 
1194 error_codeset:
      /* NOTE(review): 'charset' starts as NULL and is printed with %s here - confirm that it cannot still be NULL on
       * the path that handles a missing charset part */
1195  sprintf (err_msg, "codeset %s for language %s is not supported", charset, lang_Lang_name);
1197 
1198  return ER_LOC_INIT;
1199 }
1200 
1201 /*
1202  * lang_set_charset - Set system charset
1203  *
1204  * return : NO_ERROR, or ER_LOC_INIT (with the error set through er_set)
      *          when codeset is outside the range
      *          INTL_CODESET_ISO88591 .. INTL_CODESET_LAST
1205  *
1206  */
1207 int
1209 {
      /* reject anything outside the inclusive range ISO88591 .. LAST before touching module state */
1210  if (codeset < INTL_CODESET_ISO88591 || codeset > INTL_CODESET_LAST)
1211  {
1212  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_LOC_INIT, 1, "Codeset is not valid");
1213  return ER_LOC_INIT;
1214  }
1215 
      /* mark the charset as initialized only after the range check has passed */
1217  lang_Charset_initialized = true;
1218 
1220  return NO_ERROR;
1221 }
1222 
1223 /*
1224  * lang_set_language - Set system language, and system locale
1225  * The system charset must be set prior to this.
1226  *
1227  * return : error code : ER_LOC_INIT when the guard below fails (charset
      *          not set yet, per its message), otherwise the value returned
      *          by lang_set_charset_lang ()
1228  *
      * lang_str(in): language name; must not be NULL (asserted)
1229  */
1230 int
1231 lang_set_language (const char *lang_str)
1232 {
1233  char full_locale_name[LANG_MAX_LANGNAME];
1234 
1235  assert (lang_str != NULL);
1236 
1238  {
      /* reaching this branch is a programming error : it asserts in debug builds and reports ER_LOC_INIT otherwise */
1239  assert (false);
1241  "Internal error: must set charset first before setting " "language");
1242  return ER_LOC_INIT;
1243  }
1244 
      /* compose a locale string from the language name and the current charset (lang_charset ()), then let
       * lang_set_charset_lang () resolve it */
1245  (void) lang_get_charset_env_string (full_locale_name, sizeof (full_locale_name), lang_str, lang_charset ());
1246 
1247  return lang_set_charset_lang (full_locale_name);
1248 }
1249 
1250 /*
1251  * check_env_lang_val - check and normalizes the environment variable value;
1252  * gets the language and charset parts
1253  *
1254  * return: NO_ERROR if success
1255  *
1256  * env_val(in/out): value; Example : "En_US.UTF8" -> en_US.utf8
1257  * lang_name(out): language part : en_US
1258  * charset_ptr(out): pointer in env_val to charset part : utf8
1259  * codeset(out): codeset value, according to charset part or
1260  * INTL_CODESET_NODE, if charset part is empty
1261  *
1262  */
1263 static int
1264 check_env_lang_val (char *env_val, char *lang_name, char **charset_ptr, INTL_CODESET * codeset)
1265 {
1266  char *charset;
1267 
1268  assert (env_val != NULL);
1269  assert (lang_name != NULL);
1270  assert (charset_ptr != NULL);
1271 
1272  /* strip quotas : */
1273  envvar_trim_char (env_val, (int) '\"');
1274 
1275  /* Locale should be formated like xx_XX.charset or xx_XX */
1276  charset = strchr (env_val, '.');
1277  *charset_ptr = charset;
1278  if (charset != NULL)
1279  {
1280  strncpy (lang_name, env_val, charset - env_val);
1281  lang_name[charset - env_val] = '\0';
1282 
1283  charset++;
1284  if (strcasecmp (charset, LANG_CHARSET_EUCKR) == 0 || strcasecmp (charset, LANG_CHARSET_EUCKR_ALIAS1) == 0)
1285  {
1286  *codeset = INTL_CODESET_KSC5601_EUC;
1287  strcpy (charset, LANG_CHARSET_EUCKR);
1288  }
1289  else if (strcasecmp (charset, LANG_CHARSET_UTF8) == 0 || strcasecmp (charset, LANG_CHARSET_UTF8_ALIAS1) == 0)
1290  {
1291  *codeset = INTL_CODESET_UTF8;
1292  strcpy (charset, LANG_CHARSET_UTF8);
1293  }
1294  else if (strcasecmp (charset, LANG_CHARSET_ISO88591) == 0
1295  || strcasecmp (charset, LANG_CHARSET_ISO88591_ALIAS1) == 0
1296  || strcasecmp (charset, LANG_CHARSET_ISO88591_ALIAS2) == 0)
1297  {
1298  *codeset = INTL_CODESET_ISO88591;
1299  strcpy (charset, LANG_CHARSET_ISO88591);
1300  }
1301  else
1302  {
1303  return ER_FAILED;
1304  }
1305  }
1306  else
1307  {
1308  strcpy (lang_name, env_val);
1309  }
1310 
1311  if (strlen (lang_name) == 5)
1312  {
1313  intl_toupper_iso8859 ((unsigned char *) lang_name + 3, 2);
1314  intl_tolower_iso8859 ((unsigned char *) lang_name, 2);
1315  }
1316  else
1317  {
1318  return ER_FAILED;
1319  }
1320 
1321  memcpy (env_val, lang_name, strlen (lang_name));
1322 
1323  return NO_ERROR;
1324 }
1325 
1326 /*
1327  * set_default_lang -
1328  * return:
1329  *
1330  */
1331 static void
1333 {
1335  strncpy (lang_Loc_name, LANG_NAME_DEFAULT, sizeof (lang_Loc_name));
1336  strncpy (lang_Lang_name, LANG_NAME_DEFAULT, sizeof (lang_Lang_name));
1337  lang_Loc_data = &lc_English_iso88591;
1338  lang_Loc_charset = lang_Loc_data->codeset;
1339  lang_Loc_currency = lang_Loc_data->default_currency_code;
1340 }
1341 
1342 /*
1343  * lang_locales_count -
1344  * return: number of locales in the system
1345  */
1346 int
1347 lang_locales_count (bool check_codeset)
1348 {
1349  int i;
1350  int count;
1351 
1352  if (!check_codeset)
1353  {
1354  return lang_Count_locales;
1355  }
1356 
1357  count = 0;
1358  for (i = 0; i < lang_Count_locales; i++)
1359  {
1360  LANG_LOCALE_DATA *lld = lang_Loaded_locales[i];
1361  do
1362  {
1363  count++;
1364  lld = lld->next_lld;
1365  }
1366  while (lld != NULL);
1367  }
1368 
1369  return count;
1370 }
1371 
1372 /*
1373  * init_user_locales -
1374  * return: error code
1375  *
1376  */
1377 static int
1379 {
1380  LOCALE_FILE *user_lf = NULL;
1381  int num_user_loc = 0, i;
1382  int er_status = NO_ERROR;
1383 
1384  er_status = locale_get_cfg_locales (&user_lf, &num_user_loc, true);
1385  if (er_status != NO_ERROR)
1386  {
1387  goto error;
1388  }
1389 
1390  loclib_Handle_size = num_user_loc;
1391  loclib_Handle_count = 0;
1392 
1393  if (num_user_loc == 0)
1394  {
1395  /* no extra locales : nothing to do */
1396  er_status = NO_ERROR;
1397  goto exit;
1398  }
1399  assert (num_user_loc > 0);
1400 
1401  loclib_Handle = (void **) malloc (loclib_Handle_size * sizeof (void *));
1402  if (loclib_Handle == NULL)
1403  {
1405  er_status = ER_OUT_OF_VIRTUAL_MEMORY;
1406  goto error;
1407  }
1408 
1409  for (i = 0; i < num_user_loc; i++)
1410  {
1411  /* load user locale */
1412  LANG_LOCALE_DATA *lld = NULL;
1413  LANG_LOCALE_DATA *last_lang_locale = NULL;
1414  INTL_LANG l_id;
1415  bool is_new_locale = false;
1416 
1417  er_status = locale_check_and_set_default_files (&(user_lf[i]), true);
1418  if (er_status != NO_ERROR)
1419  {
1420  goto error;
1421  }
1422 
1424  er_status = lang_load_library (user_lf[i].lib_file, &(loclib_Handle[loclib_Handle_count]));
1425  if (er_status != NO_ERROR)
1426  {
1427  goto error;
1428  }
1429  loclib_Handle_count++;
1430 
1431  lld = find_lang_locale_data (user_lf[i].locale_name, INTL_CODESET_UTF8, &last_lang_locale);
1432 
1433  if (lld != NULL)
1434  {
1435  /* user customization : overwrite built-in locale */
1436  if (lld->is_user_data)
1437  {
1438  char err_msg[ERR_MSG_SIZE];
1439 
1440  snprintf (err_msg, sizeof (err_msg) - 1, "Duplicate user locale : %s", lld->lang_name);
1441  er_status = ER_LOC_INIT;
1442  LOG_LOCALE_ERROR (err_msg, er_status, false);
1443  goto error;
1444  }
1445  l_id = lld->lang_id;
1446  }
1447  else
1448  {
1449  /* locale not found */
1450  if (last_lang_locale != NULL)
1451  {
1452  /* existing language, but new locale (another charset) */
1453  l_id = last_lang_locale->lang_id;
1454  }
1455  else
1456  {
1457  /* new language */
1458  l_id = lang_Count_locales;
1459 
1460  assert (l_id >= INTL_LANG_USER_DEF_START);
1461 
1462  if (l_id >= LANG_MAX_LOADED_LOCALES)
1463  {
1464  er_status = ER_LOC_INIT;
1465  LOG_LOCALE_ERROR ("too many locales", er_status, false);
1466  goto error;
1467  }
1468  }
1469 
1470  lld = (LANG_LOCALE_DATA *) malloc (sizeof (LANG_LOCALE_DATA));
1471  if (lld == NULL)
1472  {
1473  er_status = ER_LOC_INIT;
1474  LOG_LOCALE_ERROR ("memory allocation failed", er_status, false);
1475  goto error;
1476  }
1477 
1478  memset (lld, 0, sizeof (LANG_LOCALE_DATA));
1479  lld->codeset = INTL_CODESET_UTF8;
1480  lld->lang_id = l_id;
1481 
1482  is_new_locale = true;
1483  }
1484 
1485  assert (lld->codeset == INTL_CODESET_UTF8);
1486  assert (lld->lang_id == l_id);
1487 
1488  lld->is_user_data = true;
1489 
1490  er_status = lang_locale_data_load_from_lib (lld, loclib_Handle[loclib_Handle_count - 1], &(user_lf[i]), false);
1491  if (er_status != NO_ERROR)
1492  {
1493  goto error;
1494  }
1495 
1496  assert (strcmp (lld->lang_name, user_lf[i].locale_name) == 0);
1497 
1498  /* initialization alphabet */
1501 
1502  /* initialize text conversion */
1503  if (lld->txt_conv != NULL)
1504  {
1506  {
1507  lld->txt_conv->init_conv_func = NULL;
1510  }
1511  else if (lld->txt_conv->conv_type == TEXT_CONV_GENERIC_1BYTE)
1512  {
1513  lld->txt_conv->init_conv_func = NULL;
1516  }
1517  else
1518  {
1521  }
1522  }
1523 
1525  {
1527  }
1528 
1529  if (is_new_locale)
1530  {
1531  er_status = register_lang_locale_data (lld);
1532 
1533  if (er_status != NO_ERROR)
1534  {
1535  goto error;
1536  }
1537  }
1538 
1539  lld->is_initialized = true;
1540  }
1541 
1542 exit:
1543  /* free user defined locale files struct */
1544  for (i = 0; i < num_user_loc; i++)
1545  {
1546  free_and_init (user_lf[i].locale_name);
1547  free_and_init (user_lf[i].ldml_file);
1548  free_and_init (user_lf[i].lib_file);
1549  }
1550 
1551  if (user_lf != NULL)
1552  {
1553  free (user_lf);
1554  }
1555 
1556  return er_status;
1557 
1558 error:
1562 
1563  goto exit;
1564 }
1565 
1566 /*
1567  * register_collation - registers a collation
1568  * return: error code
1569  * coll(in): collation structure
1570  */
1571 static int
1573 {
1574  int id;
1575  assert (coll != NULL);
1577 
1578  id = coll->coll.coll_id;
1579 
1580  if (id < ((coll->built_in) ? 0 : LANG_MAX_BUILTIN_COLLATIONS) || id >= LANG_MAX_COLLATIONS)
1581  {
1582  char err_msg[ERR_MSG_SIZE];
1583  snprintf (err_msg, sizeof (err_msg) - 1,
1584  "Invalid collation numeric identifier : %d" " for collation '%s'. Expecting greater than %d and lower "
1585  "than %d.", id, coll->coll.coll_name, ((coll->built_in) ? 0 : LANG_MAX_BUILTIN_COLLATIONS),
1587  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
1588  return ER_LOC_INIT;
1589  }
1590 
1591  assert (lang_Collations[id] != NULL);
1592 
1593  if (lang_Collations[id]->coll.coll_id != LANG_COLL_DEFAULT)
1594  {
1595  char err_msg[ERR_MSG_SIZE];
1596  snprintf (err_msg, sizeof (err_msg) - 1,
1597  "Invalid collation numeric identifier : %d for collation '%s'"
1598  ". This id is already used by collation '%s'", id, coll->coll.coll_name,
1599  lang_Collations[id]->coll.coll_name);
1600  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
1601  return ER_LOC_INIT;
1602  }
1603 
1604  lang_Collations[id] = coll;
1605 
1607 
1608  if (coll->init_coll != NULL)
1609  {
1610  coll->init_coll (coll);
1611  }
1612 
1613  return NO_ERROR;
1614 }
1615 
1616 /*
1617  * lang_is_coll_name_allowed - checks if collation name is allowed
1618  * return: true if allowed
1619  * name(in): collation name
1620  */
1621 bool
1622 lang_is_coll_name_allowed (const char *name)
1623 {
1624  int i;
1625 
1626  if (name == NULL || *name == '\0')
1627  {
1628  return false;
1629  }
1630 
1631  if (strchr (name, (int) ' ') || strchr (name, (int) '\t'))
1632  {
1633  return false;
1634  }
1635 
1636  for (i = 0; i < (int) (sizeof (built_In_collations) / sizeof (built_In_collations[0])); i++)
1637  {
1638  if (strcasecmp (built_In_collations[i]->coll.coll_name, name) == 0)
1639  {
1640  return false;
1641  }
1642  }
1643 
1644  return true;
1645 }
1646 
1647 /*
1648  * lang_get_collation - access a collation by id
1649  * return: pointer to collation data or NULL
1650  * coll_id(in): collation identifier
1651  */
1653 lang_get_collation (const int coll_id)
1654 {
1655  assert (coll_id >= 0 && coll_id < LANG_MAX_COLLATIONS);
1656 
1657  return lang_Collations[coll_id];
1658 }
1659 
1660 
1661 /*
1662  * lang_get_collation_name - return collation name
1663  * return: collation name
1664  * coll_id(in): collation identifier
1665  */
1666 const char *
1667 lang_get_collation_name (const int coll_id)
1668 {
1669  if (coll_id < 0 || coll_id >= LANG_MAX_COLLATIONS)
1670  {
1671  return NULL;
1672  }
1673 
1674  return lang_Collations[coll_id]->coll.coll_name;
1675 }
1676 
1677 /*
1678  * lang_get_collation_by_name - access a collation by name
1679  * return: pointer to collation data or NULL
1680  * coll_name(in): collation name
1681  */
1683 lang_get_collation_by_name (const char *coll_name)
1684 {
1685  int i;
1686  assert (coll_name != NULL);
1687 
1688  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
1689  {
1690  if (strcmp (coll_name, lang_Collations[i]->coll.coll_name) == 0)
1691  {
1692  return lang_Collations[i];
1693  }
1694  }
1695 
1696  return NULL;
1697 }
1698 
1699 /*
1700  * lang_collation_count -
1701  * return: number of collations in the system
1702  */
1703 int
1705 {
1706  return lang_Count_collations;
1707 }
1708 
1709 /*
1710  * lang_get_codeset_name - get charset string equivalent
1711  * return: charset string or empty string
1712  * codeset_id(in): charset/codeset id
1713  */
1714 const char *
1715 lang_get_codeset_name (int codeset_id)
1716 {
1717  switch (codeset_id)
1718  {
1719  case INTL_CODESET_UTF8:
1720  return "utf8";
1721  case INTL_CODESET_ISO88591:
1722  return "iso88591";
1724  return "euckr";
1725  case INTL_CODESET_BINARY:
1726  return "binary";
1727  }
1728 
1729  /* codeset_id is propagated downwards from the grammar, so it is either INTL_CODESET_UTF8, INTL_CODESET_KSC5601_EUC
1730  * or INTL_CODESET_ISO88591 */
1731  assert (false);
1732 
1733  return "";
1734 }
1735 
1736 /*
1737  * lang_user_alphabet_w_coll -
1738  * return: id of default collation
1739  */
1740 const ALPHABET_DATA *
1741 lang_user_alphabet_w_coll (const int collation_id)
1742 {
1743  LANG_COLLATION *lang_coll;
1744 
1745  lang_coll = lang_get_collation (collation_id);
1746 
1747  assert (lang_coll->default_lang != NULL);
1748 
1749  return &(lang_coll->default_lang->alphabet);
1750 }
1751 
1752 /*
1753  * find_lang_locale_data - searches a locale with a given name and codeset
1754  * return: locale or NULL if the name+codeset combination was not found
1755  * name(in): name of locale
1756  * codeset(in): codeset to search
1757  * last_locale(out): last locale whith this name or NULL if no locale was
1758  * found
1759  */
1760 static LANG_LOCALE_DATA *
1761 find_lang_locale_data (const char *name, const INTL_CODESET codeset, LANG_LOCALE_DATA ** last_lang_locale)
1762 {
1763  LANG_LOCALE_DATA *first_lang_locale = NULL;
1764  LANG_LOCALE_DATA *curr_lang_locale;
1765  LANG_LOCALE_DATA *found_lang_locale = NULL;
1766  int i;
1767 
1768  assert (last_lang_locale != NULL);
1769 
1770  for (i = 0; i < lang_Count_locales; i++)
1771  {
1772  if (strcasecmp (lang_Loaded_locales[i]->lang_name, name) == 0)
1773  {
1774  first_lang_locale = lang_Loaded_locales[i];
1775  break;
1776  }
1777  }
1778 
1779  for (curr_lang_locale = first_lang_locale; curr_lang_locale != NULL; curr_lang_locale = curr_lang_locale->next_lld)
1780  {
1781  if (codeset == curr_lang_locale->codeset)
1782  {
1783  found_lang_locale = curr_lang_locale;
1784  }
1785 
1786  if (curr_lang_locale->next_lld == NULL)
1787  {
1788  *last_lang_locale = curr_lang_locale;
1789  break;
1790  }
1791  }
1792 
1793  return found_lang_locale;
1794 }
1795 
1796 /*
1797  * register_lang_locale_data - registers a language locale data in the system
1798  * return: error status
1799  * lld(in): language locale data
1800  */
1801 static int
1803 {
1804  LANG_LOCALE_DATA *last_lang_locale = NULL;
1805  LANG_LOCALE_DATA *found_lang_locale = NULL;
1806 
1807  assert (lld != NULL);
1808 
1809  found_lang_locale = find_lang_locale_data (lld->lang_name, lld->codeset, &last_lang_locale);
1810 
1811  assert (found_lang_locale == NULL);
1812 
1813  if (!lld->is_user_data)
1814  {
1815  /* make a copy of built-in */
1816  LANG_LOCALE_DATA *new_lld = (LANG_LOCALE_DATA *) malloc (sizeof (LANG_LOCALE_DATA));
1817  if (new_lld == NULL)
1818  {
1819  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_INIT, false);
1820  return ER_LOC_INIT;
1821  }
1822 
1823  memcpy (new_lld, lld, sizeof (LANG_LOCALE_DATA));
1824  lld = new_lld;
1825  }
1826 
1827  if (last_lang_locale == NULL)
1828  {
1829  /* no other locales exists with the same name */
1831  lang_Loaded_locales[lang_Count_locales++] = lld;
1832  }
1833  else
1834  {
1835  last_lang_locale->next_lld = lld;
1836  }
1837 
1838  if (!(lld->is_initialized) && lld->initloc != NULL)
1839  {
1842  lld->initloc (lld);
1843 
1844  /* init default collation */
1845  if (lld->default_lang_coll != NULL && lld->default_lang_coll->init_coll != NULL)
1846  {
1848  }
1849  }
1850 
1851  return NO_ERROR;
1852 }
1853 
1854 /*
1855  * free_lang_locale_data - Releases any resources held by a language locale
1856  * data
1857  * return: none
1858  */
1859 static void
1861 {
1862  assert (lld != NULL);
1863 
1864  if (lld->next_lld != NULL)
1865  {
1867  lld->next_lld = NULL;
1868  }
1869 
1870  if (lld->is_user_data)
1871  {
1872  /* Text conversions having init_conv_func not NULL are built-in. They can't be deallocated. */
1873  if (lld->txt_conv != NULL && lld->txt_conv->init_conv_func == NULL)
1874  {
1875  free (lld->txt_conv);
1876  lld->txt_conv = NULL;
1877  }
1878  }
1879 
1880  free (lld);
1881 }
1882 
1883 /*
1884  * lang_get_msg_Loc_name - returns the language name for the message files,
1885  * according to environment
1886  * return: language name string
1887  */
1888 const char *
1890 {
1892  {
1893  /* ignore any errors, we just need a locale for messages */
1894  (void) set_msg_lang_from_env ();
1895  }
1896 
1897  return lang_Msg_loc_name;
1898 }
1899 
1900 /*
1901  * lang_get_Lang_name - returns the language name according to environment
1902  * return: language name string
1903  */
1904 const char *
1906 {
1908  {
1909  assert (false);
1910  return NULL;
1911  }
1912  return lang_Lang_name;
1913 }
1914 
1915 /*
1916  * lang_id - Returns language id per env settings
1917  * return: language identifier
1918  */
1919 INTL_LANG
1920 lang_id (void)
1921 {
1923  {
1924  assert (false);
1925  return -1;
1926  }
1927  return lang_Lang_id;
1928 }
1929 
1930 /*
1931  * lang_currency - Returns language currency per env settings
1932  * return: language currency identifier
1933  */
1936 {
1938  {
1939  assert (false);
1940  return DB_CURRENCY_NULL;
1941  }
1942  return lang_Loc_currency;
1943 }
1944 
1945 /*
1946  * lang_locale_currency - Returns language currency for a language
1947  * return: language currency identifier
1948  */
1950 lang_locale_currency (const char *locale_str)
1951 {
1952  int i;
1953 
1955  {
1956  assert (false);
1957  return DB_CURRENCY_NULL;
1958  }
1959 
1960  for (i = 0; i < lang_Count_locales; i++)
1961  {
1962  if (strcasecmp (lang_Loaded_locales[i]->lang_name, locale_str) == 0)
1963  {
1964  return lang_Loaded_locales[i]->default_currency_code;
1965  }
1966  }
1967 
1968  return lang_currency ();
1969 }
1970 
1971 /*
1972  * lang_charset - Returns language charset per env settings
1973  * return: language charset
1974  */
1977 {
1979  {
1980  assert (false);
1981  return INTL_CODESET_NONE;
1982  }
1983  return lang_Loc_charset;
1984 }
1985 
1986 /*
1987  * lang_final - Releases any resources held by this module
1988  * return: none
1989  */
1990 void
1992 {
1994 
1996 
1998 
2000 
2001  lang_Builtin_initialized = false;
2002  lang_Initialized = false;
2003  lang_Init_w_error = false;
2004  lang_Language_initialized = false;
2005  lang_Charset_initialized = false;
2006  lang_Msg_env_initialized = false;
2007 }
2008 
2009 /*
2010  * lang_currency_symbol - Computes an appropriate printed representation for
2011  * a currency identifier
2012  * return: currency string
2013  * curr(in): currency constant
2014  */
2015 const char *
2017 {
2018  return intl_get_money_symbol_console (curr);
2019 }
2020 
2021 #if defined (ENABLE_UNUSED_FUNCTION)
2022 /*
2023  * lang_char_mem_size - Returns the character memory size for the given
2024  * pointer to a character
2025  * return: memory size for the first character
2026  * p(in)
2027  */
2028 int
2029 lang_char_mem_size (const char *p)
2030 {
2032  {
2033  if (0x80 & (p[0]))
2034  {
2035  return 2;
2036  }
2037  }
2038  return 1;
2039 }
2040 
2041 /*
2042  * lang_char_screen_size - Returns the screen size for the given pointer
2043  * to a character
2044  * return: screen size for the first character
2045  * p(in)
2046  */
2047 int
2048 lang_char_screen_size (const char *p)
2049 {
2051  {
2052  return (0x80 & (p[0]) ? 2 : 1);
2053  }
2054  return 1;
2055 }
2056 
2057 /*
2058  * lang_wchar_mem_size - Returns the memory size for the given pointer
2059  * to a wide character
2060  * return: memory size for the first character
2061  * p(in)
2062  */
2063 int
2064 lang_wchar_mem_size (const wchar_t * p)
2065 {
2067  {
2068  if (0x8000 & (p[0]))
2069  {
2070  return 2;
2071  }
2072  }
2073  return 1;
2074 }
2075 
2076 /*
2077  * lang_wchar_screen_size - Returns the screen size for the given pointer
2078  * to a wide character
2079  * return: screen size for the first character
2080  * p(in)
2081  */
2082 int
2083 lang_wchar_screen_size (const wchar_t * p)
2084 {
2086  {
2087  return (0x8000 & (p[0]) ? 2 : 1);
2088  }
2089  return 1;
2090 }
2091 #endif
2092 
/*
 * lang_check_identifier - Tests whether a name is suitable as an identifier
 *   return: true if the first character satisfies char_isalpha and every one
 *           of the first 'length' characters either satisfies char_isalnum
 *           or is '_'; false otherwise, including when name is NULL or
 *           length is not positive
 *   name(in): identifier name
 *   length(in): identifier name length
 */
bool
lang_check_identifier (const char *name, int length)
{
  int i;

  /* an empty name is not an identifier; also avoids reading name[0] past the given length */
  if (name == NULL || length <= 0)
    {
      return false;
    }

  if (!char_isalpha (name[0]))
    {
      return false;
    }

  for (i = 0; i < length; i++)
    {
      if (!char_isalnum (name[i]) && name[i] != '_')
        {
          return false;
        }
    }

  return true;
}
2125 
2126 /*
2127  * lang_locale - returns language locale per env settings.
2128  * return: language locale data
2129  */
2130 const LANG_LOCALE_DATA *
2132 {
2134  {
2135  assert (false);
2136  return NULL;
2137  }
2138  return lang_Loc_data;
2139 }
2140 
2141 /*
2142  * lang_get_specific_locale - returns language locale of a specific language
2143  * and codeset
2144  *
2145  * return: language locale data
2146  * lang(in):
2147  * codeset(in):
2148  *
2149  * Note : if codeset is INTL_CODESET_NONE, returns the first locale it
2150  * finds with the requested language id, no matter the codeset.
2151  */
2152 const LANG_LOCALE_DATA *
2154 {
2156  {
2157  assert (false);
2158  return NULL;
2159  }
2160 
2161  if ((int) lang < lang_Count_locales)
2162  {
2163  LANG_LOCALE_DATA *first_lang_locale = lang_Loaded_locales[lang];
2164  LANG_LOCALE_DATA *curr_lang_locale;
2165 
2166  for (curr_lang_locale = first_lang_locale; curr_lang_locale != NULL;
2167  curr_lang_locale = curr_lang_locale->next_lld)
2168  {
2169  if (curr_lang_locale->codeset == codeset || codeset == INTL_CODESET_NONE)
2170  {
2171  return curr_lang_locale;
2172  }
2173  }
2174  }
2175 
2176  return NULL;
2177 }
2178 
2179 
2180 /*
2181  * lang_get_first_locale_for_lang - returns first locale for language
2182  * return: language locale data or NULL if language id is not valid
2183  * lang(in):
2184  */
2185 const LANG_LOCALE_DATA *
2187 {
2189  {
2190  assert (false);
2191  return NULL;
2192  }
2193 
2194  if ((int) lang < lang_Count_locales)
2195  {
2196  return lang_Loaded_locales[lang];
2197  }
2198 
2199  return NULL;
2200 }
2201 
2202 /*
2203  * lang_get_builtin_lang_id_from_name - returns the builtin language id from a
2204  * language name
2205  *
2206  * return: 0, if language name is accepted, non-zero otherwise
2207  * lang_name(in):
2208  * lang_id(out): language identifier
2209  *
2210  * Note : INTL_LANG_ENGLISH is returned if name is not a valid language name
2211  */
2212 static int
2214 {
2215  int i;
2216 
2217  assert (lang_id != NULL);
2218 
2219  *lang_id = INTL_LANG_ENGLISH;
2220 
2221  for (i = 0; i < (int) (sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS)); i++)
2222  {
2223  if (strncasecmp (lang_name, builtin_Langs[i].lang_name, strlen (builtin_Langs[i].lang_name)) == 0)
2224  {
2225  *lang_id = builtin_Langs[i].lang;
2226  return 0;
2227  }
2228  }
2229 
2230  assert (*lang_id < INTL_LANG_USER_DEF_START);
2231 
2232  return 1;
2233 }
2234 
2235 /*
2236  * lang_get_lang_id_from_name - returns the language id from a language name
2237  *
2238  * return: 0, if language name is accepted, non-zero otherwise
2239  * lang_name(in):
2240  * lang_id(out): language identifier
2241  *
2242  * Note : INTL_LANG_ENGLISH is returned if name is not a valid language name
2243  */
2244 int
2246 {
2247  int i;
2248 
2249  assert (lang_id != NULL);
2250 
2251  *lang_id = INTL_LANG_ENGLISH;
2252 
2253  for (i = 0; i < lang_Count_locales; i++)
2254  {
2255  assert (lang_Loaded_locales[i] != NULL);
2256 
2257  if (strcasecmp (lang_name, lang_Loaded_locales[i]->lang_name) == 0)
2258  {
2259  assert (i == (int) lang_Loaded_locales[i]->lang_id);
2260  *lang_id = lang_Loaded_locales[i]->lang_id;
2261  return 0;
2262  }
2263  }
2264 
2265  return 1;
2266 }
2267 
2268 /*
2269  * lang_get_lang_name_from_id - returns the language name from a language id
2270  *
2271  * return: language name (NULL if lang_id is not valid)
2272  * lang_id(in):
2273  *
2274  */
2275 const char *
2277 {
2278  if ((int) lang_id < lang_Count_locales)
2279  {
2280  assert (lang_Loaded_locales[lang_id] != NULL);
2281  return lang_Loaded_locales[lang_id]->lang_name;
2282  }
2283 
2284  return NULL;
2285 }
2286 
2287 /*
2288  * lang_set_flag_from_lang - set a flag according to language string
2289  *
2290  * return: 0 if language string OK and flag was set, non-zero otherwise
2291  * lang_str(in): language string identifier
2292  * has_user_format(in): true if user has given a format, false otherwise
2293  * has_user_lang(in): true if user has given a language, false otherwise
2294  * flag(out): bit flag : bits 0 and 1 are user flags, bits 2 - 31 are for
2295  * language identification
2296  * Bit 0 : if set, the format was given by user; Bit 1 : if set, the language was given by user
2297  * Bit 2 - 31 : INTL_LANG
2298  *
2299  * Note : function is used in context of some date-string functions.
2300  * If lang_str cannot be solved, the language is assumed English.
2301  */
2302 int
2303 lang_set_flag_from_lang (const char *lang_str, bool has_user_format, bool has_user_lang, int *flag)
2304 {
2306  int status = 0;
2307 
2308  if (lang_str != NULL)
2309  {
2310  status = lang_get_lang_id_from_name (lang_str, &lang);
2311  }
2312 
2313  if (lang_set_flag_from_lang_id (lang, has_user_format, has_user_lang, flag) == 0)
2314  {
2315  return status;
2316  }
2317 
2318  assert (lang == INTL_LANG_ENGLISH);
2319 
2320  return 1;
2321 }
2322 
2323 /*
2324  * lang_set_flag_from_lang - set a flag according to language identifier
2325  *
2326  * return: 0 if language string OK and flag was set, non-zero otherwise
2327  * lang(in): language identier
2328  * has_user_format(in): true if user has given a format, false otherwise
2329  * has_user_lang(in): true if user has given a language, false otherwise
2330  * flag(out): bit flag : bits 0 and 1 are user flags, bits 2 - 31 are for
2331  * language identification
2332  * Bit 0 : if set, the format was given by user
2333 * Bit 1 : if set, the language was given by user
2334  * Bit 2 - 31 : INTL_LANG
2335  * Consider change this flag to store the language as value
2336  * instead of as bit map
2337  *
2338  * Note : function is used in context of some date-string functions.
2339  */
2340 int
2341 lang_set_flag_from_lang_id (const INTL_LANG lang, bool has_user_format, bool has_user_lang, int *flag)
2342 {
2343  int lang_val = (int) lang;
2344 
2345  *flag = 0;
2346 
2347  *flag |= (has_user_format) ? 1 : 0;
2348  *flag |= (has_user_lang) ? 2 : 0;
2349 
2350  if (lang_val >= lang_Count_locales)
2351  {
2352  lang_val = (int) INTL_LANG_ENGLISH;
2353  *flag |= lang_val << 2;
2354  return 1;
2355  }
2356 
2357  *flag |= lang_val << 2;
2358 
2359  return 0;
2360 }
2361 
2362 /*
2363  * lang_get_lang_id_from_flag - get lang id from flag
2364  *
2365  * return: id of language, current language is returned when flag value is
2366  * invalid
2367  * flag(in): bit flag : bit 0 and 1 are user flags, bits 2 - 31 are for
2368  * language identification
2369  *
2370  * Note : function is used in context of some date-string functions.
2371  */
2372 INTL_LANG
2373 lang_get_lang_id_from_flag (const int flag, bool * has_user_format, bool * has_user_lang)
2374 {
2375  int lang_val;
2376 
2377  *has_user_format = ((flag & 0x1) == 0x1) ? true : false;
2378  *has_user_lang = ((flag & 0x2) == 0x2) ? true : false;
2379 
2380  lang_val = flag >> 2;
2381 
2382  if (lang_val >= 0 && lang_val < lang_Count_locales)
2383  {
2384  return (INTL_LANG) lang_val;
2385  }
2386 
2387  return lang_id ();
2388 }
2389 
2390 /*
2391  * lang_date_format_parse - Returns the default format of parsing date for the
2392  * required language or NULL if the default format is not
2393  * available
2394  * lang_id (in):
2395  * codeset (in):
2396  * type (in): DB type for format
2397  * format_codeset (in): codeset of the format found
2398  *
2399  * Note: If a format for combination (lang_id, codeset) is not found, then
2400  * the first valid (non-NULL) format for lang_id and the codeset
2401  * are returned.
2402  *
2403  */
2404 const char *
2406  INTL_CODESET * format_codeset)
2407 {
2408  const LANG_LOCALE_DATA *lld;
2409  const char *format = NULL;
2410  const char *first_valid_format = NULL;
2411 
2412  assert (format_codeset != NULL);
2413 
2415 
2416  lld = lang_get_first_locale_for_lang (lang_id);
2417 
2418  if (lld == NULL)
2419  {
2420  return NULL;
2421  }
2422 
2423  do
2424  {
2425  switch (type)
2426  {
2427  case DB_TYPE_TIME:
2428  format = lld->time_format;
2429  break;
2430  case DB_TYPE_DATE:
2431  format = lld->date_format;
2432  break;
2433  case DB_TYPE_DATETIME:
2434  format = lld->datetime_format;
2435  break;
2436  case DB_TYPE_TIMESTAMP:
2437  format = lld->timestamp_format;
2438  break;
2439  case DB_TYPE_DATETIMETZ:
2440  format = lld->datetimetz_format;
2441  break;
2442  case DB_TYPE_TIMESTAMPTZ:
2443  format = lld->timestamptz_format;
2444  break;
2445  default:
2446  break;
2447  }
2448 
2449  if (lld->codeset == codeset)
2450  {
2451  *format_codeset = codeset;
2452  first_valid_format = format;
2453  break;
2454  }
2455 
2456  if (first_valid_format == NULL)
2457  {
2458  *format_codeset = lld->codeset;
2459  first_valid_format = format;
2460  }
2461 
2462  lld = lld->next_lld;
2463  }
2464  while (lld != NULL);
2465 
2466  return first_valid_format;
2467 }
2468 
2469 /*
2470  * lang_get_default_codeset - returns the default codeset to be used for a
2471  * given language identifier
2472  * return: codeset
2473  * intl_id(in):
2474  */
2475 static INTL_CODESET
2477 {
2478  unsigned int i;
2480 
2481  for (i = 0; i < sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS); i++)
2482  {
2483  if (intl_id == builtin_Langs[i].lang)
2484  {
2485  codeset = builtin_Langs[i].codeset;
2486  break;
2487  }
2488  }
2489  return codeset;
2490 }
2491 
2492 /*
2493  * lang_is_codeset_allowed - checks if a combination of language and codeset
2494  * is allowed
2495  * return: true if combination is allowed, false otherwise
2496  * intl_id(in):
2497  * codeset(in):
2498  */
2499 static bool
2501 {
2502  unsigned int i;
2503 
2504  for (i = 0; i < sizeof (builtin_Langs) / sizeof (LANG_DEFAULTS); i++)
2505  {
2506  if (intl_id == builtin_Langs[i].lang && codeset == builtin_Langs[i].codeset)
2507  {
2508  return true;
2509  }
2510  }
2511  return false;
2512 }
2513 
2514 /*
2515  * lang_digit_grouping_symbol - Returns symbol used for grouping digits in
2516  * numbers
2517  * lang_id (in):
2518  */
2519 char
2521 {
2523 
2524  assert (lld != NULL);
2525 
2526  return lld->number_group_sym;
2527 }
2528 
2529 /*
2530  * lang_digit_fractional_symbol - Returns symbol used for fractional part of
2531  * numbers
2532  * lang_id (in):
2533  */
2534 char
2536 {
2538 
2539  assert (lld != NULL);
2540 
2541  return lld->number_decimal_sym;
2542 }
2543 
2544 /*
2545  * lang_get_txt_conv - Returns the information required for console text
2546  * conversion
2547  */
2550 {
2551  return console_Conv;
2552 }
2553 
2554 /*
2555  * lang_charset_name() - returns charset name
2556  *
2557  * return:
2558  * codeset(in):
2559  */
2560 const char *
2562 {
2563  int i;
2564 
2565  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);
2566 
2567  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
2568  {
2569  if (codeset == lang_Db_charsets[i].charset_id)
2570  {
2571  return lang_Db_charsets[i].charset_name;
2572  }
2573  }
2574 
2575  return NULL;
2576 }
2577 
2578 /*
2579  * lang_charset_cubrid_name() - returns the CUBRID name of a charset (charset_cubrid_name)
2580  *
2581  * return:
2582  * codeset(in):
2583  */
2584 const char *
2586 {
2587  int i;
2588 
2589  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);
2590 
2591  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
2592  {
2593  if (codeset == lang_Db_charsets[i].charset_id)
2594  {
2595  return lang_Db_charsets[i].charset_cubrid_name;
2596  }
2597  }
2598 
2599  return NULL;
2600 }
2601 
2602 /*
2603  * lang_get_charset_env_string -
2604  * buf(out):
2605  * buf_size(in):
2606  * lang_name(in):
2607  * codeset(in):
2608  * return:
2609  */
2610 int
2611 lang_get_charset_env_string (char *buf, int buf_size, const char *lang_name, const INTL_CODESET codeset)
2612 {
2613  if (buf == NULL)
2614  {
2615  assert_release (0);
2616  return ER_FAILED;
2617  }
2618 
2619  if (!strcasecmp (lang_name, "en_US") && codeset == INTL_CODESET_ISO88591)
2620  {
2621  snprintf (buf, buf_size, "%s", lang_name);
2622  }
2623  else
2624  {
2625  snprintf (buf, buf_size, "%s.%s", lang_name, lang_charset_cubrid_name (codeset));
2626  }
2627 
2628  return NO_ERROR;
2629 }
2630 
2631 #if !defined (SERVER_MODE)
2632 /* client side charset and collation */
2634 
2635 /*
2636  * lang_db_put_charset - Saves the charset and language information into DB
2637  * return: error code
2638  *
2639  * Note: This is called during database creation; charset and language are
2640  * initialized with DB creation parameters.
2641  */
2642 int
2644 {
2645  INTL_CODESET server_codeset;
2646  INTL_LANG server_lang;
2647  DB_VALUE value;
2648  int au_save;
2649 
2650  server_codeset = lang_charset ();
2651 
2652  server_lang = lang_id ();
2653 
2654  AU_DISABLE (au_save);
2655  db_make_string (&value, lang_get_lang_name_from_id (server_lang));
2656  if (db_put_internal (Au_root, "lang", &value) != NO_ERROR)
2657  {
2658  /* Error Setting the language */
2659  assert (false);
2660  }
2661 
2662  pr_clear_value (&value);
2663 
2664  db_make_int (&value, (int) server_codeset);
2665  if (db_put_internal (Au_root, "charset", &value) != NO_ERROR)
2666  {
2667  /* Error Setting the nchar codeset */
2668  assert (false);
2669  }
2670  AU_ENABLE (au_save);
2671 
2672  return NO_ERROR;
2673 }
2674 
2675 /*
2676  * lang_charset_name_to_id - Returns the INTL_CODESET of the specified charset
2677  * return: NO_ERROR or error code if the specified name can't be found in
2678  * the lang_Db_charsets array
2679  * name(in): the name of the desired charset
2680  * codeset(out): INTL_CODESET of the desired charset
2681  */
2682 int
2684 {
2685  int i;
2686 
2687  /* Find the charset in the lang_Db_charsets array */
2688  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
2689  {
2690  if (strcmp (lang_Db_charsets[i].charset_name, name) == 0)
2691  {
2692  *codeset = lang_Db_charsets[i].charset_id;
2693  return NO_ERROR;
2694  }
2695  }
2696 
2697  return ER_FAILED;
2698 }
2699 
2700 /*
2701  * lang_get_client_charset - Gets Client's charset
2702  * return: codeset
2703  */
2706 {
2707  INTL_CODESET charset = LANG_SYS_CODESET;
2708  char *coll_name = prm_get_string_value (PRM_ID_INTL_COLLATION);
2709 
2710  if (coll_name != NULL)
2711  {
2712  LANG_COLLATION *lc = lang_get_collation_by_name (coll_name);
2713  if (lc != NULL)
2714  {
2715  charset = lc->codeset;
2716  }
2717  }
2718 
2719  return charset;
2720 }
2721 
2722 /*
2723  * lang_get_client_collation - Gets Client's collation
2724  * return: collation id
2725  */
2726 int
2728 {
2729  int coll_id = LANG_SYS_COLLATION;
2730  char *coll_name = prm_get_string_value (PRM_ID_INTL_COLLATION);
2731 
2732  if (coll_name != NULL)
2733  {
2734  LANG_COLLATION *lc = lang_get_collation_by_name (coll_name);
2735  if (lc != NULL)
2736  {
2737  coll_id = lc->coll.coll_id;
2738  }
2739  }
2740 
2741  return coll_id;
2742 }
2743 
2744 /*
2745  * lang_set_parser_use_client_charset - set if next parsing operation should
2746  * use client's setting of charset and
2747  * collation
2748  */
2749 void
2751 {
2753 }
2754 
2755 /*
2756  * lang_get_parser_use_client_charset - checks if parser should use client's
2757  * charset and collation
2758  * return:
2759  */
2760 bool
2762 {
2764 }
2765 
2766 #endif /* !SERVER_MODE */
2767 
2768 /*
2769  * lang_charset_cubrid_name_to_id - Returns the INTL_CODESET of the charset
2770  * with CUBRID name
2771  * return: codeset id, INTL_CODESET_NONE if not found
2772  * name(in): the name of the desired charset
2773  */
2776 {
2777  int current_codeset = INTL_CODESET_BINARY;
2778 
2779  while (current_codeset <= INTL_CODESET_LAST)
2780  {
2781  if (strcasecmp (name, lang_Db_charsets[current_codeset].charset_cubrid_name) == 0)
2782  {
2783  return (INTL_CODESET) current_codeset;
2784  }
2785  current_codeset++;
2786  }
2787 
2788  return INTL_CODESET_NONE;
2789 }
2790 
2791 /*
2792  * lang_charset_introducer() - returns introducer text to print for a charset
2793  *
2794  * return: charset introducer or NULL if not found
2795  * codeset(in):
2796  */
2797 const char *
2799 {
2800  int i;
2801 
2802  assert (codeset >= INTL_CODESET_BINARY && codeset <= INTL_CODESET_UTF8);
2803 
2804  for (i = 0; lang_Db_charsets[i].charset_id != INTL_CODESET_NONE; i++)
2805  {
2806  if (codeset == lang_Db_charsets[i].charset_id)
2807  {
2808  return lang_Db_charsets[i].introducer;
2809  }
2810  }
2811 
2812  return NULL;
2813 }
2814 
2815 
2816 /* Collation functions */
2817 
2818 /*
2819  * lang_strcmp_utf8() - string compare for UTF8
2820  * return:
2821  * lang_coll(in) : collation data
2822  * string1(in):
2823  * size1(in):
2824  * string2(in):
2825  * size2(in):
2826  */
2827 static int
2828 lang_strcmp_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
2829  const unsigned char *str2, const int size2, bool ignore_trailing_space)
2830 {
2831  return lang_strmatch_utf8 (lang_coll, false, str1, size1, str2, size2, NULL, false, NULL, ignore_trailing_space);
2832 }
2833 
2834 /*
2835  * lang_strmatch_utf8() - string match and compare for UTF8 collations
2836  *
2837  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
2838  * lang_coll(in) : collation data
2839  * is_match(in) : true if match, otherwise is compare
2840  * str1(in):
2841  * size1(in):
2842  * str2(in): this is the pattern string in case of match
2843  * size2(in):
2844  * escape(in): pointer to escape character (multi-byte allowed)
2845  * (used in context of LIKE)
2846  * has_last_escape(in): true if it should check if last character is the
2847  * escape character
2848  * str1_match_size(out): size from str1 which is matched with str2
2849  */
2850 static int
2851 lang_strmatch_utf8 (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
2852  const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
2853  int *str1_match_size, bool ignore_trailing_space)
2854 {
2855  const unsigned char *str1_end;
2856  const unsigned char *str2_end;
2857  const unsigned char *str1_begin;
2858  unsigned char *str1_next, *str2_next;
2859  unsigned int cp1, cp2, w_cp1, w_cp2;
2860  const int alpha_cnt = lang_coll->coll.w_count;
2861  const unsigned int *weight_ptr = lang_coll->coll.weights;
2862 
2863  if (lang_coll->built_in && ignore_trailing_space)
2864  {
2865  weight_ptr = lang_coll->coll.weights_ti;
2866  }
2867 
2868  str1_begin = str1;
2869  str1_end = str1 + size1;
2870  str2_end = str2 + size2;
2871 
2872  for (; str1 < str1_end && str2 < str2_end;)
2873  {
2874  assert (str1_end - str1 > 0);
2875  assert (str2_end - str2 > 0);
2876 
2877  cp1 = intl_utf8_to_cp (str1, CAST_BUFLEN (str1_end - str1), &str1_next);
2878  cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
2879 
2880  if (is_match && escape != NULL && memcmp (str2, escape, str2_next - str2) == 0)
2881  {
2882  if (!(has_last_escape && str2_next >= str2_end))
2883  {
2884  str2 = str2_next;
2885  cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
2886  }
2887  }
2888 
2889  if (cp1 < (unsigned int) alpha_cnt)
2890  {
2891  if (cp1 == SPACE)
2892  {
2893  w_cp1 = ZERO;
2894  }
2895  else
2896  {
2897  w_cp1 = weight_ptr[cp1];
2898  }
2899  }
2900  else
2901  {
2902  w_cp1 = cp1;
2903  }
2904 
2905  if (cp2 < (unsigned int) alpha_cnt)
2906  {
2907  if (cp2 == SPACE)
2908  {
2909  w_cp2 = ZERO;
2910  }
2911  else
2912  {
2913  w_cp2 = weight_ptr[cp2];
2914  }
2915  }
2916  else
2917  {
2918  w_cp2 = cp2;
2919  }
2920 
2921  if (w_cp1 != w_cp2)
2922  {
2923  return (w_cp1 < w_cp2) ? (-1) : 1;
2924  }
2925 
2926  str1 = str1_next;
2927  str2 = str2_next;
2928  }
2929 
2930  size1 = CAST_BUFLEN (str1_end - str1);
2931  size2 = CAST_BUFLEN (str2_end - str2);
2932 
2933  assert (size1 == 0 || size2 == 0);
2934 
2935  if (is_match)
2936  {
2937  assert (str1_match_size != NULL);
2938  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
2939  }
2940 
2941  if (size1 == size2)
2942  {
2943  return 0;
2944  }
2945  else if (size2 > 0)
2946  {
2947  if (is_match || !ignore_trailing_space)
2948  {
2949  return -1;
2950  }
2951 
2952  if (lang_str_utf8_trail_zero_weights (lang_coll, str2, CAST_BUFLEN (str2_end - str2)) != 0)
2953  {
2954  return -1;
2955  }
2956  }
2957  else
2958  {
2959  assert (size1 > 0);
2960 
2961  if (is_match)
2962  {
2963  return 0;
2964  }
2965 
2966  if (!ignore_trailing_space)
2967  {
2968  return 1;
2969  }
2970 
2971  if (lang_str_utf8_trail_zero_weights (lang_coll, str1, CAST_BUFLEN (str1_end - str1)) != 0)
2972  {
2973  return 1;
2974  }
2975  }
2976 
2977  return 0;
2978 }
2979 
2980 /*
2981  * lang_strcmp_utf8_w_contr() - string compare for UTF8 for a collation
2982  * having UCA contractions
2983  * return:
2984  * lang_coll(in) : collation data
2985  * string1(in):
2986  * size1(in):
2987  * string2(in):
2988  * size2(in):
2989  */
2990 static int
2991 lang_strcmp_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
2992  const unsigned char *str2, const int size2, bool ignore_trailing_space)
2993 {
2994  return lang_strmatch_utf8_w_contr (lang_coll, false, str1, size1, str2, size2, NULL, false, NULL,
2995  ignore_trailing_space);
2996 }
2997 
2998 /*
2999  * lang_strmatch_utf8_w_contr() - string match or compare for UTF8 for a
3000  * collation having UCA contractions
3001  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
3002  * lang_coll(in) : collation data
3003  * is_match(in) : true if match, otherwise is compare
3004  * str1(in):
3005  * size1(in):
3006  * str2(in): this is the pattern string in case of match
3007  * size2(in):
3008  * escape(in): pointer to escape character (multi-byte allowed)
3009  * (used in context of LIKE)
3010  * has_last_escape(in): true if it should check if last character is the
3011  * escape character
3012  * str1_match_size(out): size from str1 which is matched with str2
3013  */
3014 static int
3015 lang_strmatch_utf8_w_contr (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
3016  const unsigned char *str2, int size2, const unsigned char *escape,
3017  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
3018 {
3019  const unsigned char *str1_end;
3020  const unsigned char *str2_end;
3021  const unsigned char *str1_begin;
3022  unsigned char *str1_next, *str2_next;
3023  unsigned int cp1, cp2, w_cp1, w_cp2;
3024  const COLL_DATA *coll = &(lang_coll->coll);
3025  const int alpha_cnt = coll->w_count;
3026  const unsigned int *weight_ptr = lang_coll->coll.weights;
3027 
3028  bool is_str1_contr = false;
3029  bool is_str2_contr = false;
3030 
3031  str1_end = str1 + size1;
3032  str2_end = str2 + size2;
3033  str1_begin = str1;
3034 
3035  for (; str1 < str1_end && str2 < str2_end;)
3036  {
3037  assert (str1_end - str1 > 0);
3038  assert (str2_end - str2 > 0);
3039 
3040  cp1 = intl_utf8_to_cp (str1, CAST_BUFLEN (str1_end - str1), &str1_next);
3041  cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
3042 
3043  if (is_match && escape != NULL && memcmp (str2, escape, str2_next - str2) == 0)
3044  {
3045  if (!(has_last_escape && str2_next >= str2_end))
3046  {
3047  str2 = str2_next;
3048  cp2 = intl_utf8_to_cp (str2, CAST_BUFLEN (str2_end - str2), &str2_next);
3049  }
3050  }
3051 
3052  is_str1_contr = is_str2_contr = false;
3053 
3054  if (cp1 < (unsigned int) alpha_cnt)
3055  {
3056  COLL_CONTRACTION *contr = NULL;
3057 
3058  if (str1_end - str1 >= coll->contr_min_size && cp1 >= coll->cp_first_contr_offset
3059  && cp1 < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
3060  && ((contr = lang_get_contr_for_string (coll, str1, CAST_BUFLEN (str1_end - str1), cp1)) != NULL))
3061  {
3062  assert (contr != NULL);
3063 
3064  w_cp1 = contr->wv;
3065  str1_next = (unsigned char *) str1 + contr->size;
3066  is_str1_contr = true;
3067  }
3068  else
3069  {
3070  w_cp1 = weight_ptr[cp1];
3071  }
3072  }
3073  else
3074  {
3075  w_cp1 = cp1;
3076  }
3077 
3078  if (cp2 < (unsigned int) alpha_cnt)
3079  {
3080  COLL_CONTRACTION *contr = NULL;
3081 
3082  if (str2_end - str2 >= coll->contr_min_size && cp2 >= coll->cp_first_contr_offset
3083  && cp2 < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
3084  && ((contr = lang_get_contr_for_string (coll, str2, CAST_BUFLEN (str2_end - str2), cp2)) != NULL))
3085  {
3086  assert (contr != NULL);
3087 
3088  w_cp2 = contr->wv;
3089  str2_next = (unsigned char *) str2 + contr->size;
3090  is_str2_contr = true;
3091  }
3092  else
3093  {
3094  w_cp2 = weight_ptr[cp2];
3095  }
3096  }
3097  else
3098  {
3099  w_cp2 = cp2;
3100  }
3101 
3102  if (is_match && coll->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW && !is_str2_contr && is_str1_contr
3103  && cp1 == cp2)
3104  {
3105  /* re-read weight for str1 ignoring contractions */
3106  if (cp1 < (unsigned int) alpha_cnt)
3107  {
3108  w_cp1 = weight_ptr[cp1];
3109  }
3110  else
3111  {
3112  w_cp1 = cp1;
3113  }
3114  str1_next = (unsigned char *) str1 + intl_Len_utf8_char[*str1];
3115  }
3116 
3117  if (w_cp1 != w_cp2)
3118  {
3119  return (w_cp1 < w_cp2) ? (-1) : 1;
3120  }
3121 
3122  str1 = str1_next;
3123  str2 = str2_next;
3124  }
3125 
3126  size1 = CAST_BUFLEN (str1_end - str1);
3127  size2 = CAST_BUFLEN (str2_end - str2);
3128 
3129  assert (size1 == 0 || size2 == 0);
3130 
3131  if (is_match)
3132  {
3133  assert (str1_match_size != NULL);
3134  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
3135  }
3136 
3137  if (size1 == size2)
3138  {
3139  return 0;
3140  }
3141  else if (size2 > 0)
3142  {
3143  if (is_match || !ignore_trailing_space)
3144  {
3145  return -1;
3146  }
3147 
3148  /* use same function as for collation without contractions : we suppose that there are no contractions with zero
3149  * weights or having starting codepoints with zero weight */
3150  if (lang_str_utf8_trail_zero_weights (lang_coll, str2, CAST_BUFLEN (str2_end - str2)) != 0)
3151  {
3152  return -1;
3153  }
3154  }
3155  else
3156  {
3157  assert (size1 > 0);
3158  if (is_match)
3159  {
3160  return 0;
3161  }
3162 
3163  if (!ignore_trailing_space)
3164  {
3165  return 1;
3166  }
3167 
3168  /* same function as for collation without contractions */
3169  if (lang_str_utf8_trail_zero_weights (lang_coll, str1, CAST_BUFLEN (str1_end - str1)) != 0)
3170  {
3171  return 1;
3172  }
3173  }
3174 
3175  return 0;
3176 }
3177 
/*
 * ADD_TO_HASH() - folds one weight into a running 32-bit hash key
 *   pseudo_key(in/out): unsigned int lvalue holding the running key
 *   w(in): weight to add
 *
 * Note: the key is shifted left by 4 bits and 'w' is added; when any of the
 *	 top 4 bits become set they are XOR-ed back into bits 4..7 and then
 *	 cleared.
 *	 Arguments are parenthesized and the scratch variable has a
 *	 macro-private name, so an argument expression or a caller variable
 *	 named 'i' cannot be captured by the expansion.
 */
#define ADD_TO_HASH(pseudo_key, w) \
  do { \
    unsigned int add_to_hash_top_; \
    (pseudo_key) = ((pseudo_key) << 4) + (w); \
    add_to_hash_top_ = (pseudo_key) & 0xf0000000; \
    if (add_to_hash_top_ != 0) \
      { \
	(pseudo_key) ^= add_to_hash_top_ >> 24; \
	(pseudo_key) ^= add_to_hash_top_; \
      } \
  } while (0)
3189 
3190 /*
3191  * lang_mht2str_utf8() - computes hash 2 style for a UTF-8 string having
3192  * collation without expansions
3193  *
3194  * return: hash value
3195  * lang_coll(in) : collation data
3196  * str(in):
3197  * size(in):
3198  */
3199 static unsigned int
3200 lang_mht2str_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
3201 {
3202  const unsigned char *str_end;
3203  unsigned char *str_next;
3204  unsigned int cp, w;
3205  const COLL_DATA *coll = &(lang_coll->coll);
3206  const int alpha_cnt = coll->w_count;
3207  const unsigned int *weight_ptr = lang_coll->coll.weights;
3208  unsigned int pseudo_key = 0;
3209 
3210  str_end = str + size;
3211 
3212  for (; str < str_end;)
3213  {
3214  assert (str_end - str > 0);
3215 
3216  cp = intl_utf8_to_cp (str, CAST_BUFLEN (str_end - str), &str_next);
3217 
3218  if (cp < (unsigned int) alpha_cnt)
3219  {
3220  COLL_CONTRACTION *contr = NULL;
3221 
3222  if (coll->count_contr > 0 && str_end - str >= coll->contr_min_size && cp >= coll->cp_first_contr_offset
3223  && cp < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
3224  && ((contr = lang_get_contr_for_string (coll, str, CAST_BUFLEN (str_end - str), cp)) != NULL))
3225  {
3226  assert (contr != NULL);
3227 
3228  w = contr->wv;
3229  str_next = (unsigned char *) str + contr->size;
3230  }
3231  else
3232  {
3233  w = weight_ptr[cp];
3234  }
3235  }
3236  else
3237  {
3238  w = cp;
3239  }
3240 
3241  ADD_TO_HASH (pseudo_key, w);
3242 
3243  str = str_next;
3244  }
3245 
3246  return pseudo_key;
3247 }
3248 
3249 /*
3250  * lang_get_w_first_el() - get the weight of the first element (character or
3251  * contraction) encountered in the string
3252  *
3253  * return: weight value
3254  * coll_data(in): collation data
3255  * str(in): buffer to check for contractions
3256  * str_size(in): size of buffer (bytes)
3257  * next_char(out): pointer to the end of element (next character)
3258  *
3259  * Note : This function works only on UTF-8 collations without expansions.
3260  *
3261  */
3262 static unsigned int
3263 lang_get_w_first_el (const COLL_DATA * coll, const unsigned char *str, const int str_size, unsigned char **next_char,
3264  bool ignore_trailing_space)
3265 {
3266  unsigned int cp, w;
3267  const int alpha_cnt = coll->w_count;
3268  const unsigned int *weight_ptr = coll->weights;
3269 
3270  assert (coll->uca_exp_num == 0);
3271  assert (str_size > 0);
3272  assert (next_char != NULL);
3273 
3274  cp = intl_utf8_to_cp (str, str_size, next_char);
3275  if (cp < (unsigned int) alpha_cnt)
3276  {
3277  COLL_CONTRACTION *contr = NULL;
3278 
3279  if (coll->count_contr > 0 && str_size >= coll->contr_min_size && cp >= coll->cp_first_contr_offset
3280  && cp < (coll->cp_first_contr_offset + coll->cp_first_contr_count)
3281  && ((contr = lang_get_contr_for_string (coll, str, str_size, cp)) != NULL))
3282  {
3283  assert (contr != NULL);
3284 
3285  w = contr->wv;
3286  *next_char = (unsigned char *) str + contr->size;
3287  }
3288  else
3289  {
3290  if (cp == ASCII_SPACE && ignore_trailing_space)
3291  {
3292  return 0;
3293  }
3294  w = weight_ptr[cp];
3295  }
3296  }
3297  else
3298  {
3299  w = cp;
3300  }
3301 
3302  return w;
3303 }
3304 
3305 /*
3306  * lang_get_contr_for_string() - checks if the string starts with a
3307  * contraction
3308  *
3309  * return: contraction pointer or NULL if no contraction is found
3310  * coll_data(in): collation data
3311  * str(in): buffer to check for contractions
3312  * str_size(in): size of buffer (bytes)
3313  * cp(in): codepoint of first character in 'str'
3314  *
3315  */
3316 static COLL_CONTRACTION *
3317 lang_get_contr_for_string (const COLL_DATA * coll_data, const unsigned char *str, const int str_size, unsigned int cp)
3318 {
3319  const int *first_contr;
3320  int contr_id;
3321  COLL_CONTRACTION *contr;
3322  int cmp;
3323 
3324  assert (coll_data != NULL);
3325  assert (coll_data->count_contr > 0);
3326 
3327  assert (str != NULL);
3328  assert (str_size >= coll_data->contr_min_size);
3329 
3330  first_contr = coll_data->cp_first_contr_array;
3331  assert (first_contr != NULL);
3332  contr_id = first_contr[cp - coll_data->cp_first_contr_offset];
3333 
3334  if (contr_id == -1)
3335  {
3336  return NULL;
3337  }
3338 
3339  assert (contr_id >= 0 && contr_id < coll_data->count_contr);
3340  contr = &(coll_data->contr_list[contr_id]);
3341 
3342  do
3343  {
3344  if ((int) contr->size > str_size)
3345  {
3346  cmp = memcmp (contr->c_buf, str, str_size);
3347  if (cmp == 0)
3348  {
3349  cmp = 1;
3350  }
3351  }
3352  else
3353  {
3354  cmp = memcmp (contr->c_buf, str, contr->size);
3355  }
3356 
3357  if (cmp >= 0)
3358  {
3359  break;
3360  }
3361 
3362  assert (cmp < 0);
3363 
3364  contr++;
3365  contr_id++;
3366 
3367  }
3368  while (contr_id < coll_data->count_contr);
3369 
3370  if (cmp != 0)
3371  {
3372  contr = NULL;
3373  }
3374 
3375  return contr;
3376 }
3377 
/* Single-element weight arrays handed out (with a CE count of 1) by the lang_get_uca_* helpers in this file for
 * code points at or above the collation's weight count. Their addresses are stored through non-const out
 * pointers, so they are not declared const. */
3378 static UCA_L13_W uca_l13_max_weight = 0xffffffff;
3379 static UCA_L4_W uca_l4_max_weight = 0xffff;
3380 
3381 /*
3382  * lang_get_uca_w_l13() - returns pointer to array of CEs of first collatable
3383  * element in string (codepoint or contraction) and
3384  * number of CEs in this array
3385  * return:
3386  * coll_data(in): collation data
3387  * use_contractions(in):
3388  * str(in): string to get weights for
3389  * size(in): size of string (bytes)
3390  * uca_w_l13(out): pointer to weight array
3391  * num_ce(out): number of Collation Elements
3392  * str_next(out): pointer to next collatable element in string
3393  * cp_out(out): bit field value : codepoint value, and if contraction is
3394  * found than INTL_MASK_CONTR mask is set (MSB)
3395  */
3396 static void
3397 lang_get_uca_w_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str, const int size,
3398  UCA_L13_W ** uca_w_l13, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
3399 {
3400  unsigned int cp;
3401  const int alpha_cnt = coll_data->w_count;
3402  const int exp_num = coll_data->uca_exp_num;
3403 
3404  assert (size > 0);
3405 
3406  cp = intl_utf8_to_cp (str, size, str_next);
3407 
3408  *cp_out = cp;
3409 
3410  if (cp < (unsigned int) alpha_cnt)
3411  {
3412  COLL_CONTRACTION *contr = NULL;
3413 
3414  if (use_contractions && coll_data->count_contr > 0 && size >= coll_data->contr_min_size
3415  && cp >= coll_data->cp_first_contr_offset
3416  && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
3417  && ((contr = lang_get_contr_for_string (coll_data, str, size, cp)) != NULL))
3418  {
3419  assert (contr != NULL);
3420  *uca_w_l13 = contr->uca_w_l13;
3421  *num_ce = contr->uca_num;
3422  *str_next = (unsigned char *) str + contr->size;
3423  *cp_out = INTL_MASK_CONTR | cp;
3424  }
3425  else
3426  {
3427  *uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
3428  *num_ce = coll_data->uca_num[cp];
3429  /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
3430  }
3431  }
3432  else
3433  {
3434  *uca_w_l13 = &uca_l13_max_weight;
3435  *num_ce = 1;
3436  /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
3437  }
3438 }
3439 
3440 
3441 /*
3442  * lang_get_uca_back_weight_l13() - returns pointer to array of CEs of
3443  * previous collatable element in string and
3444  * number of CEs in this array
3445  *
3446  * return:
3447  * coll_data(in): collation data
3448  * use_contractions(in):
3449  * str(in): string to get weights for
3450  * size(in): size of string (bytes)
3451  * uca_w_l13(out): pointer to weight array
3452  * num_ce(out): number of Collation Elements
3453  * str_next(out): pointer to next collatable element in string
3454  * cp_out(out): bit field value : codepoint value, and if contraction is
3455  * found than INTL_MASK_CONTR mask is set (MSB)
3456  */
3457 static void
3458 lang_get_uca_back_weight_l13 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str_start,
3459  const unsigned char *str_last, UCA_L13_W ** uca_w_l13, int *num_ce,
3460  unsigned char **str_prev, unsigned int *cp_out)
3461 {
3462  unsigned int cp;
3463  const int alpha_cnt = coll_data->w_count;
3464  const int exp_num = coll_data->uca_exp_num;
3465 
3466  assert (str_prev != NULL);
3467  assert (cp_out != NULL);
3468  assert (str_start <= str_last);
3469 
3470  cp = intl_back_utf8_to_cp (str_start, str_last, str_prev);
3471  *cp_out = cp;
3472 
3473  if (cp < (unsigned int) alpha_cnt)
3474  {
3475  COLL_CONTRACTION *contr = NULL;
3476  unsigned int cp_prev;
3477  unsigned char *str_prev_prev = NULL;
3478 
3479  if (*str_prev >= str_start)
3480  {
3481  cp_prev = intl_back_utf8_to_cp (str_start, *str_prev, &str_prev_prev);
3482 
3483  if (use_contractions && coll_data->count_contr > 0 && cp_prev < (unsigned int) alpha_cnt
3484  && str_last - *str_prev >= coll_data->contr_min_size && cp >= coll_data->cp_first_contr_offset
3485  && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
3486  && ((contr = lang_get_contr_for_string (coll_data, str_prev_prev + 1,
3487  CAST_BUFLEN (str_last - str_prev_prev), cp_prev)) != NULL))
3488  {
3489  assert (contr != NULL);
3490  *uca_w_l13 = contr->uca_w_l13;
3491  *num_ce = contr->uca_num;
3492  *str_prev = str_prev_prev;
3493  *cp_out = INTL_MASK_CONTR | cp_prev;
3494  return;
3495  }
3496  }
3497 
3498  *uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
3499  *num_ce = coll_data->uca_num[cp];
3500  /* leave str_prev pointer to the one returned by intl_back_utf8_to_cp */
3501  }
3502  else
3503  {
3504  *uca_w_l13 = &uca_l13_max_weight;
3505  *num_ce = 1;
3506  /* leave str_prev pointer to the one returned by 'intl_back_utf8_to_cp' */
3507  }
3508 }
3509 
3510 /*
3511  * lang_get_uca_w_l4() - returns pointer to array of CEs of first collatable
3512  * element in string (codepoint or contraction) and
3513  * number of CEs in this array
3514  * return:
3515  * coll_data(in): collation data
3516  * use_contractions(in):
3517  * str(in): string to get weights for
3518  * size(in): size of string (bytes)
3519  * uca_w_l13(out): pointer to weight array
3520  * num_ce(out): number of Collation Elements
3521  * str_next(out): pointer to next collatable element in string
3522  * cp_out(out): bit field value : codepoint value, and if contraction is
3523  * found than INTL_MASK_CONTR mask is set (MSB)
3524  *
3525  */
3526 static void
3527 lang_get_uca_w_l4 (const COLL_DATA * coll_data, const bool use_contractions, const unsigned char *str, const int size,
3528  UCA_L4_W ** uca_w_l4, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
3529 {
3530  unsigned int cp;
3531  const int alpha_cnt = coll_data->w_count;
3532  const int exp_num = coll_data->uca_exp_num;
3533 
3534  assert (size > 0);
3535 
3536  cp = intl_utf8_to_cp (str, size, str_next);
3537 
3538  if (cp < (unsigned int) alpha_cnt)
3539  {
3540  COLL_CONTRACTION *contr = NULL;
3541 
3542  if (use_contractions && coll_data->count_contr > 0 && size >= coll_data->contr_min_size
3543  && cp >= coll_data->cp_first_contr_offset
3544  && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
3545  && ((contr = lang_get_contr_for_string (coll_data, str, size, cp)) != NULL))
3546  {
3547  assert (contr != NULL);
3548  *uca_w_l4 = contr->uca_w_l4;
3549  *num_ce = contr->uca_num;
3550  *str_next = (unsigned char *) str + contr->size;
3551  *cp_out = INTL_MASK_CONTR | cp;
3552  }
3553  else
3554  {
3555  *uca_w_l4 = &(coll_data->uca_w_l4[cp * exp_num]);
3556  *num_ce = coll_data->uca_num[cp];
3557  /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
3558  }
3559  }
3560  else
3561  {
3562  *uca_w_l4 = &uca_l4_max_weight;
3563  *num_ce = 1;
3564  /* leave next pointer to the one returned by 'intl_utf8_to_cp' */
3565  }
3566 }
3567 
/* retrieve UCA weight level:
 *  l = level (0, 1, 2 select levels 1, 2, 3 from l13w; any other value selects l4w)
 *  i = position weight array
 *  l13w = array of compressed weight for levels 1,2,3
 *  l4w = array of weight level 4
 *
 * Arguments are parenthesized so that expressions can be passed safely; 'l' and 'i' may be evaluated more than
 * once and should have no side effects.
 */
#define GET_UCA_WEIGHT(l, i, l13w, l4w) \
  (((l) == 0) ? (UCA_GET_L1_W ((l13w)[(i)])) : \
   ((l) == 1) ? (UCA_GET_L2_W ((l13w)[(i)])) : \
   ((l) == 2) ? (UCA_GET_L3_W ((l13w)[(i)])) : ((l4w)[(i)]))

/* true when the INTL_MASK_CONTR bit(s) are set in 'v', i.e. the value was produced for a contraction */
#define INTL_CONTR_FOUND(v) (((v) & INTL_MASK_CONTR) == INTL_MASK_CONTR)
3580 /*
3581  * lang_strmatch_utf8_uca_w_level() - string match or compare for UTF8
3582  * collation employing full UCA weights (expansions and contractions)
3583  *
3584  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
3585  * coll_data(in) : collation data
3586  * level(in) : current UCA level to compare
3587  * is_match(in) : true if match, otherwise is compare
3588  * str1(in):
3589  * size1(in):
3590  * str2(in): this is the pattern string in case of match
3591  * size2(in):
3592  * escape(in): pointer to escape character (multi-byte allowed)
3593  * (used in context of LIKE)
3594  * has_last_escape(in): true if it should check if last character is the
3595  * escape character
3596  * offset_next_level(in/out) : offset in bytes from which to start the
3597  * compare; used to avoid compare between
3598  * binary identical part in consecutive compare
3599  * levels
3600  * str1_match_size(out): size from str1 which is matched with str2
3601  */
3602 static int
3603 lang_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, const int level, bool is_match, const unsigned char *str1,
3604  const int size1, const unsigned char *str2, const int size2,
3605  const unsigned char *escape, const bool has_last_escape, int *offset_next_level,
3606  int *str1_match_size, bool ignore_trailing_space)
3607 {
3608  const unsigned char *str1_end;
3609  const unsigned char *str2_end;
3610  const unsigned char *str1_begin;
3611  unsigned char *str1_next, *str2_next;
3612  UCA_L13_W *uca_w_l13_1 = NULL;
3613  UCA_L13_W *uca_w_l13_2 = NULL;
3614  UCA_L4_W *uca_w_l4_1 = NULL;
3615  UCA_L4_W *uca_w_l4_2 = NULL;
3616  int num_ce1 = 0, num_ce2 = 0;
3617  int ce_index1 = 0, ce_index2 = 0;
3618  unsigned int w1 = 0, w2 = 0;
3619 
3620  bool compute_offset = false;
3621  unsigned int str1_cp_contr = 0, str2_cp_contr = 0;
3622  int cmp_offset = 0;
3623 
3624  int result = 0;
3625 
3626  assert (offset_next_level != NULL && *offset_next_level > -1);
3627  assert (level >= 0 && level <= 4);
3628 
3629  str1_end = str1 + size1;
3630  str2_end = str2 + size2;
3631  str1_begin = str1;
3632 
3633  if (level == 0)
3634  {
3635  assert (*offset_next_level == 0);
3636  compute_offset = true;
3637  }
3638  else
3639  {
3640  cmp_offset = *offset_next_level;
3641  if (cmp_offset > 0)
3642  {
3643  assert (cmp_offset <= size1);
3644  assert (cmp_offset <= size2);
3645  str1 += cmp_offset;
3646  str2 += cmp_offset;
3647 
3648  }
3649  compute_offset = false;
3650  }
3651 
3652  str1_next = (unsigned char *) str1;
3653  str2_next = (unsigned char *) str2;
3654 
3655  for (;;)
3656  {
3657  read_weights1:
3658  if (num_ce1 == 0)
3659  {
3660  str1 = str1_next;
3661  if (str1 >= str1_end)
3662  {
3663  goto read_weights2;
3664  }
3665 
3666  if (level == 3)
3667  {
3668  lang_get_uca_w_l4 (coll_data, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l4_1, &num_ce1,
3669  &str1_next, &str1_cp_contr);
3670  }
3671  else
3672  {
3673  lang_get_uca_w_l13 (coll_data, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1,
3674  &str1_next, &str1_cp_contr);
3675  }
3676  assert (num_ce1 > 0);
3677 
3678  ce_index1 = 0;
3679  }
3680 
3681  read_weights2:
3682  if (num_ce2 == 0)
3683  {
3684  int c_size;
3685 
3686  str2 = str2_next;
3687  if (str2 >= str2_end)
3688  {
3689  goto compare;
3690  }
3691 
3692  if (is_match && escape != NULL && intl_cmp_char (str2, escape, INTL_CODESET_UTF8, &c_size) == 0)
3693  {
3694  if (!(has_last_escape && str2 + c_size >= str2_end))
3695  {
3696  str2 += c_size;
3697  }
3698  }
3699 
3700  if (level == 3)
3701  {
3702  lang_get_uca_w_l4 (coll_data, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l4_2, &num_ce2,
3703  &str2_next, &str1_cp_contr);
3704  }
3705  else
3706  {
3707  lang_get_uca_w_l13 (coll_data, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2,
3708  &str2_next, &str2_cp_contr);
3709  }
3710 
3711  if (is_match && coll_data->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW
3712  && !INTL_CONTR_FOUND (str2_cp_contr) && INTL_CONTR_FOUND (str1_cp_contr) && ce_index1 == 0
3713  && str2_cp_contr == (str1_cp_contr & (~INTL_MASK_CONTR)))
3714  {
3715  /* re-compute weight of str1 without considering contractions */
3716  if (level == 3)
3717  {
3718  lang_get_uca_w_l4 (coll_data, false, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l4_1, &num_ce1,
3719  &str1_next, &str1_cp_contr);
3720  }
3721  else
3722  {
3723  lang_get_uca_w_l13 (coll_data, false, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1,
3724  &str1_next, &str1_cp_contr);
3725  }
3726  assert (num_ce1 > 0);
3727  }
3728 
3729  assert (num_ce2 > 0);
3730 
3731  ce_index2 = 0;
3732  }
3733 
3734  if (compute_offset)
3735  {
3736  if (ce_index1 == 0 && ce_index2 == 0)
3737  {
3738  if (!INTL_CONTR_FOUND (str1_cp_contr) && str1_cp_contr == str2_cp_contr)
3739  {
3740  assert (!INTL_CONTR_FOUND (str2_cp_contr));
3741  cmp_offset += CAST_BUFLEN (str1_next - str1);
3742  }
3743  else
3744  {
3745  compute_offset = false;
3746  }
3747  }
3748  else if (ce_index1 != ce_index2)
3749  {
3750  compute_offset = false;
3751  }
3752  }
3753 
3754  compare:
3755  if (num_ce1 == 0 && str1 >= str1_end)
3756  {
3757  /* str1 was consumed */
3758  if (num_ce2 == 0)
3759  {
3760  if (str2 >= str2_end)
3761  {
3762  /* both strings consumed and equal */
3763  assert (result == 0);
3764  goto exit;
3765  }
3766  else
3767  {
3768  if (is_match || !ignore_trailing_space)
3769  {
3770  result = -1;
3771  goto exit;
3772  }
3773  goto read_weights2;
3774  }
3775  }
3776 
3777  assert (num_ce2 > 0);
3778  if (is_match && *str2 == ASCII_SPACE)
3779  {
3780  /* trailing spaces are not matched */
3781  result = -1;
3782  goto exit;
3783  }
3784 
3785  if (!ignore_trailing_space)
3786  {
3787  result = -1;
3788  goto exit;
3789  }
3790 
3791  /* consume any remaining zero-weight values (skip them) from str2 */
3792  do
3793  {
3794  w2 = GET_UCA_WEIGHT (level, ce_index2, uca_w_l13_2, uca_w_l4_2);
3795  if (w2 != 0)
3796  {
3797  /* non-zero weight : strings are not equal */
3798  result = -1;
3799  goto exit;
3800  }
3801  ce_index2++;
3802  num_ce2--;
3803  }
3804  while (num_ce2 > 0);
3805 
3806  goto read_weights2;
3807  }
3808 
3809  if (num_ce2 == 0 && str2 >= str2_end)
3810  {
3811  if (is_match)
3812  {
3813  assert (result == 0);
3814  goto exit;
3815  }
3816 
3817  if (!ignore_trailing_space)
3818  {
3819  result = 1;
3820  goto exit;
3821  }
3822 
3823  /* consume any remaining zero-weight values (skip them) from str1 */
3824  while (num_ce1 > 0)
3825  {
3826  w1 = GET_UCA_WEIGHT (level, ce_index1, uca_w_l13_1, uca_w_l4_1);
3827  if (w1 != 0)
3828  {
3829  /* non-zero weight : strings are not equal */
3830  result = 1;
3831  goto exit;
3832  }
3833  ce_index1++;
3834  num_ce1--;
3835  }
3836 
3837  goto read_weights1;
3838  }
3839 
3840  w1 = GET_UCA_WEIGHT (level, ce_index1, uca_w_l13_1, uca_w_l4_1);
3841  w2 = GET_UCA_WEIGHT (level, ce_index2, uca_w_l13_2, uca_w_l4_2);
3842 
3843  /* ignore zero weights (unless character is space) */
3844  if (w1 == 0 && *str1 != ASCII_SPACE)
3845  {
3846  ce_index1++;
3847  num_ce1--;
3848 
3849  if (w2 == 0 && *str2 != ASCII_SPACE)
3850  {
3851  ce_index2++;
3852  num_ce2--;
3853  }
3854 
3855  goto read_weights1;
3856  }
3857  else if (w2 == 0 && *str2 != ASCII_SPACE)
3858  {
3859  ce_index2++;
3860  num_ce2--;
3861 
3862  goto read_weights1;
3863  }
3864  else if (w1 > w2)
3865  {
3866  result = 1;
3867  goto exit;
3868  }
3869  else if (w1 < w2)
3870  {
3871  result = -1;
3872  goto exit;
3873  }
3874 
3875  ce_index1++;
3876  ce_index2++;
3877 
3878  num_ce1--;
3879  num_ce2--;
3880  }
3881 
3882  if (str2 < str2_end)
3883  {
3884  assert (str1 == str1_end);
3885  if (ignore_trailing_space)
3886  {
3887  if (lang_str_utf8_trail_zero_weights_w_exp (coll_data, level, str2, CAST_BUFLEN (str2_end - str2)) != 0)
3888  {
3889  result = -1;
3890  }
3891  }
3892  else
3893  {
3894  result = -1;
3895  }
3896  }
3897  else if (str1 < str1_end)
3898  {
3899  assert (str2 == str2_end);
3900  if (ignore_trailing_space)
3901  {
3902  if (lang_str_utf8_trail_zero_weights_w_exp (coll_data, level, str1, CAST_BUFLEN (str1_end - str1)) != 0)
3903  {
3904  result = 1;
3905  }
3906  }
3907  else
3908  {
3909  result = 1;
3910  }
3911  }
3912  else
3913  {
3914  assert (str2 == str2_end && str1 == str1_end);
3915 
3916  if (num_ce1 > num_ce2)
3917  {
3918  result = 1;
3919  }
3920  else if (num_ce1 < num_ce2)
3921  {
3922  result = -1;
3923  }
3924  }
3925 
3926 exit:
3927  if (is_match)
3928  {
3929  assert (str1_match_size != NULL);
3930  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
3931  }
3932 
3933  if (level == 0)
3934  {
3935  *offset_next_level = cmp_offset;
3936  }
3937  return result;
3938 }
3939 
3940 /*
3941  * lang_mht2str_utf8_exp() -
3942  *
3943  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
3944  * coll_data(in) : collation data
3945  * level(in) : current UCA level to compare
3946  * is_match(in) : true if match, otherwise is compare
3947  * str1(in):
3948  * size1(in):
3949  * str2(in): this is the pattern string in case of match
3950  * size2(in):
3951  * escape(in): pointer to escape character (multi-byte allowed)
3952  * (used in context of LIKE)
3953  * has_last_escape(in): true if it should check if last character is the
3954  * escape character
3955  * offset_next_level(in/out) : offset in bytes from which to start the
3956  * compare; used to avoid compare between
3957  * binary identical part in consecutive compare
3958  * levels
3959  * str1_match_size(out): size from str1 which is matched with str2
3960  */
3961 static unsigned int
3962 lang_mht2str_utf8_exp (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
3963 {
3964  const unsigned char *str_end;
3965  unsigned char *str_next;
3966  const COLL_DATA *coll_data = &(lang_coll->coll);
3967  UCA_L13_W *uca_w_l13 = NULL;
3968  UCA_L4_W *uca_w_l4 = NULL;
3969  int num_ce = 0;
3970  int ce_index = 0;
3971  unsigned int w, cp;
3972  const int alpha_cnt = coll_data->w_count;
3973  const int exp_num = coll_data->uca_exp_num;
3974  unsigned int pseudo_key = 0;
3975  unsigned int level;
3976  int str_size;
3977 
3978  str_end = str + size;
3979 
3980  str_next = (unsigned char *) str;
3981 
3982  for (;;)
3983  {
3984  if (num_ce == 0)
3985  {
3986  str = str_next;
3987  if (str >= str_end)
3988  {
3989  break;
3990  }
3991 
3992  str_size = CAST_BUFLEN (str_end - str);
3993  cp = intl_utf8_to_cp (str, str_size, &str_next);
3994 
3995  if (cp < (unsigned int) alpha_cnt)
3996  {
3997  COLL_CONTRACTION *contr = NULL;
3998 
3999  if (coll_data->count_contr > 0 && str_size >= coll_data->contr_min_size
4000  && cp >= coll_data->cp_first_contr_offset
4001  && cp < (coll_data->cp_first_contr_offset + coll_data->cp_first_contr_count)
4002  && ((contr = lang_get_contr_for_string (coll_data, str, str_size, cp)) != NULL))
4003  {
4004  assert (contr != NULL);
4005  uca_w_l13 = contr->uca_w_l13;
4006  if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
4007  {
4008  uca_w_l4 = contr->uca_w_l4;
4009  }
4010  num_ce = contr->uca_num;
4011  str_next = (unsigned char *) str + contr->size;
4012  }
4013  else
4014  {
4015  uca_w_l13 = &(coll_data->uca_w_l13[cp * exp_num]);
4016  if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
4017  {
4018  uca_w_l4 = &(coll_data->uca_w_l4[cp * exp_num]);
4019  }
4020  num_ce = coll_data->uca_num[cp];
4021  /* leave next pointer to the value returned by 'intl_utf8_to_cp' */
4022  }
4023  }
4024  else
4025  {
4026  uca_w_l13 = &uca_l13_max_weight;
4027  if (coll_data->uca_opt.sett_strength >= TAILOR_QUATERNARY)
4028  {
4029  uca_w_l4 = &uca_l4_max_weight;
4030  }
4031  num_ce = 1;
4032  /* leave next pointer to the value returned by 'intl_utf8_to_cp' */
4033  }
4034 
4035  assert (num_ce > 0);
4036 
4037  ce_index = 0;
4038  }
4039 
4040  if (num_ce == 0 && str >= str_end)
4041  {
4042  break;
4043  }
4044 
4045  for (level = 0; level < (unsigned int) coll_data->uca_opt.sett_strength; level++)
4046  {
4047  w = GET_UCA_WEIGHT (level, ce_index, uca_w_l13, uca_w_l4);
4048  ADD_TO_HASH (pseudo_key, w);
4049  }
4050 
4051  ce_index++;
4052  num_ce--;
4053  }
4054 
4055  return pseudo_key;
4056 }
4057 
4058 
4059 /*
4060  * lang_back_strmatch_utf8_uca_w_level() - string match or compare for UTF8
4061  * collation employing full UCA weights (expansions and contractions)
4062  *
4063  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
4064  * coll_data(in) : collation data
4065  * level(in) : current UCA level to compare
4066  * is_match(in) : true if match, otherwise is compare
4067  * str1(in):
4068  * size1(in):
4069  * str2(in): this is the pattern string in case of match
4070  * size2(in):
4071  * escape(in): pointer to escape character (multi-byte allowed)
4072  * (used in context of LIKE)
4073  * has_last_escape(in): true if it should check if last character is the
4074  * escape character
4075  * offset_next_level(in/out) : offset in bytes from which to start the
4076  * compare; used to avoid compare between
4077  * binary identical part in consecutive compare
4078  * levels
4079  * str1_match_size(out): size from str1 which is matched with str2
4080  */
4081 static int
4082 lang_back_strmatch_utf8_uca_w_level (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
4083  const int size1, const unsigned char *str2, const int size2,
4084  const unsigned char *escape, const bool has_last_escape, int *offset_next_level,
4085  int *str1_match_size, bool ignore_trailing_space)
4086 {
4087  const unsigned char *str1_start;
4088  const unsigned char *str2_start;
4089  const unsigned char *str1_last;
4090  const unsigned char *str2_last;
4091  unsigned char *str1_prev, *str2_prev;
4092  UCA_L13_W *uca_w_l13_1 = NULL;
4093  UCA_L13_W *uca_w_l13_2 = NULL;
4094  int num_ce1 = 0, num_ce2 = 0;
4095  int ce_index1 = -1, ce_index2 = -1;
4096  unsigned int w1 = 0, w2 = 0;
4097  unsigned int str1_cp_contr = 0, str2_cp_contr = 0;
4098  int result = 0;
4099 
4100  assert (offset_next_level != NULL && *offset_next_level > -1);
4101 
4102  str1_last = str1 + size1 - 1;
4103  str2_last = str2 + size2 - 1;
4104  str1_start = str1;
4105  str2_start = str2;
4106 
4107  while (*str1_last == ASCII_SPACE)
4108  {
4109  str1_last--;
4110  }
4111 
4112  while (*str2_last == ASCII_SPACE)
4113  {
4114  str2_last--;
4115  }
4116 
4117  str1_prev = (unsigned char *) str1_last;
4118  str2_prev = (unsigned char *) str2_last;
4119 
4120  for (;;)
4121  {
4122  read_weights1:
4123  if (ce_index1 < 0)
4124  {
4125  str1 = str1_prev;
4126  if (str1 < str1_start)
4127  {
4128  goto read_weights2;
4129  }
4130 
4131  lang_get_uca_back_weight_l13 (coll_data, true, str1_start, str1, &uca_w_l13_1, &num_ce1, &str1_prev,
4132  &str1_cp_contr);
4133 
4134  assert (num_ce1 > 0);
4135 
4136  ce_index1 = num_ce1 - 1;
4137  }
4138 
4139  read_weights2:
4140  if (ce_index2 < 0)
4141  {
4142  int c_size;
4143 
4144  str2 = str2_prev;
4145  if (str2 < str2_start)
4146  {
4147  goto compare;
4148  }
4149 
4150  if (is_match && escape != NULL && !(has_last_escape && str2 == str2_last))
4151  {
4152  unsigned char *str2_prev_prev;
4153 
4154  (void) intl_back_utf8_to_cp (str2, str2_start, &str2_prev_prev);
4155 
4156  if (intl_cmp_char (str2_prev_prev + 1, escape, INTL_CODESET_UTF8, &c_size) == 0)
4157  {
4158  str2 = str2_prev_prev;
4159  }
4160  }
4161 
4162  lang_get_uca_back_weight_l13 (coll_data, true, str2_start, str2, &uca_w_l13_2, &num_ce2, &str2_prev,
4163  &str2_cp_contr);
4164 
4165  assert (num_ce2 > 0);
4166 
4167  ce_index2 = num_ce2 - 1;
4168 
4169  if (is_match && coll_data->uca_opt.sett_match_contr == MATCH_CONTR_BOUND_ALLOW
4170  && !INTL_CONTR_FOUND (str2_cp_contr) && INTL_CONTR_FOUND (str1_cp_contr) && ce_index1 == num_ce1 - 1
4171  && str2_cp_contr == (str1_cp_contr & (~INTL_MASK_CONTR)))
4172  {
4173  /* re-compute weight of str1 without considering contractions */
4174  lang_get_uca_back_weight_l13 (coll_data, false, str1_start, str1, &uca_w_l13_1, &num_ce1, &str1_prev,
4175  &str1_cp_contr);
4176 
4177  assert (num_ce1 > 0);
4178  ce_index1 = num_ce1 - 1;
4179  }
4180  }
4181 
4182  compare:
4183  if (ce_index1 < 0 && str1 < str1_start)
4184  {
4185  /* str1 was consumed */
4186  if (ce_index2 < 0)
4187  {
4188  if (str2 < str2_start)
4189  {
4190  /* both strings consumed and equal */
4191  assert (result == 0);
4192  goto exit;
4193  }
4194  else
4195  {
4196  if (is_match || !ignore_trailing_space)
4197  {
4198  result = -1;
4199  goto exit;
4200  }
4201  goto read_weights2;
4202  }
4203  }
4204 
4205  assert (ce_index2 >= 0);
4206  if (is_match || !ignore_trailing_space)
4207  {
4208  /* trailing spaces are not matched */
4209  result = -1;
4210  goto exit;
4211  }
4212 
4213  /* consume any remaining zero-weight values (skip them) from str2 */
4214  do
4215  {
4216  w2 = UCA_GET_L2_W (uca_w_l13_2[ce_index2]);
4217  if (w2 != 0)
4218  {
4219  /* non-zero weight : strings are not equal */
4220  result = -1;
4221  goto exit;
4222  }
4223  ce_index2--;
4224  }
4225  while (ce_index2 > 0);
4226 
4227  goto read_weights2;
4228  }
4229 
4230  if (ce_index2 < 0 && str2 < str2_start)
4231  {
4232  if (is_match)
4233  {
4234  assert (result == 0);
4235  goto exit;
4236  }
4237 
4238  if (!ignore_trailing_space)
4239  {
4240  result = 1;
4241  goto exit;
4242  }
4243  /* consume any remaining zero-weight values (skip them) from str1 */
4244  while (ce_index1 >= 0)
4245  {
4246  w1 = UCA_GET_L2_W (uca_w_l13_1[ce_index1]);
4247  if (w1 != 0)
4248  {
4249  /* non-zero weight : strings are not equal */
4250  result = 1;
4251  goto exit;
4252  }
4253  ce_index1--;
4254  }
4255 
4256  goto read_weights1;
4257  }
4258 
4259  assert (ce_index1 >= 0 && ce_index2 >= 0);
4260 
4261  w1 = UCA_GET_L2_W (uca_w_l13_1[ce_index1]);
4262  w2 = UCA_GET_L2_W (uca_w_l13_2[ce_index2]);
4263 
4264  /* ignore zero weights (unless character is space) */
4265  if (w1 == 0 && *str1 != ASCII_SPACE)
4266  {
4267  ce_index1--;
4268 
4269  if (w2 == 0 && *str2 != ASCII_SPACE)
4270  {
4271  ce_index2--;
4272  }
4273 
4274  goto read_weights1;
4275  }
4276  else if (w2 == 0 && *str2 != ASCII_SPACE)
4277  {
4278  ce_index2--;
4279  goto read_weights1;
4280  }
4281  else if (w1 > w2)
4282  {
4283  result = 1;
4284  goto exit;
4285  }
4286  else if (w1 < w2)
4287  {
4288  result = -1;
4289  goto exit;
4290  }
4291 
4292  ce_index1--;
4293  ce_index2--;
4294  }
4295 
4296  if (str1 > str1_start)
4297  {
4298  assert (str2 <= str2_start);
4299  result = 1;
4300  }
4301  else if (str2 > str2_start)
4302  {
4303  assert (str1 <= str1_start);
4304  result = -1;
4305  }
4306  else
4307  {
4308  if (ce_index1 > ce_index2)
4309  {
4310  result = 1;
4311  goto exit;
4312  }
4313  else if (ce_index1 < ce_index2)
4314  {
4315  result = -1;
4316  goto exit;
4317  }
4318  }
4319 
4320 exit:
4321  if (is_match)
4322  {
4323  assert (str1_match_size != NULL);
4324  *str1_match_size = CAST_BUFLEN (str1_last - str1_start) + 1;
4325  }
4326 
4327  return result;
4328 }
4329 
4330 /*
4331  * lang_strcmp_utf8_uca() - string compare for UTF8 for a collation using
4332  * full UCA weights (expansions and contractions)
4333  * return:
4334  * lang_coll(in):
4335  * string1(in):
4336  * size1(in):
4337  * string2(in):
4338  * size2(in):
4339  */
4340 static int
4341 lang_strcmp_utf8_uca (const LANG_COLLATION * lang_coll, const unsigned char *str1, const int size1,
4342  const unsigned char *str2, const int size2, bool ignore_trailing_space)
4343 {
4344  return lang_strmatch_utf8_uca_w_coll_data (&(lang_coll->coll), false, str1, size1, str2, size2, NULL, false, NULL,
4345  ignore_trailing_space);
4346 }
4347 
4348 /*
4349  * lang_strmatch_utf8_uca() - string match for UTF8 for a collation using
4350  * full UCA weights (expansions and contractions)
4351  * return:
4352  * lang_coll(in):
4353  * is_match(in):
4354  * string1(in):
4355  * size1(in):
4356  * string2(in):
4357  * size2(in):
4358  * escape(in):
4359  * has_last_escape(in):
4360  * str1_match_size(out):
4361  */
4362 static int
4363 lang_strmatch_utf8_uca (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, const int size1,
4364  const unsigned char *str2, const int size2, const unsigned char *escape,
4365  const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
4366 {
4367  return lang_strmatch_utf8_uca_w_coll_data (&(lang_coll->coll), is_match, str1, size1, str2, size2, escape,
4368  has_last_escape, str1_match_size, ignore_trailing_space);
4369 }
4370 
4371 /*
4372  * lang_strmatch_utf8_uca_w_coll_data() - string match/compare for UTF8 for a
4373  * collation using full UCA weights (+ expansions and contractions)
4374  *
4375  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
4376  * coll_data(in):
4377  * is_match(in) : true if match, otherwise is compare
4378  * str1(in):
4379  * size1(in):
4380  * str2(in): this is the pattern string in case of match
4381  * size2(in):
4382  * escape(in): pointer to escape character (multi-byte allowed)
4383  * (used in context of LIKE)
4384  * has_last_escape(in): true if it should check if last character is the
4385  * escape character
4386  * str1_match_size(out): size from str1 which is matched with str2
4387  */
4388 int
4389 lang_strmatch_utf8_uca_w_coll_data (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
4390  const int size1, const unsigned char *str2, const int size2,
4391  const unsigned char *escape, const bool has_last_escape, int *str1_match_size,
4392  bool ignore_trailing_space)
4393 {
4394  int res;
4395  int cmp_offset = 0;
4396 
4397  /* compare level 1 */
4398  res =
4399  lang_strmatch_utf8_uca_w_level (coll_data, 0, is_match, str1, size1, str2, size2, escape, has_last_escape,
4400  &cmp_offset, str1_match_size, ignore_trailing_space);
4401  if (res != 0)
4402  {
4403  return res;
4404  }
4405 
4406  if (coll_data->uca_opt.sett_strength == TAILOR_PRIMARY)
4407  {
4408  if (coll_data->uca_opt.sett_caseLevel)
4409  {
4410  /* compare level 3 (casing) */
4411  res =
4412  lang_strmatch_utf8_uca_w_level (coll_data, 2, is_match, str1, size1, str2, size2, escape, has_last_escape,
4413  &cmp_offset, str1_match_size, ignore_trailing_space);
4414  if (res != 0)
4415  {
4416  /* reverse order when caseFirst == UPPER */
4417  return (coll_data->uca_opt.sett_caseFirst == 1) ? -res : res;
4418  }
4419  }
4420  return 0;
4421  }
4422 
4423  assert (coll_data->uca_opt.sett_strength >= TAILOR_SECONDARY);
4424 
4425  /* compare level 2 */
4426  if (coll_data->uca_opt.sett_backwards)
4427  {
4428  int str1_level_2_size;
4429 
4430  if (is_match)
4431  {
4432  str1_level_2_size = *str1_match_size;
4433  }
4434  else
4435  {
4436  str1_level_2_size = size1;
4437  }
4438  if (str1_level_2_size > 0 && size2 > 0)
4439  {
4440  res =
4441  lang_back_strmatch_utf8_uca_w_level (coll_data, is_match, str1, str1_level_2_size, str2, size2, escape,
4442  has_last_escape, &cmp_offset, str1_match_size, ignore_trailing_space);
4443  }
4444  else
4445  {
4446  res = (str1_level_2_size == size2) ? 0 : ((str1_level_2_size > size2) ? 1 : -1);
4447  }
4448  }
4449  else
4450  {
4451  res =
4452  lang_strmatch_utf8_uca_w_level (coll_data, 1, is_match, str1, size1, str2, size2, escape, has_last_escape,
4453  &cmp_offset, str1_match_size, ignore_trailing_space);
4454  }
4455 
4456  if (res != 0)
4457  {
4458  return res;
4459  }
4460 
4461  if (coll_data->uca_opt.sett_strength == TAILOR_SECONDARY)
4462  {
4463  return 0;
4464  }
4465 
4466  /* compare level 3 */
4467  res =
4468  lang_strmatch_utf8_uca_w_level (coll_data, 2, is_match, str1, size1, str2, size2, escape, has_last_escape,
4469  &cmp_offset, str1_match_size, ignore_trailing_space);
4470  if (res != 0)
4471  {
4472  /* reverse order when caseFirst == UPPER */
4473  return (coll_data->uca_opt.sett_caseFirst == 1) ? -res : res;
4474  }
4475 
4476  if (coll_data->uca_opt.sett_strength == TAILOR_TERTIARY)
4477  {
4478  return 0;
4479  }
4480 
4481  /* compare level 4 */
4482  res =
4483  lang_strmatch_utf8_uca_w_level (coll_data, 3, is_match, str1, size1, str2, size2, escape, has_last_escape,
4484  &cmp_offset, str1_match_size, ignore_trailing_space);
4485  if (res != 0)
4486  {
4487  /* reverse order when caseFirst == UPPER */
4488  return res;
4489  }
4490 
4491  return 0;
4492 }
4493 
4494 /*
4495  * lang_str_utf8_trail_zero_weights() - cheks if remaining characters of an
4496  * UTF-8 string have all zero weights
4497  *
4498  * return: 0 if all remaining characters have zero weight, 1 otherwise
4499  * lang_coll(in): collation data
4500  * str(in):
4501  * size(in):
4502  */
4503 static int
4504 lang_str_utf8_trail_zero_weights (const LANG_COLLATION * lang_coll, const unsigned char *str, int size)
4505 {
4506  unsigned char *str_next;
4507  unsigned int cp;
4508  unsigned int *weight = (lang_coll->built_in) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
4509 
4510  while (size > 0)
4511  {
4512  cp = intl_utf8_to_cp (str, size, &str_next);
4513 
4514  if (cp >= (unsigned int) lang_coll->coll.w_count || weight[cp] != 0)
4515  {
4516  return 1;
4517  }
4518  size -= CAST_BUFLEN (str_next - str);
4519  str = str_next;
4520  }
4521 
4522  return 0;
4523 }
4524 
4525 /*
4526  * lang_str_utf8_trail_zero_weights_w_exp() - cheks if remaining characters of
4527  * an UTF-8 string have all zero weights
4528  * collation with expansions.
4529  *
4530  * return: 0 if all remaining characters have zero weight, 1 otherwise
4531  * coll_data(in): collation data
4532  * level(in):current level of matching
4533  * str(in):
4534  * size(in):
4535  */
4536 static int
4537 lang_str_utf8_trail_zero_weights_w_exp (const COLL_DATA * coll_data, const int level, const unsigned char *str,
4538  int size)
4539 {
4540  UCA_L13_W *uca_w_l13 = NULL;
4541  UCA_L4_W *uca_w_l4 = NULL;
4542  unsigned char *str_next;
4543  int num_ce = 0;
4544  int ce_index = 0;
4545  unsigned int dummy;
4546 
4547  str_next = (unsigned char *) str;
4548  while (size > 0)
4549  {
4550  if (num_ce == 0)
4551  {
4552  str = str_next;
4553 
4554  if (level == 3)
4555  {
4556  lang_get_uca_w_l4 (coll_data, true, str, size, &uca_w_l4, &num_ce, &str_next, &dummy);
4557  }
4558  else
4559  {
4560  lang_get_uca_w_l13 (coll_data, true, str, size, &uca_w_l13, &num_ce, &str_next, &dummy);
4561  }
4562  assert (num_ce > 0);
4563 
4564  ce_index = 0;
4565  size -= CAST_BUFLEN (str_next - str);
4566  str = str_next;
4567  }
4568 
4569  if (GET_UCA_WEIGHT (level, ce_index, uca_w_l13, uca_w_l4) != 0)
4570  {
4571  return 1;
4572  }
4573 
4574  ce_index++;
4575  num_ce--;
4576  }
4577 
4578  return 0;
4579 }
4580 
4581 /*
4582  * lang_next_coll_char_utf8() - computes the next collatable char
4583  * return: size in bytes of the next collatable char
4584  * lang_coll(on): collation
4585  * seq(in): pointer to current char
4586  * size(in): available bytes for current char
4587  * next_seq(in/out): buffer to return next alphabetical char
4588  * len_next(in/out): length in chars of next char (always 1 for this func)
4589  *
4590  * Note : It is assumed that the input buffer (cur_char) contains at least
4591  * one UTF-8 character.
4592  * The calling function should take into account cases when 'next'
4593  * character is encoded on greater byte size.
4594  */
4595 static int
4596 lang_next_coll_char_utf8 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
4597  unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
4598 {
4599  unsigned int cp_alpha_char, cp_next_alpha_char;
4600  const int alpha_cnt = lang_coll->coll.w_count;
4601  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;
4602 
4603  unsigned char *dummy = NULL;
4604 
4605  assert (seq != NULL);
4606  assert (next_seq != NULL);
4607  assert (len_next != NULL);
4608  assert (size > 0);
4609 
4610  cp_alpha_char = intl_utf8_to_cp (seq, size, &dummy);
4611 
4612  if (cp_alpha_char < (unsigned int) alpha_cnt)
4613  {
4614  cp_next_alpha_char = next_alpha_char[cp_alpha_char];
4615  }
4616  else
4617  {
4618  cp_next_alpha_char = cp_alpha_char + 1;
4619  }
4620 
4621  *len_next = 1;
4622 
4623  return intl_cp_to_utf8 (cp_next_alpha_char, next_seq);
4624 }
4625 
4626 /*
4627  * lang_next_coll_seq_utf8_w_contr() - computes the next collatable sequence
4628  * for locales having contractions
4629  * return: size in bytes of the next collatable sequence
4630  * lang_coll(on): collation
4631  * seq(in): pointer to current sequence
4632  * size(in): available bytes for current sequence
4633  * next_seq(in/out): buffer to return next collatable sequence
4634  * len_next(in/out): length in chars of next sequence
4635  *
4636  * Note : It is assumed that the input buffer (cur_char) contains at least
4637  * one UTF-8 character.
4638  */
4639 static int
4640 lang_next_coll_seq_utf8_w_contr (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
4641  unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
4642 {
4643  unsigned int cp_first_char;
4644  unsigned int next_seq_id;
4645  unsigned int cp_next_char;
4646  const int alpha_cnt = lang_coll->coll.w_count;
4647  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;
4648 
4649  unsigned char *dummy = NULL;
4650  COLL_CONTRACTION *contr = NULL;
4651 
4652  assert (seq != NULL);
4653  assert (next_seq != NULL);
4654  assert (len_next != NULL);
4655  assert (size > 0);
4656 
4657  cp_first_char = intl_utf8_to_cp (seq, size, &dummy);
4658 
4659  if (cp_first_char < (unsigned int) alpha_cnt)
4660  {
4661  if (size >= lang_coll->coll.contr_min_size && cp_first_char >= lang_coll->coll.cp_first_contr_offset
4662  && cp_first_char < (lang_coll->coll.cp_first_contr_offset + lang_coll->coll.cp_first_contr_count))
4663  {
4664  contr = lang_get_contr_for_string (&(lang_coll->coll), seq, size, cp_first_char);
4665  }
4666 
4667  if (contr == NULL)
4668  {
4669  next_seq_id = next_alpha_char[cp_first_char];
4670  }
4671  else
4672  {
4673  next_seq_id = contr->next;
4674  }
4675 
4676  if (INTL_IS_NEXT_CONTR (next_seq_id))
4677  {
4678  contr = &(lang_coll->coll.contr_list[INTL_GET_NEXT_CONTR_ID (next_seq_id)]);
4679  memcpy (next_seq, contr->c_buf, contr->size);
4680  *len_next = contr->cp_count;
4681  return contr->size;
4682  }
4683  else
4684  {
4685  cp_next_char = next_seq_id;
4686  }
4687  }
4688  else
4689  {
4690  /* codepoint is not collated in current locale */
4691  cp_next_char = cp_first_char + 1;
4692  }
4693 
4694  *len_next = 1;
4695  return intl_cp_to_utf8 (cp_next_char, next_seq);
4696 }
4697 
4698 /*
4699  * lang_split_key_iso() - finds the prefix key between two strings (ISO
4700  * charset with cases sensitive collation)
4701  *
4702  * return: error status
4703  * lang_coll(in):
4704  * is_desc(in):
4705  * str1(in):
4706  * size1(in):
4707  * str2(in):
4708  * size2(in):
4709  * key(out): key
4710  * byte_size(out): size in bytes of key
4711  *
4712  * Note : this function is used by index prefix computation
4713  */
4714 static int
4715 lang_split_key_iso (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
4716  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
4717  bool ignore_trailing_space)
4718 {
4719  const unsigned char *str1_end, *str2_end;
4720  const unsigned char *str1_begin, *str2_begin;
4721  int key_size;
4722  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
4723 
4724  assert (key != NULL);
4725  assert (byte_size != NULL);
4726 
4727  str1_end = str1 + size1;
4728  str2_end = str2 + size2;
4729  str1_begin = str1;
4730  str2_begin = str2;
4731 
4732  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
4733  {
4734  if (*str1 != *str2)
4735  {
4736  assert ((!is_desc && *str1 < *str2) || (is_desc && *str1 > *str2));
4737  break;
4738  }
4739  }
4740 
4741  if (!is_desc)
4742  { /* normal index */
4743  *key = (unsigned char *) str2_begin;
4744 
4745  /* common part plus a character with non-zero weight */
4746  while (str2 < str2_end)
4747  {
4748  if (weight[*str2++] != ZERO)
4749  {
4750  break;
4751  }
4752  }
4753  assert (str2 <= str2_end);
4754  key_size = CAST_BUFLEN (str2 - str2_begin);
4755  }
4756  else
4757  { /* reverse index */
4758  assert (is_desc);
4759 
4760  /* common part plus a character with non-zero weight from str1 */
4761  while (str1 < str1_end)
4762  {
4763  if (weight[*str1++] != ZERO)
4764  {
4765  break;
4766  }
4767  }
4768 
4769  if (str1 >= str1_end)
4770  {
4771  /* str1 exhaused or at last char, we use str2 as key */
4772  *key = (unsigned char *) str2_begin;
4773  key_size = CAST_BUFLEN (str2_end - str2_begin);
4774  }
4775  else
4776  {
4777  assert (str1 < str1_end);
4778  *key = (unsigned char *) str1_begin;
4779  key_size = CAST_BUFLEN (str1 - str1_begin);
4780  }
4781  }
4782 
4783  *byte_size = key_size;
4784 
4785  return NO_ERROR;
4786 }
4787 
4788 /*
4789  * lang_split_key_byte() - finds the prefix key :
4790  * collations with byte-characters (ISO charset) and
4791  * weight values (e.g. case insensitive).
4792  *
4793  * return: error status
4794  * lang_coll(in):
4795  * is_desc(in):
4796  * str1(in):
4797  * size1(in):
4798  * str2(in):
4799  * size2(in):
4800  * key(out): key
4801  * byte_size(out): size in bytes of key
4802  *
4803  * Note : this function is used by index prefix computation
4804  */
4805 static int
4806 lang_split_key_byte (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
4807  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
4808  bool ignore_trailing_space)
4809 {
4810  const unsigned char *str1_end, *str2_end;
4811  const unsigned char *str1_begin, *str2_begin;
4812  unsigned int w1, w2;
4813  int key_size;
4814  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
4815 
4816  assert (key != NULL);
4817  assert (byte_size != NULL);
4818 
4819  str1_end = str1 + size1;
4820  str2_end = str2 + size2;
4821  str1_begin = str1;
4822  str2_begin = str2;
4823 
4824  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
4825  {
4826  w1 = weight[*str1];
4827  w2 = weight[*str2];
4828 
4829  if (w1 != w2)
4830  {
4831  assert ((!is_desc && w1 < w2) || (is_desc && w1 > w2));
4832  break;
4833  }
4834  }
4835 
4836  if (!is_desc)
4837  { /* normal index */
4838  *key = (unsigned char *) str2_begin;
4839 
4840  /* common part plus a character with non-zero weight */
4841  while (str2 < str2_end)
4842  {
4843  if (weight[*str2++] != 0)
4844  {
4845  break;
4846  }
4847  }
4848  key_size = CAST_BUFLEN (str2 - str2_begin);
4849  }
4850  else
4851  { /* reverse index */
4852  assert (is_desc);
4853 
4854  /* common part plus a character with non-zero weight from str1 */
4855  while (str1 < str1_end)
4856  {
4857  if (weight[*str1++] != 0)
4858  {
4859  break;
4860  }
4861  }
4862 
4863  if (str1 >= str1_end)
4864  {
4865  /* str1 exhaused or at last char, we use str2 as key */
4866  *key = (unsigned char *) str2_begin;
4867  key_size = CAST_BUFLEN (str2_end - str2_begin);
4868  }
4869  else
4870  {
4871  assert (str1 < str1_end);
4872  *key = (unsigned char *) str1_begin;
4873  key_size = CAST_BUFLEN (str1 - str1_begin);
4874  }
4875  }
4876 
4877  *byte_size = key_size;
4878 
4879  return NO_ERROR;
4880 }
4881 
4882 /*
4883  * lang_split_key_utf8() - finds the prefix key; UTF-8 collation with
4884  * contractions but without expansions
4885  *
4886  * return: error status
4887  * lang_coll(in):
4888  * is_desc(in):
4889  * str1(in):
4890  * size1(in):
4891  * str2(in):
4892  * size2(in):
4893  * key(out): key
4894  * byte_size(out): size in bytes of key
4895  *
4896  * Note : this function is used by index prefix computation
4897  */
4898 static int
4899 lang_split_key_utf8 (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
4900  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
4901  bool ignore_trailing_space)
4902 {
4903  const unsigned char *str1_end, *str2_end;
4904  const unsigned char *str1_begin, *str2_begin;
4905  unsigned char *str1_next, *str2_next;
4906  unsigned int w1, w2;
4907  int key_size;
4908  const COLL_DATA *coll = &(lang_coll->coll);
4909 
4910  assert (key != NULL);
4911  assert (byte_size != NULL);
4912 
4913  str1_end = str1 + size1;
4914  str2_end = str2 + size2;
4915  str1_begin = str1;
4916  str2_begin = str2;
4917 
4918  for (; str1 < str1_end && str2 < str2_end;)
4919  {
4920  w1 = lang_get_w_first_el (coll, str1, CAST_BUFLEN (str1_end - str1), &str1_next, ignore_trailing_space);
4921  w2 = lang_get_w_first_el (coll, str2, CAST_BUFLEN (str2_end - str2), &str2_next, ignore_trailing_space);
4922 
4923  if (w1 != w2)
4924  {
4925  assert ((!is_desc && w1 < w2) || (is_desc && w1 > w2));
4926  break;
4927  }
4928 
4929  str1 = str1_next;
4930  str2 = str2_next;
4931  }
4932 
4933  if (!is_desc)
4934  { /* normal index */
4935  *key = (unsigned char *) str2_begin;
4936 
4937  /* common part plus a character with non-zero weight from str2 */
4938  while (str2 < str2_end)
4939  {
4940  w2 = lang_get_w_first_el (coll, str2, CAST_BUFLEN (str2_end - str2), &str2_next, ignore_trailing_space);
4941  str2 = str2_next;
4942  if (w2 != 0)
4943  {
4944  break;
4945  }
4946  }
4947 
4948  assert (str2 <= str2_end);
4949  key_size = CAST_BUFLEN (str2 - str2_begin);
4950  }
4951  else
4952  { /* reverse index */
4953  assert (is_desc);
4954  /* common part plus a character with non-zero weight from str1 */
4955  while (str1 < str1_end)
4956  {
4957  w1 = lang_get_w_first_el (coll, str1, CAST_BUFLEN (str1_end - str1), &str1_next, ignore_trailing_space);
4958  str1 = str1_next;
4959  if (w1 != 0)
4960  {
4961  break;
4962  }
4963  }
4964 
4965  if (str1 >= str1_end)
4966  {
4967  /* str1 exhaused or at last char, we use str2 as key */
4968  *key = (unsigned char *) str2_begin;
4969  key_size = CAST_BUFLEN (str2_end - str2_begin);
4970  }
4971  else
4972  {
4973  assert (str1 < str1_end);
4974  *key = (unsigned char *) str1_begin;
4975  key_size = CAST_BUFLEN (str1 - str1_begin);
4976  }
4977  }
4978 
4979  *byte_size = key_size;
4980 
4981  return NO_ERROR;
4982 }
4983 
4984 /*
4985  * lang_split_key_w_exp() - finds the prefix key for UTF-8 strings and
4986  * collation with expansions
4987  *
4988  * return: error status (only NO_ERROR is returned by this function)
4989  * lang_coll(in): collation; its COLL_DATA is passed to lang_get_uca_w_l13
4990  * is_desc(in): true for a reverse (descending) index
4991  * str1(in): first string
4992  * size1(in): size in bytes of str1
4993  * str2(in): second string
4994  * size2(in): size in bytes of str2
4995  * key(out): key; points to the start of str2 or of str1
4996  * byte_size(out): size in bytes in key
 * ignore_trailing_space(in): not referenced by the body of this function
4997  *
4998  * Note : this function is used by index prefix computation
 *	  Only level 1 weights (UCA_GET_L1_W) are compared here.
 *	  When either string runs out while no collation element is pending
 *	  for it, the whole str2 is returned as the key.
4999  */
5000 static int
5001 lang_split_key_w_exp (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
5002  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
5003  bool ignore_trailing_space)
5004 {
5005  const unsigned char *str1_end;
5006  const unsigned char *str2_end;
5007  unsigned char *str1_next, *str2_next;
5008  unsigned char *str1_begin, *str2_begin;
5009  UCA_L13_W *uca_w_l13_1 = NULL;
5010  UCA_L13_W *uca_w_l13_2 = NULL;
5011  int num_ce1 = 0, num_ce2 = 0; /* collation elements still pending for the current position of each string */
5012  int ce_index1 = 0, ce_index2 = 0; /* index of the next pending element in uca_w_l13_1 / uca_w_l13_2 */
5013  unsigned int w1 = 0, w2 = 0;
5014  const COLL_DATA *cd = &(lang_coll->coll);
5015  unsigned int dummy;
5016  int key_size;
5017  bool force_key = false; /* set when the main loop stops because a string was exhausted */
5018
5019  assert (key != NULL);
5020  assert (byte_size != NULL);
5021
5022  str1_begin = str1_next = (unsigned char *) str1;
5023  str2_begin = str2_next = (unsigned char *) str2;
5024
5025  str1_end = str1 + size1;
5026  str2_end = str2 + size2;
5027
5028  /* Regular string compare in collation with expansions requires multiple passes up to the UCA level of collation or
5029  * until a weight difference. The key prefix algorithm takes into account only level 1 of weight. */
5030  for (;;)
5031  {
5032  read_weights1:
 /* refill the element list of str1 only when all previously read elements were consumed */
5033  if (num_ce1 == 0)
5034  {
5035  str1 = str1_next;
5036  if (str1 >= str1_end)
5037  {
5038  goto read_weights2;
5039  }
5040
5041  lang_get_uca_w_l13 (cd, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1, &str1_next,
5042  &dummy);
5043  assert (num_ce1 > 0);
5044
5045  ce_index1 = 0;
5046  }
5047
5048  read_weights2:
 /* same refill step for str2 */
5049  if (num_ce2 == 0)
5050  {
5051  str2 = str2_next;
5052  if (str2 >= str2_end)
5053  {
5054  goto compare;
5055  }
5056
5057  lang_get_uca_w_l13 (cd, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2, &str2_next,
5058  &dummy);
5059
5060  assert (num_ce2 > 0);
5061
5062  ce_index2 = 0;
5063  }
5064
5065  compare:
 /* one side has nothing left to compare: stop and use the whole str2 as key */
5066  if ((num_ce1 == 0 && str1 >= str1_end) || (num_ce2 == 0 && str2 >= str2_end))
5067  {
5068  force_key = true;
5069  break;
5070  }
5071
5072  w1 = UCA_GET_L1_W (uca_w_l13_1[ce_index1]);
5073  w2 = UCA_GET_L1_W (uca_w_l13_2[ce_index2]);
5074
5075  /* ignore zero weights (unless character is space) */
5076  if (w1 == 0 && *str1 != ASCII_SPACE)
5077  {
5078  ce_index1++;
5079  num_ce1--;
5080
 /* when both current elements are ignorable, drop both in the same step */
5081  if (w2 == 0 && *str2 != ASCII_SPACE)
5082  {
5083  ce_index2++;
5084  num_ce2--;
5085  }
5086
5087  goto read_weights1;
5088  }
5089  else if (w2 == 0 && *str2 != ASCII_SPACE)
5090  {
5091  ce_index2++;
5092  num_ce2--;
5093
5094  goto read_weights1;
5095  }
5096  else if (w1 != w2)
5097  {
 /* first level 1 difference; its direction is expected to match the index order */
5098  assert ((is_desc && w1 > w2) || (!is_desc && w1 < w2));
5099  break;
5100  }
5101
5102  assert (w1 == w2);
5103
 /* equal weights: consume one element on each side */
5104  ce_index1++;
5105  ce_index2++;
5106
5107  num_ce1--;
5108  num_ce2--;
5109  }
5110
5111  if (force_key)
5112  {
5113  *key = str2_begin;
5114  *byte_size = size2;
5115  return NO_ERROR;
5116  }
5117
5118  if (!is_desc)
5119  { /* normal index */
5120  *key = (unsigned char *) str2_begin;
5121
5122  /* common part plus a character with non-zero weight */
5123  while (str2 < str2_end)
5124  {
5125  lang_get_uca_w_l13 (cd, true, str2, CAST_BUFLEN (str2_end - str2), &uca_w_l13_2, &num_ce2, &str2_next,
5126  &dummy);
5127  str2 = str2_next;
5128
 /* only the first element read at this position decides whether to stop */
5129  if (UCA_GET_L1_W (uca_w_l13_2[0]) != 0)
5130  {
5131  break;
5132  }
5133  }
5134
5135  assert (str2 <= str2_end);
5136  key_size = CAST_BUFLEN (str2 - str2_begin);
5137  }
5138  else
5139  { /* reverse index */
5140  assert (is_desc);
5141  /* common part plus a character with non-zero weight from str1 */
5142  while (str1 < str1_end)
5143  {
5144  lang_get_uca_w_l13 (cd, true, str1, CAST_BUFLEN (str1_end - str1), &uca_w_l13_1, &num_ce1, &str1_next,
5145  &dummy);
5146  str1 = str1_next;
5147
5148  if (UCA_GET_L1_W (uca_w_l13_1[0]) != 0)
5149  {
5150  break;
5151  }
5152  }
5153
5154  if (str1 >= str1_end)
5155  {
5156  /* str1 exhausted or at last char, we use str2 as key */
5157  *key = (unsigned char *) str2_begin;
5158  key_size = CAST_BUFLEN (str2_end - str2_begin);
5159  }
5160  else
5161  {
5162  assert (str1 < str1_end);
5163  *key = (unsigned char *) str1_begin;
5164  key_size = CAST_BUFLEN (str1 - str1_begin);
5165  }
5166  }
5167
5168  *byte_size = key_size;
5169
5170  return NO_ERROR;
5171 }
5172 
5173 /*
5174  * lang_split_key_euckr() - finds the prefix key for EUC-KR collation
5175  *
5176  * return: error status
5177  * lang_coll(in):
5178  * is_desc(in):
5179  * str1(in):
5180  * size1(in):
5181  * str2(in):
5182  * size2(in):
5183  * key(out): key
5184  * byte_size(out): size in bytes in key
5185  *
5186  * Note : this function is used by index prefix computation
5187  */
5188 static int
5189 lang_split_key_euckr (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
5190  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
5191  bool ignore_trailing_space)
5192 {
5193  const unsigned char *str1_next, *str2_next;
5194  int key_size, char1_size, char2_size;
5195  const unsigned char *str1_end, *str2_end;
5196  const unsigned char *str1_begin, *str2_begin;
5197  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
5198 
5199  assert (key != NULL);
5200  assert (byte_size != NULL);
5201 
5202  str1_end = str1 + size1;
5203  str2_end = str2 + size2;
5204  str1_begin = str1;
5205  str2_begin = str2;
5206 
5207  for (; str1 < str1_end && str2 < str2_end;)
5208  {
5209  str1_next = intl_nextchar_euc (str1, &char1_size);
5210  str2_next = intl_nextchar_euc (str2, &char2_size);
5211 
5212  if (char1_size != char2_size || memcmp (str1, str2, char1_size) != 0)
5213  {
5214  break;
5215  }
5216 
5217  str1 = str1_next;
5218  str2 = str2_next;
5219  }
5220 
5221  if (!is_desc)
5222  { /* normal index */
5223  *key = (unsigned char *) str2_begin;
5224 
5225  /* common part plus a character with non-zero weight */
5226  while (str2 < str2_end)
5227  {
5228  bool is_zero_weight = false;
5229  str2_next = intl_nextchar_euc (str2, &char2_size);
5230  if (*str2 == ASCII_SPACE || *str2 == 0 || (*str2 == EUC_SPACE && char2_size == 2 && *(str2 + 1) == EUC_SPACE))
5231  {
5232  is_zero_weight = (weight[SPACE] == 0);
5233  }
5234  str2 = str2_next;
5235  if (!is_zero_weight)
5236  {
5237  break;
5238  }
5239  }
5240 
5241  assert (str2 <= str2_end);
5242  key_size = CAST_BUFLEN (str2 - str2_begin);
5243  }
5244  else
5245  { /* reverse index */
5246  assert (is_desc);
5247 
5248  /* common part plus a character with non-zero weight from str1 */
5249  while (str1 < str1_end)
5250  {
5251  bool is_zero_weight = false;
5252  str1_next = intl_nextchar_euc (str1, &char1_size);
5253  if (*str1 == ASCII_SPACE || *str1 == 0 || (*str1 == EUC_SPACE && char1_size == 2 && *(str1 + 1) == EUC_SPACE))
5254  {
5255  is_zero_weight = (weight[SPACE] == 0);
5256  }
5257  str1 = str1_next;
5258  if (!is_zero_weight)
5259  {
5260  break;
5261  }
5262  }
5263 
5264  if (str1 >= str1_end)
5265  {
5266  /* str1 exhaused or at last char, we use str2 as key */
5267  *key = (unsigned char *) str2_begin;
5268  key_size = CAST_BUFLEN (str2_end - str2_begin);
5269  }
5270  else
5271  {
5272  assert (str1 < str1_end);
5273  *key = (unsigned char *) str1_begin;
5274  key_size = CAST_BUFLEN (str1 - str1_begin);
5275  }
5276  }
5277 
5278  *byte_size = key_size;
5279 
5280  return NO_ERROR;
5281 }
5282 
5283 /*
5284  * English Locale Data
5285  */
5286 
5287 
5288 /*
5289  * lang_initloc_en () - init locale data for English language
 *		       (links the ISO binary and ISO-8859-1 English
 *		       case sensitive / insensitive collations to it)
5290  * return: nothing
 * ld(in/out): locale data; asserted to be non-NULL
 *
 * NOTE(review): another routine later in this file carries the same name in
 * its header comment but wires the UTF-8 collations - confirm which
 * definition each header belongs to.
5291  */
5292 static void
5294 {
5295  assert (ld != NULL);
5296
 /* make this locale the default language of the three collations */
5297  coll_Iso_binary.default_lang = ld;
5298  coll_Iso88591_en_cs.default_lang = ld;
5299  coll_Iso88591_en_ci.default_lang = ld;
5300
5301  ld->is_initialized = true;
5302 }
5303 
5304 /*
5305  * lang_initloc_en_binary () - init locale data for English language
 *			      (links the coll_Binary collation to it)
5306  * return: nothing
 * ld(in/out): locale data; asserted to be non-NULL
5307  */
5308 static void
5310 {
5311  assert (ld != NULL);
5312
 /* make this locale the default language of the binary collation */
5313  coll_Binary.default_lang = ld;
5314
5315  ld->is_initialized = true;
5316 }
5317 
5318 /*
5319  * lang_init_common_en_cs () - init collation data for English case
5320  * sensitive (no matter the charset)
5321  * with optional ts (trailing space sensitive)
5322  * in: coll_data (collation data)
5323  * return: nothing
 *
 * Every code point below coll_data->w_count gets its own value as weight and
 * value + 1 as next code point, in both the regular and the "_ti" tables;
 * the "_ti" tables then get weight 0 / next 1 for the space character (32).
 *
 * NOTE(review): the function-local static flag makes only the first call do
 * any work, whatever coll_data later calls pass - presumably all callers
 * share the same tables; confirm.
5324  */
5325 static void
5327 {
5328  int i;
5329  static bool is_common_en_cs_init = false;
5330
5331  if (is_common_en_cs_init)
5332  {
5333  return;
5334  }
5335
 /* identity weights, successor is the next code point */
5336  for (i = 0; i < coll_data->w_count; i++)
5337  {
5338  coll_data->weights_ti[i] = coll_data->weights[i] = i;
5339  coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1;
5340  }
5341
 /* for ignore trailing space: space weighs zero */
5342  coll_data->weights_ti[32] = 0;
5343  coll_data->next_cp_ti[32] = 1;
5344
5345  is_common_en_cs_init = true;
5346 }
5347 
5348 /*
5349  * lang_init_common_en_ci () - init collation data for English case
5350  * insensitive (no matter the charset)
5351  * with optional ts (trailing space sensitive)
5352  * in: coll_data (collation data)
5353  * return: nothing
 *
 * Starts from identity weights, then gives 'a'..'z' the weights of 'A'..'Z'
 * and patches the next code point entries around the lower case range.
 *
 * NOTE(review): the function-local static flag makes only the first call do
 * any work, whatever coll_data later calls pass - presumably all callers
 * share the same tables; confirm.
5354  */
5355 static void
5357 {
5358  int i;
5359  static bool is_common_en_ci_init = false;
5360
5361  if (is_common_en_ci_init)
5362  {
5363  return;
5364  }
5365
 /* identity weights, successor is the next code point */
5366  for (i = 0; i < coll_data->w_count; i++)
5367  {
5368  coll_data->weights_ti[i] = coll_data->weights[i] = i;
5369  coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1;
5370  }
5371
 /* lower case letters weigh the same as their upper case counterparts */
5372  for (i = 'a'; i <= (int) 'z'; i++)
5373  {
5374  coll_data->weights_ti[i] = coll_data->weights[i] = i - ('a' - 'A');
5375  coll_data->next_cp_ti[i] = coll_data->next_cp[i] = i + 1 - ('a' - 'A');
5376  }
5377
 /* 'z' gets the successor of 'Z'; the code point before 'a' gets the successor of the one before 'A' */
5378  coll_data->next_cp['z'] = coll_data->next_cp['Z'];
5379  coll_data->next_cp['a' - 1] = coll_data->next_cp['A' - 1];
5380
5381  coll_data->next_cp_ti['z'] = coll_data->next_cp_ti['Z'];
5382  coll_data->next_cp_ti['a' - 1] = coll_data->next_cp_ti['A' - 1];
5383
5384  /* for ignore trailing space */
5385  coll_data->weights_ti[32] = 0;
5386  coll_data->next_cp_ti[32] = 1;
5387
5388
5389  is_common_en_ci_init = true;
5390 }
5391 
5392 /*
5393  * lang_init_coll_en_cs () - init collation for English case sensitive
5394  * on no matter charset (iso88591, utf8, euckr)
5395  * with optional ts (trailing space sensitive)
5396  * return: nothing
 * lang_coll(in/out): collation; asserted non-NULL, left untouched when its
 *		      need_init flag is already false
5397  */
5398 static void
5400 {
5401  assert (lang_coll != NULL);
5402
5403  if (!(lang_coll->need_init))
5404  {
5405  return;
5406  }
5407
5408  /* init data */
5409  lang_init_common_en_cs (&lang_coll->coll);
5410
 /* do not initialize this collation again */
5411  lang_coll->need_init = false;
5412 }
5413 
5414 /*
5415  * lang_init_coll_en_ci () - init collation for English case insensitive
5416  * on no matter charset (iso88591, utf8, euckr)
5417  * with optional ts (trailing space sensitive)
5418  * return: nothing
 * lang_coll(in/out): collation; asserted non-NULL, left untouched when its
 *		      need_init flag is already false
5419  */
5420 static void
5422 {
5423  assert (lang_coll != NULL);
5424
5425  if (!(lang_coll->need_init))
5426  {
5427  return;
5428  }
5429
5430  /* init data */
5431  lang_init_common_en_ci (&lang_coll->coll);
5432
 /* do not initialize this collation again */
5433  lang_coll->need_init = false;
5434 }
5435 
5436 /*
5437  * lang_initloc_en () - init locale data for English language
 *		       (fills the lang_upper_EN / lang_lower_EN case tables
 *		       and links the UTF-8 binary and English collations)
5438  * return: nothing
 * ld(in/out): locale data; asserted non-NULL with a non-NULL
 *	       default_lang_coll
 *
 * NOTE(review): an earlier routine in this file carries the same name in its
 * header comment but wires the ISO-8859-1 collations - confirm which
 * definition each header belongs to.
5439  */
5440 static void
5442 {
5443  int i;
5444
5445  assert (ld != NULL);
5446
5447  assert (ld->default_lang_coll != NULL);
5448
5449  /* init alphabet */
 /* every code point first maps to itself in both directions */
5450  for (i = 0; i < LANG_CHAR_COUNT_EN; i++)
5451  {
5452  lang_upper_EN[i] = i;
5453  lang_lower_EN[i] = i;
5454  }
5455
 /* then 'a'..'z' and 'A'..'Z' are paired with each other */
5456  for (i = (int) 'a'; i <= (int) 'z'; i++)
5457  {
5458  lang_upper_EN[i] = i - ('a' - 'A');
5459  lang_lower_EN[i - ('a' - 'A')] = i;
5460  }
5461
5462  /* other initializations to follow here */
5463  coll_Utf8_binary.default_lang = ld;
5464  coll_Utf8_en_cs.default_lang = ld;
5465  coll_Utf8_en_ci.default_lang = ld;
5466
5467  ld->is_initialized = true;
5468 }
5469 
5470 /*
5471  * lang_fastcmp_byte () - compare two character strings of ISO-8859-1 and etc
5472  * codeset
5473  *
5474  * Arguments:
5475  * lang_coll: collation data
5476  * string1: 1st character string
5477  * size1: size of 1st string
5478  * string2: 2nd character string
5479  * size2: size of 2nd string
5480  *
5481  * Returns:
5482  * Greater than 0 if string1 > string2
5483  * Equal to 0 if string1 = string2
5484  * Less than 0 if string1 < string2
5485  *
5486  * Errors:
5487  *
5488  * Note:
5489  * This function is similar to strcmp(3) or bcmp(3). It is designed to
5490  * follow SQL_TEXT character set collation. Padding character(space ' ') is
5491  * the smallest character in the set. (e.g.) "ab z" < "ab\t1"
5492  *
5493  */
5494 
5495 static int
5496 lang_fastcmp_byte (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
5497  const unsigned char *string2, const int size2, bool ignore_trailing_space)
5498 {
5499  int n, i, cmp;
5500  unsigned int c1, c2;
5501  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
5502 
5503 
5504  n = size1 < size2 ? size1 : size2;
5505  for (i = 0, cmp = 0; i < n && cmp == 0; i++)
5506  {
5507  c1 = *string1++;
5508  if (c1 == SPACE)
5509  {
5510  c1 = ZERO;
5511  }
5512  else
5513  {
5514  c1 = weight[c1];
5515  }
5516 
5517  c2 = *string2++;
5518  if (c2 == SPACE)
5519  {
5520  c2 = ZERO;
5521  }
5522  else
5523  {
5524  c2 = weight[c2];
5525  }
5526 
5527  cmp = c1 - c2;
5528  }
5529 
5530  if (cmp || size1 == size2)
5531  {
5532  return cmp;
5533  }
5534 
5535  if (!ignore_trailing_space && size1 != size2)
5536  {
5537  return size1 - size2;
5538  }
5539 
5540  c1 = c2 = ZERO;
5541  if (size1 < size2)
5542  {
5543  n = size2 - size1;
5544  for (i = 0; i < n && cmp == 0; i++)
5545  {
5546  c2 = weight[*string2++];
5547  cmp = c1 - c2;
5548  }
5549  }
5550  else
5551  {
5552  n = size1 - size2;
5553  for (i = 0; i < n && cmp == 0; i++)
5554  {
5555  c1 = weight[*string1++];
5556  cmp = c1 - c2;
5557  }
5558  }
5559  return cmp;
5560 }
5561 
5562 /*
5563  * lang_strmatch_byte () - match or compare two character strings of
5564  * ISO-8859-1 codeset
5565  *
5566  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
5567  * lang_coll(in) : collation data
5568  * is_match(in) : true if match, otherwise is compare
5569  * str1(in):
5570  * size1(in):
5571  * str2(in): this is the pattern string in case of match
5572  * size2(in):
5573  * escape(in): pointer to escape character (multi-byte allowed)
5574  * (used in context of LIKE)
5575  * has_last_escape(in): true if it should check if last character is the
5576  * escape character
5577  * str1_match_size(out): size from str1 which is matched with str2
5578  */
5579 static int
5580 lang_strmatch_byte (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
5581  const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
5582  int *str1_match_size, bool ignore_trailing_space)
5583 {
5584  unsigned int c1, c2;
5585  const unsigned char *str1_end;
5586  const unsigned char *str2_end;
5587  const unsigned char *str1_begin;
5588  const int alpha_cnt = lang_coll->coll.w_count;
5589  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
5590 
5591  str1_begin = str1;
5592  str1_end = str1 + size1;
5593  str2_end = str2 + size2;
5594  for (; str1 < str1_end && str2 < str2_end;)
5595  {
5596  assert (str1_end - str1 > 0);
5597  assert (str2_end - str2 > 0);
5598 
5599  c1 = *str1++;
5600  if (c1 == SPACE)
5601  {
5602  c1 = ZERO;
5603  }
5604 
5605  c2 = *str2++;
5606  if (c2 == SPACE)
5607  {
5608  c2 = ZERO;
5609  }
5610 
5611  if (is_match && escape != NULL && c2 == *escape)
5612  {
5613  if (!(has_last_escape && str2 + 1 >= str2_end))
5614  {
5615  c2 = *str2++;
5616  if (c2 == SPACE)
5617  {
5618  c2 = ZERO;
5619  }
5620  }
5621  }
5622 
5623  if (c1 < (unsigned int) alpha_cnt)
5624  {
5625  c1 = weight[c1];
5626  }
5627 
5628  if (c2 < (unsigned int) alpha_cnt)
5629  {
5630  c2 = weight[c2];
5631  }
5632 
5633  if (c1 != c2)
5634  {
5635  return c1 - c2;
5636  }
5637  }
5638 
5639  size1 = CAST_BUFLEN (str1_end - str1);
5640  size2 = CAST_BUFLEN (str2_end - str2);
5641 
5642  assert (size1 == 0 || size2 == 0);
5643 
5644  if (is_match)
5645  {
5646  assert (str1_match_size != NULL);
5647  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
5648  }
5649 
5650  if (size1 == size2)
5651  {
5652  return 0;
5653  }
5654  else if (size2 > 0)
5655  {
5656  if (is_match)
5657  {
5658  /* pattern string should be exhausted for a full match */
5659  return -1;
5660  }
5661  for (; str2 < str2_end;)
5662  {
5663  c2 = weight[*str2++];
5664  if (c2)
5665  {
5666  return -1;
5667  }
5668  }
5669  }
5670  else
5671  {
5672  assert (size1 > 0);
5673 
5674  if (is_match)
5675  {
5676  return 0;
5677  }
5678 
5679  for (; str1 < str1_end;)
5680  {
5681  c1 = weight[*str1++];
5682  if (c1)
5683  {
5684  return 1;
5685  }
5686  }
5687  }
5688  return 0;
5689 }
5690 
5691 /*
5692  * lang_mht2str_default () -
5693  * return:
5694  * lang_coll(in):
5695  * str(in):
5696  * size(in):
5697  *
5698  */
5699 static unsigned int
5700 lang_mht2str_default (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
5701 {
5702  return mht_2str_pseudo_key (str, size);
5703 }
5704 
5705 /*
5706  * lang_mht2str_byte () -
5707  * return:
5708  * lang_coll(in):
5709  * str(in):
5710  * size(in):
5711  *
5712  */
5713 static unsigned int
5714 lang_mht2str_byte (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
5715 {
5716  const unsigned char *str_end = str + size;
5717  unsigned int pseudo_key = 0;
5718  unsigned int w;
5719 
5720  for (; str < str_end; str++)
5721  {
5722  w = lang_coll->coll.weights[*str];
5723  ADD_TO_HASH (pseudo_key, w);
5724  }
5725 
5726  return pseudo_key;
5727 }
5728 
5729 /*
5730  * lang_next_alpha_char_iso88591() - computes the next alphabetical char
5731  * return: size in bytes of the next alphabetical char
5732  * lang_coll(in): collation data
5733  * seq(in): pointer to current char
5734  * size(in): size in bytes for seq
5735  * next_seq(in/out): buffer to return next alphabetical char
5736  * len_next(in/out): length in chars for nex_seq
5737  *
5738  */
5739 static int
5740 lang_next_alpha_char_iso88591 (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
5741  unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
5742 {
5743  assert (seq != NULL);
5744  assert (next_seq != NULL);
5745  assert (len_next != NULL);
5746  assert (size > 0);
5747 
5748  *next_seq = (*seq == 0xff) ? 0xff : (*seq + 1);
5749  *len_next = 1;
5750  return 1;
5751 }
5752 
5753 /*
5754  * lang_next_coll_byte() - computes the next collatable char
5755  * return: size in bytes of the next collatable char
5756  * lang_coll(on): collation
5757  * seq(in): pointer to current char
5758  * size(in): available bytes for current char
5759  * next_seq(in/out): buffer to return next alphabetical char
5760  * len_next(in/out): length in chars of next char (always 1 for this func)
5761  *
5762  * Note : This assumes the weights and next col are define at byte level.
5763  */
5764 static int
5765 lang_next_coll_byte (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
5766  unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
5767 {
5768  unsigned int cp_alpha_char, cp_next_alpha_char;
5769  const int alpha_cnt = lang_coll->coll.w_count;
5770  const unsigned int *next_alpha_char = (ignore_trailing_space) ? lang_coll->coll.next_cp_ti : lang_coll->coll.next_cp;
5771 
5772  assert (seq != NULL);
5773  assert (next_seq != NULL);
5774  assert (len_next != NULL);
5775  assert (size > 0);
5776 
5777  cp_alpha_char = (unsigned int) *seq;
5778 
5779  if (cp_alpha_char < (unsigned int) alpha_cnt)
5780  {
5781  cp_next_alpha_char = next_alpha_char[cp_alpha_char];
5782  }
5783  else
5784  {
5785  cp_next_alpha_char = (cp_alpha_char == 0xff) ? 0xff : (cp_alpha_char + 1);
5786  }
5787 
5788  assert (cp_next_alpha_char <= 0xff);
5789 
5790  *next_seq = (unsigned char) cp_next_alpha_char;
5791  *len_next = 1;
5792 
5793  return 1;
5794 }
5795 
5796 
5797 /*
5798  * Turkish Locale Data
5799  */
5800 
5801 /*
5802  * lang_init_coll_Utf8_tr_cs () - init collation data for Turkish
5803  * return: nothing
5804  * lang_coll(in/out): collation; asserted non-NULL, left untouched when its
 *		      need_init flag is already false
 *
 * Fills the weight and next code point tables (regular and "_ti" variants)
 * for the first LANG_CHAR_COUNT_TR code points, placing each Turkish
 * specific letter right after the letter listed for it in
 * special_prev_upper_cp / special_prev_lower_cp.
5805  */
5806 static void
5808 {
5809  int i;
5810  unsigned int *lang_Weight_TR;
5811  unsigned int *lang_Next_alpha_char_TR;
5812  unsigned int *lang_Weight_TR_ti;
5813  unsigned int *lang_Next_alpha_char_TR_ti;
5814
5815  const unsigned int special_upper_cp[] = {
5816  0xc7, /* capital C with cedilla */
5817  0x11e, /* capital letter G with breve */
5818  0x130, /* capital letter I with dot above */
5819  0xd6, /* capital letter O with diaeresis */
5820  0x15e, /* capital letter S with cedilla */
5821  0xdc /* capital letter U with diaeresis */
5822  };
5823
 /* letter after which the special upper case letter at the same index is ordered */
5824  const unsigned int special_prev_upper_cp[] = { 'C', 'G', 'I', 'O', 'S', 'U' };
5825
5826  const unsigned int special_lower_cp[] = {
5827  0xe7, /* small c with cedilla */
5828  0x11f, /* small letter g with breve */
5829  0x131, /* small letter dotless i */
5830  0xf6, /* small letter o with diaeresis */
5831  0x15f, /* small letter s with cedilla */
5832  0xfc /* small letter u with diaeresis */
5833  };
5834
 /* same for lower case; note that dotless i is ordered after 'h', not after 'i' */
5835  const unsigned int special_prev_lower_cp[] = { 'c', 'g', 'h', 'o', 's', 'u' };
5836
5837  assert (lang_coll != NULL);
5838
5839  if (!(lang_coll->need_init))
5840  {
5841  return;
5842  }
5843
5844  lang_Weight_TR = lang_coll->coll.weights;
5845  lang_Next_alpha_char_TR = lang_coll->coll.next_cp;
5846
5847  lang_Weight_TR_ti = lang_coll->coll.weights_ti;
5848  lang_Next_alpha_char_TR_ti = lang_coll->coll.next_cp_ti;
5849
 /* start from identity weights, successor is the next code point */
5850  for (i = 0; i < LANG_CHAR_COUNT_TR; i++)
5851  {
5852  lang_Weight_TR[i] = i;
5853  lang_Next_alpha_char_TR[i] = i + 1;
5854  lang_Weight_TR_ti[i] = i;
5855  lang_Next_alpha_char_TR_ti[i] = i + 1;
5856  }
5857
5858  assert (DIM (special_lower_cp) == DIM (special_upper_cp));
5859
5860  /* specific turkish letters: weighting for string compare */
5861  for (i = 0; i < (int) DIM (special_upper_cp); i++)
5862  {
5863  unsigned int j;
5864  unsigned int cp = special_upper_cp[i];
5865  unsigned cp_repl = 1 + special_prev_upper_cp[i];
5866  unsigned int w_repl = lang_Weight_TR[cp_repl];
5867
 /* the special letter takes over the current weight of the code point following its predecessor */
5868  lang_Weight_TR[cp] = w_repl;
5869  lang_Weight_TR_ti[cp] = w_repl;
5870
5871  assert (cp_repl < cp);
 /* code points between that one and the special letter move up by one when their weight is not smaller */
5872  for (j = cp_repl; j < cp; j++)
5873  {
5874  if (lang_Weight_TR[j] >= w_repl)
5875  {
5876  (lang_Weight_TR[j])++;
5877  (lang_Weight_TR_ti[j])++;
5878  }
5879  }
5880  }
5881
 /* same weight insertion for the lower case special letters */
5882  for (i = 0; i < (int) DIM (special_lower_cp); i++)
5883  {
5884  unsigned int j;
5885  unsigned int cp = special_lower_cp[i];
5886  unsigned cp_repl = 1 + special_prev_lower_cp[i];
5887  unsigned int w_repl = lang_Weight_TR[cp_repl];
5888
5889  lang_Weight_TR[cp] = w_repl;
5890  lang_Weight_TR_ti[cp] = w_repl;
5891
5892  assert (cp_repl < cp);
5893  for (j = cp_repl; j < cp; j++)
5894  {
5895  if (lang_Weight_TR[j] >= w_repl)
5896  {
5897  (lang_Weight_TR[j])++;
5898  (lang_Weight_TR_ti[j])++;
5899  }
5900  }
5901  }
5902
5903  /* next letter in alphabet (for pattern searching) */
5904  for (i = 0; i < (int) DIM (special_upper_cp); i++)
5905  {
5906  unsigned int cp_special = special_upper_cp[i];
5907  unsigned int cp_prev = special_prev_upper_cp[i];
5908  unsigned int cp_next = cp_prev + 1;
5909
 /* chain: predecessor -> special letter -> code point after the predecessor */
5910  lang_Next_alpha_char_TR[cp_prev] = cp_special;
5911  lang_Next_alpha_char_TR[cp_special] = cp_next;
5912  lang_Next_alpha_char_TR_ti[cp_prev] = cp_special;
5913  lang_Next_alpha_char_TR_ti[cp_special] = cp_next;
5914  }
5915
5916  for (i = 0; i < (int) DIM (special_lower_cp); i++)
5917  {
5918  unsigned int cp_special = special_lower_cp[i];
5919  unsigned int cp_prev = special_prev_lower_cp[i];
5920  unsigned int cp_next = cp_prev + 1;
5921
5922  lang_Next_alpha_char_TR[cp_prev] = cp_special;
5923  lang_Next_alpha_char_TR[cp_special] = cp_next;
5924  lang_Next_alpha_char_TR_ti[cp_prev] = cp_special;
5925  lang_Next_alpha_char_TR_ti[cp_special] = cp_next;
5926  }
5927
 /* for ignore trailing space: space weighs zero */
5928  lang_Weight_TR_ti[32] = 0;
5929  lang_Next_alpha_char_TR_ti[32] = 1;
5930
5931  /* other initializations to follow here */
5932
5933  lang_coll->need_init = false;
5934 }
5935 
5936 /*
5937  * lang_initloc_tr_iso () - init locale data for Turkish language
5938  * (ISO charset)
5939  * return: nothing
5940  * ld(in/out): locale data; asserted non-NULL, only flagged as initialized
5941  */
5942 static void
5944 {
5945  assert (ld != NULL);
5946
 /* nothing to build for this locale; just mark it as initialized */
5947  ld->is_initialized = true;
5948 }
5949 
5950 /*
5951  * lang_initloc_tr_utf8 () - init locale data for Turkish language (UTF8)
5952  * return: nothing
5953  * ld(in/out): locale data; asserted non-NULL with a non-NULL
 *	       default_lang_coll
 *
 * Builds the user case tables (lang_upper_TR / lang_lower_TR) and the
 * identifier case tables (lang_upper_i_TR / lang_lower_i_TR), then links
 * coll_Utf8_tr_cs to this locale.
5954  */
5955 static void
5957 {
5958  int i;
5959
5960  const unsigned int special_upper_cp[] = {
5961  0xc7, /* capital C with cedilla */
5962  0x11e, /* capital letter G with breve */
5963  0x130, /* capital letter I with dot above */
5964  0xd6, /* capital letter O with diaeresis */
5965  0x15e, /* capital letter S with cedilla */
5966  0xdc /* capital letter U with diaeresis */
5967  };
5968
5969  const unsigned int special_lower_cp[] = {
5970  0xe7, /* small c with cedilla */
5971  0x11f, /* small letter g with breve */
5972  0x131, /* small letter dotless i */
5973  0xf6, /* small letter o with diaeresis */
5974  0x15f, /* small letter s with cedilla */
5975  0xfc /* small letter u with diaeresis */
5976  };
5977
5978  assert (ld != NULL);
5979
5980  assert (ld->default_lang_coll != NULL);
5981
5982  /* init alphabet */
 /* every code point first maps to itself in both directions */
5983  for (i = 0; i < LANG_CHAR_COUNT_TR; i++)
5984  {
5985  lang_upper_TR[i] = i;
5986  lang_lower_TR[i] = i;
5987  }
5988
 /* ASCII letters: pair 'a'..'z' with 'A'..'Z' */
5989  for (i = (int) 'a'; i <= (int) 'z'; i++)
5990  {
5991  lang_upper_TR[i] = i - ('a' - 'A');
5992  lang_lower_TR[i - ('a' - 'A')] = i;
5993
5994  lang_lower_TR[i] = i;
5995  lang_upper_TR[i - ('a' - 'A')] = i - ('a' - 'A');
5996  }
5997
5998  assert (DIM (special_lower_cp) == DIM (special_upper_cp));
5999  /* specific turkish letters: */
 /* entries at the same index of the two arrays form an upper/lower pair */
6000  for (i = 0; i < (int) DIM (special_lower_cp); i++)
6001  {
6002  lang_lower_TR[special_lower_cp[i]] = special_lower_cp[i];
6003  lang_upper_TR[special_lower_cp[i]] = special_upper_cp[i];
6004
6005  lang_lower_TR[special_upper_cp[i]] = special_lower_cp[i];
6006  lang_upper_TR[special_upper_cp[i]] = special_upper_cp[i];
6007  }
6008
 /* the identifier tables start as a copy of the user tables built so far */
6009  memcpy (lang_upper_i_TR, lang_upper_TR, LANG_CHAR_COUNT_TR * sizeof (lang_upper_TR[0]));
6010  memcpy (lang_lower_i_TR, lang_lower_TR, LANG_CHAR_COUNT_TR * sizeof (lang_lower_TR[0]));
6011
6012  /* identifiers alphabet : same as Unicode data */
6013  lang_upper_i_TR[0x131] = 'I'; /* small letter dotless i */
6014  lang_lower_i_TR[0x130] = 'i'; /* capital letter I with dot above */
6015
6016  /* exceptions in TR casing for user alphabet : */
 /* dotless i <-> 'I' and 'i' <-> I with dot above */
6017  lang_upper_TR[0x131] = 'I'; /* small letter dotless i */
6018  lang_lower_TR[0x131] = 0x131; /* small letter dotless i */
6019  lang_upper_TR['i'] = 0x130; /* capital letter I with dot above */
6020  lang_lower_TR['i'] = 'i';
6021
6022  lang_lower_TR[0x130] = 'i'; /* capital letter I with dot above */
6023  lang_upper_TR[0x130] = 0x130; /* capital letter I with dot above */
6024  lang_upper_TR['I'] = 'I';
6025  lang_lower_TR['I'] = 0x131; /* small letter dotless i */
6026
6027  /* other initializations to follow here */
6028  coll_Utf8_tr_cs.default_lang = ld;
6029
6030  ld->is_initialized = true;
6031 }
6032 
6033 
6034 /*
6035  * Korean Locale Data
6036  */
6037 
6038 /*
6039  * lang_initloc_ko_iso () - init locale data for Korean language with ISO
6040  * charset
6041  * return: nothing
 * ld(in/out): locale data; asserted non-NULL, only flagged as initialized
6042  */
6043 static void
6045 {
6046  assert (ld != NULL);
6047
 /* nothing to build for this locale; just mark it as initialized */
6048  ld->is_initialized = true;
6049 }
6050 
6051 /*
6052  * lang_initloc_ko_utf8 () - init locale data for Korean language with UTF-8
6053  * charset
6054  * return: nothing
 * ld(in/out): locale data; asserted non-NULL
6055  */
6056 static void
6058 {
6059  assert (ld != NULL);
6060
 /* make this locale the default language of the coll_Utf8_ko_cs collation */
6061  coll_Utf8_ko_cs.default_lang = ld;
6062
6063  ld->is_initialized = true;
6064 }
6065 
6066 
6067 /*
6068  * lang_initloc_ko_euc () - init locale data for Korean language with EUC-KR
6069  * charset
6070  * return: nothing
 * ld(in/out): locale data; asserted non-NULL
6071  */
6072 static void
6074 {
6075  assert (ld != NULL);
6076
 /* make this locale the default language of the coll_Euckr_bin collation */
6077  coll_Euckr_bin.default_lang = ld;
6078
6079  ld->is_initialized = true;
6080 }
6081 
6082 /*
6083  * lang_fastcmp_ko () - compare two EUC-KR character strings
6084  *
6085  * Arguments:
6086  * lang_coll: collation data
6087  * string1: 1st character string
6088  * size1: size of 1st string
6089  * string2: 2nd character string
6090  * size2: size of 2nd string
6091  *
6092  * Returns:
6093  * Greater than 0 if string1 > string2
6094  * Equal to 0 if string1 = string2
6095  * Less than 0 if string1 < string2
6096  *
6097  */
6098 static int
6099 lang_fastcmp_ko (const LANG_COLLATION * lang_coll, const unsigned char *string1, int size1,
6100  const unsigned char *string2, int size2, bool ignore_trailing_space)
6101 {
6102  int cmp;
6103  unsigned char c1, c2;
6104  const unsigned char *str1_end;
6105  const unsigned char *str2_end;
6106  const unsigned int *weight = (ignore_trailing_space) ? lang_coll->coll.weights_ti : lang_coll->coll.weights;
6107 
6108  assert (size1 >= 0 && size2 >= 0);
6109 
6110  str1_end = string1 + size1;
6111  str2_end = string2 + size2;
6112 
6113  for (cmp = 0; string1 < str1_end && string2 < str2_end && cmp == 0;)
6114  {
6115  c1 = *string1++;
6116  if (c1 == ASCII_SPACE)
6117  {
6118  c1 = ZERO;
6119  }
6120  else if (c1 == EUC_SPACE && string1 < str1_end && *string1 == EUC_SPACE)
6121  {
6122  c1 = ZERO;
6123  string1++;
6124  }
6125 
6126  c2 = *string2++;
6127  if (c2 == ASCII_SPACE)
6128  {
6129  c2 = ZERO;
6130  }
6131  else if (c2 == EUC_SPACE && string2 < str2_end && *string2 == EUC_SPACE)
6132  {
6133  c2 = ZERO;
6134  string2++;
6135  }
6136  cmp = c1 - c2;
6137  }
6138 
6139  if (cmp != 0)
6140  {
6141  return cmp;
6142  }
6143 
6144  size1 = CAST_BUFLEN (str1_end - string1);
6145  size2 = CAST_BUFLEN (str2_end - string2);
6146 
6147  assert (size1 == 0 || size2 == 0);
6148 
6149  if (size1 == size2)
6150  {
6151  return cmp;
6152  }
6153 
6154  c1 = c2 = ZERO;
6155  if (size1 < size2)
6156  {
6157  assert (size1 == 0 && size2 > 0);
6158 
6159  for (; string2 < str2_end && c2 == ZERO;)
6160  {
6161  c2 = *string2++;
6162  if (c2 == ASCII_SPACE)
6163  {
6164  c2 = weight[SPACE];
6165  }
6166  else if (c2 == EUC_SPACE && string2 < str2_end && *string2 == EUC_SPACE)
6167  {
6168  c2 = weight[SPACE];
6169  string2++;
6170  }
6171  }
6172  }
6173  else
6174  {
6175  assert (size1 > 0 && size2 == 0);
6176 
6177  for (; string1 < str1_end && c1 == ZERO;)
6178  {
6179  c1 = *string1++;
6180  if (c1 == ASCII_SPACE)
6181  {
6182  c1 = weight[SPACE];
6183  }
6184  else if (c1 == EUC_SPACE && string1 < str1_end && *string1 == EUC_SPACE)
6185  {
6186  c1 = weight[SPACE];
6187  string1++;
6188  }
6189  }
6190  }
6191  return c1 - c2;
6192 }
6193 
6194 /*
6195  * lang_mht2str_ko () -
6196  *
6197  * Arguments:
6198  * lang_coll: collation data
6199  * str: character string
6200  * size: size of string
6201  *
6202  *
6203  */
6204 static unsigned int
6205 lang_mht2str_ko (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size)
6206 {
6207  const unsigned char *str_end;
6208  unsigned int pseudo_key = 0;
6209  unsigned int w;
6210 
6211  assert (size >= 0);
6212 
6213  str_end = str + size;
6214 
6215  /* the caller of hash function eliminated only trailing ASCII spaces */
6216  /* eliminate the remaining both trailing EUC and ASCII spaces */
6217  while (str_end > str)
6218  {
6219  if (*(str_end - 1) == ASCII_SPACE)
6220  {
6221  str_end--;
6222  continue;
6223  }
6224  else if (str_end > str + 1 && *(str_end - 1) == EUC_SPACE && *(str_end - 2) == EUC_SPACE)
6225  {
6226  str_end--;
6227  str_end--;
6228  continue;
6229  }
6230  break;
6231  }
6232 
6233  for (; str < str_end;)
6234  {
6235  w = *str++;
6236  if (w == EUC_SPACE && str < str_end && *str == EUC_SPACE)
6237  {
6238  w = ZERO;
6239  str++;
6240  }
6241  else if (w == ASCII_SPACE)
6242  {
6243  w = ZERO;
6244  }
6245 
6246  ADD_TO_HASH (pseudo_key, w);
6247  }
6248 
6249  return pseudo_key;
6250 }
6251 
6252 
6253 /*
6254  * lang_strmatch_ko () - compare two EUC-KR character strings
6255  *
6256  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
6257  * lang_coll(in) : collation data
6258  * is_match(in) : true if match, otherwise is compare
6259  * str1(in):
6260  * size1(in):
6261  * str2(in): this is the pattern string in case of match
6262  * size2(in):
6263  * escape(in): pointer to escape character (multi-byte allowed)
6264  * (used in context of LIKE)
6265  * has_last_escape(in): true if it should check if last character is the
6266  * escape character
6267  * str1_match_size(out): size from str1 which is matched with str2
6268  *
6269  */
6270 static int
6271 lang_strmatch_ko (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
6272  const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
6273  int *str1_match_size, bool ignore_trailing_space)
6274 {
6275  const unsigned char *str1_end;
6276  const unsigned char *str2_end;
6277  const unsigned char *str1_next;
6278  const unsigned char *str2_next;
6279  const unsigned char *str1_begin;
6280  int char1_size, char2_size, cmp = 0;
6281  unsigned int c1, c2;
6282 
6283  assert (size1 >= 0 && size2 >= 0);
6284 
6285  str1_begin = str1;
6286  str1_end = str1 + size1;
6287  str2_end = str2 + size2;
6288 
6289  for (; str1 < str1_end && str2 < str2_end;)
6290  {
6291  assert (str1_end - str1 > 0);
6292  assert (str2_end - str2 > 0);
6293 
6294  str1_next = intl_nextchar_euc (str1, &char1_size);
6295  str2_next = intl_nextchar_euc (str2, &char2_size);
6296 
6297  if (is_match && escape != NULL && memcmp (str2, escape, char2_size) == 0)
6298  {
6299  if (!(has_last_escape && str2_next >= str2_end))
6300  {
6301  str2 = str2_next;
6302  str2_next = intl_nextchar_euc (str2, &char2_size);
6303  }
6304  }
6305 
6306  c1 = *str1;
6307  c2 = *str2;
6308  if (*str1 == ASCII_SPACE || (*str1 == EUC_SPACE && str1 + 1 < str1_end && *(str1 + 1) == EUC_SPACE))
6309  {
6310  c1 = ZERO;
6311  }
6312 
6313  if (*str2 == ASCII_SPACE || (*str2 == EUC_SPACE && str2 + 1 < str2_end && *(str2 + 1) == EUC_SPACE))
6314  {
6315  c2 = ZERO;
6316  }
6317 
6318  if (c1 == c2 && c1 == 0)
6319  {
6320  ;
6321  }
6322  else if (char1_size != char2_size)
6323  {
6324  return (char1_size < char2_size) ? (-1) : 1;
6325  }
6326  else
6327  {
6328  cmp = memcmp (str1, str2, char1_size);
6329  if (cmp != 0)
6330  {
6331  return cmp;
6332  }
6333  }
6334 
6335  str1 = str1_next;
6336  str2 = str2_next;
6337  }
6338 
6339  size1 = CAST_BUFLEN (str1_end - str1);
6340  size2 = CAST_BUFLEN (str2_end - str2);
6341 
6342  if (is_match)
6343  {
6344  assert (str1_match_size != NULL);
6345  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
6346  }
6347 
6348  assert (size1 == 0 || size2 == 0);
6349  assert (cmp == 0);
6350 
6351  if (size1 == size2)
6352  {
6353  return 0;
6354  }
6355  else if (size2 > 0)
6356  {
6357  if (is_match)
6358  {
6359  return -1;
6360  }
6361 
6362  for (; str2 < str2_end;)
6363  {
6364  c2 = *str2++;
6365  if (c2 == ASCII_SPACE)
6366  {
6367  c2 = ZERO;
6368  }
6369  else if (c2 == EUC_SPACE && str2 < str2_end && *str2 == EUC_SPACE)
6370  {
6371  c2 = ZERO;
6372  str2++;
6373  }
6374 
6375  if (c2 > 0)
6376  {
6377  return -1;
6378  }
6379  }
6380  }
6381  else
6382  {
6383  assert (size1 > 0);
6384 
6385  if (is_match)
6386  {
6387  return 0;
6388  }
6389 
6390  for (; str1 < str1_end;)
6391  {
6392  c1 = *str1++;
6393  if (c1 == ASCII_SPACE)
6394  {
6395  c1 = ZERO;
6396  }
6397  else if (c1 == EUC_SPACE && str1 < str1_end && *str1 == EUC_SPACE)
6398  {
6399  c1 = ZERO;
6400  str1++;
6401  }
6402 
6403  if (c1 > 0)
6404  {
6405  return 1;
6406  }
6407  }
6408  }
6409  return cmp;
6410 }
6411 
6412 /*
6413  * lang_next_alpha_char_ko() - computes the next alphabetical char
6414  * return: size in bytes of the next alphabetical char
6415  * lang_coll(in): collation data
6416  * seq(in): pointer to current char
6417  * size(in): size in bytes for seq
6418  * next_seq(in/out): buffer to return next alphabetical char
6419  * len_next(in/out): length in chars for nex_seq
6420  */
6421 static int
6422 lang_next_alpha_char_ko (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
6423  unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
6424 {
6425  int char_size;
6426  assert (seq != NULL);
6427  assert (next_seq != NULL);
6428  assert (len_next != NULL);
6429  assert (size > 0);
6430 
6431  (void) intl_char_size ((unsigned char *) seq, 1, INTL_CODESET_KSC5601_EUC, &char_size);
6432  memcpy (next_seq, seq, char_size);
6433 
6434  assert (char_size <= 3);
6435  /* increment last byte of current character without carry and without mixing ASCII range with korean range; this
6436  * works for EUC-KR characters encoding which don't have terminal byte = FF */
6437  if ((char_size == 1 && *next_seq < 0x7f) || (char_size > 1 && next_seq[char_size - 1] < 0xff))
6438  {
6439  next_seq[char_size - 1]++;
6440  }
6441 
6442  *len_next = 1;
6443  return char_size;
6444 }
6445 
6446 /*
6447  * lang_fastcmp_binary () - string compare for "binary" collation (with binary
6448  * charset). Space character does not count with
6449  * zero weight
6450  * return:
6451  * lang_coll(in):
6452  * string1(in):
6453  * size1(in):
6454  * string2(in):
6455  * size2(in):
6456  */
6457 static int
6458 lang_fastcmp_binary (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
6459  const unsigned char *string2, const int size2, bool ignore_trailing_space)
6460 {
6461  int i, size;
6462 
6463  size = size1 < size2 ? size1 : size2;
6464  for (i = 0; i < size; i++, string1++, string2++)
6465  {
6466  /* compare weights of the two chars */
6467  if (*string1 > *string2)
6468  {
6469  return 1;
6470  }
6471  else if (*string1 < *string2)
6472  {
6473  return -1;
6474  }
6475  }
6476 
6477  if (size1 < size2)
6478  {
6479  size = size2 - size1;
6480  for (i = 0; i < size; i++)
6481  {
6482  /* ignore tailing white spaces */
6483  if (*string2++ > 0)
6484  {
6485  return -1;
6486  }
6487  }
6488  }
6489  else if (size1 > size2)
6490  {
6491  size = size1 - size2;
6492  for (i = 0; i < size; i++)
6493  {
6494  /* ignore trailing white spaces */
6495  if (*string1++ > 0)
6496  {
6497  return 1;
6498  }
6499  }
6500  }
6501 
6502  return 0;
6503 }
6504 
6505 /*
6506  * lang_strmatch_binary () - match or compare two character strings of
6507  * Binary (Raw-byte) codeset
6508  *
6509  * return: negative if str1 < str2, positive if str1 > str2, zero otherwise
6510  * lang_coll(in) : collation data
6511  * is_match(in) : true if match, otherwise is compare
6512  * str1(in):
6513  * size1(in):
6514  * str2(in): this is the pattern string in case of match
6515  * size2(in):
6516  * escape(in): pointer to escape character (multi-byte allowed)
6517  * (used in context of LIKE)
6518  * has_last_escape(in): true if it should check if last character is the
6519  * escape character
6520  * str1_match_size(out): size from str1 which is matched with str2
6521  */
6522 static int
6523 lang_strmatch_binary (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *str1, int size1,
6524  const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape,
6525  int *str1_match_size, bool ignore_trailing_space)
6526 {
6527  unsigned int c1, c2;
6528  const unsigned char *str1_end;
6529  const unsigned char *str2_end;
6530  const unsigned char *str1_begin;
6531 
6532  str1_begin = str1;
6533  str1_end = str1 + size1;
6534  str2_end = str2 + size2;
6535  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
6536  {
6537  assert (str1_end - str1 > 0);
6538  assert (str2_end - str2 > 0);
6539 
6540  c1 = *str1;
6541  c2 = *str2;
6542 
6543  if (is_match && escape != NULL && c2 == *escape)
6544  {
6545  str2++;
6546  if (!(has_last_escape && str2 + 1 >= str2_end))
6547  {
6548  c2 = *str2;
6549  }
6550  }
6551 
6552  if (c1 != c2)
6553  {
6554  return (c1 < c2) ? -1 : 1;
6555  }
6556  }
6557 
6558  size1 = CAST_BUFLEN (str1_end - str1);
6559  size2 = CAST_BUFLEN (str2_end - str2);
6560 
6561  assert (size1 == 0 || size2 == 0);
6562 
6563  if (is_match)
6564  {
6565  assert (str1_match_size != NULL);
6566  *str1_match_size = CAST_BUFLEN (str1 - str1_begin);
6567  }
6568 
6569  if (size1 == size2)
6570  {
6571  return 0;
6572  }
6573  else if (size2 > 0)
6574  {
6575  if (is_match)
6576  {
6577  /* pattern string should be exhausted for a full match */
6578  return -1;
6579  }
6580  for (; str2 < str2_end; str2++)
6581  {
6582  if (*str2 > 0)
6583  {
6584  return -1;
6585  }
6586  }
6587  }
6588  else
6589  {
6590  assert (size1 > 0);
6591 
6592  if (is_match)
6593  {
6594  return 0;
6595  }
6596 
6597  for (; str1 < str1_end; str1++)
6598  {
6599  if (*str1 > 0)
6600  {
6601  return 1;
6602  }
6603  }
6604  }
6605  return 0;
6606 }
6607 
6608 /*
6609  * lang_split_key_binary() - finds the prefix key for "binary" collation
6610  * (binary/raw-byte charset)
6611  *
6612  * return: error status
6613  * lang_coll(in):
6614  * is_desc(in):
6615  * str1(in):
6616  * size1(in):
6617  * str2(in):
6618  * size2(in):
6619  * key(out): key
6620  * byte_size(out): size in bytes of key
6621  *
6622  * Note : this function is used by index prefix computation (BTREE building)
6623  */
6624 static int
6625 lang_split_key_binary (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
6626  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
6627  bool ignore_trailing_space)
6628 {
6629  const unsigned char *str1_end, *str2_end;
6630  const unsigned char *str1_begin, *str2_begin;
6631  int key_size;
6632 
6633  assert (key != NULL);
6634  assert (byte_size != NULL);
6635 
6636  str1_end = str1 + size1;
6637  str2_end = str2 + size2;
6638  str1_begin = str1;
6639  str2_begin = str2;
6640 
6641  for (; str1 < str1_end && str2 < str2_end; str1++, str2++)
6642  {
6643  if (*str1 != *str2)
6644  {
6645  assert ((!is_desc && *str1 < *str2) || (is_desc && *str1 > *str2));
6646  break;
6647  }
6648  }
6649 
6650  if (!is_desc)
6651  { /* normal index */
6652  *key = (unsigned char *) str2_begin;
6653 
6654  /* common part plus a character with non-zero weight */
6655  while (str2 < str2_end)
6656  {
6657  if (*str2++ != 0)
6658  {
6659  break;
6660  }
6661  }
6662  assert (str2 <= str2_end);
6663  key_size = CAST_BUFLEN (str2 - str2_begin);
6664  }
6665  else
6666  { /* reverse index */
6667  assert (is_desc);
6668 
6669  /* common part plus a character with non-zero weight from str1 */
6670  while (str1 < str1_end)
6671  {
6672  if (*str1++ != 0)
6673  {
6674  break;
6675  }
6676  }
6677 
6678  if (str1 >= str1_end)
6679  {
6680  /* str1 exhaused or at last char, we use str2 as key */
6681  *key = (unsigned char *) str2_begin;
6682  key_size = CAST_BUFLEN (str2_end - str2_begin);
6683  }
6684  else
6685  {
6686  assert (str1 < str1_end);
6687  *key = (unsigned char *) str1_begin;
6688  key_size = CAST_BUFLEN (str1 - str1_begin);
6689  }
6690  }
6691 
6692  *byte_size = key_size;
6693 
6694  return NO_ERROR;
6695 }
6696 
6697 
/* GET_SYM_ADDR - looks up symbol `sym` in the loaded library `lib` */
#if defined(WINDOWS)
#define GET_SYM_ADDR(lib, sym) GetProcAddress((HMODULE)lib, sym)
#else
#define GET_SYM_ADDR(lib, sym) dlsym(lib, sym)
#endif

/*
 * SHLIB_GET_ADDR - stores in `v` the address of symbol "<SYM_NAME>_<LOC_NAME>" of library `lh`, cast to SYM_TYPE.
 * The expansion site must provide a `sym_name` character buffer of at least LOC_LIB_SYMBOL_NAME_SIZE bytes and an
 * `error_loading_symbol` label; control jumps to that label when the name cannot be built or the symbol is missing.
 */
#define SHLIB_GET_ADDR(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME) \
  do { \
    if (snprintf (sym_name, LOC_LIB_SYMBOL_NAME_SIZE - 1, "" SYM_NAME "_%s", LOC_NAME) < 0) \
      goto error_loading_symbol; \
    v = (SYM_TYPE) GET_SYM_ADDR (lh, sym_name); \
    if (v == NULL) \
      { \
	goto error_loading_symbol; \
      } \
  } while (0)

/*
 * SHLIB_GET_ADDR_W_REF - two-step lookup: symbol "<SYM_NAME>_ref_<LOC_NAME>" holds the name of the symbol whose
 * address is stored in `v`. Besides `sym_name` and `error_loading_symbol`, the expansion site must provide a
 * `char *temp_char_sym` variable. The referenced name is copied with a bound so that an over-long string coming
 * from the library cannot overflow `sym_name`; a truncated name simply fails the second lookup.
 */
#define SHLIB_GET_ADDR_W_REF(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME) \
  do { \
    snprintf (sym_name, LOC_LIB_SYMBOL_NAME_SIZE, "" SYM_NAME "_ref_%s", \
	      LOC_NAME); \
    temp_char_sym = (char *) GET_SYM_ADDR (lh, sym_name); \
    if (temp_char_sym == NULL) \
      { \
	goto error_loading_symbol; \
      } \
    snprintf (sym_name, LOC_LIB_SYMBOL_NAME_SIZE, "%s", temp_char_sym); \
    v = (SYM_TYPE) GET_SYM_ADDR (lh, sym_name); \
    if (v == NULL) \
      { \
	goto error_loading_symbol; \
      } \
  } while (0)

/*
 * SHLIB_GET_VAL - stores in `v` the value (of type SYM_TYPE) found at symbol "<SYM_NAME>_<LOC_NAME>".
 * No semicolon follows `while (0)`: the invocation supplies it, which keeps the macro usable as the single
 * statement of an if/else branch.
 */
#define SHLIB_GET_VAL(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME) \
  do { \
    SYM_TYPE* aux; \
    SHLIB_GET_ADDR(aux, SYM_NAME, SYM_TYPE*, lh, LOC_NAME); \
    v = *aux; \
  } while (0)
6738 
6739 
6740 /*
6741  * lang_locale_data_load_from_lib() - loads locale data from shared library
6742  *
6743  * return: error code (NO_ERROR on success; ER_LOC_INIT when a symbol cannot be loaded)
6744  * lld(out): lang locale data
6745  * lib_handle(in): library handle handed to the symbol lookup macros
6746  * lf(in): locale file info
6747  * is_load_for_dump (in): true if load is in context of dump tool; collations are not loaded in that case
 *
 * Note: every SHLIB_GET_ADDR / SHLIB_GET_VAL below jumps to the error_loading_symbol label when the requested
 *       symbol cannot be resolved.
 * NOTE(review): listing lines 6865, 6909, 7003, 7013, 7023 and 7031 are absent from this extract (see the
 *               remarks placed where they belong).
6748  */
6749 int
6750 lang_locale_data_load_from_lib (LANG_LOCALE_DATA * lld, void *lib_handle, const LOCALE_FILE * lf, bool is_load_for_dump)
6751 {
6752  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
6753  char err_msg[ERR_MSG_SIZE + PATH_MAX];
6754  char **temp_array_sym;
6755  int *temp_num_sym;
6756  int err_status = NO_ERROR;
6757  int i, count_coll_to_load;
6758  const char *alpha_suffix = NULL;
6759  bool load_w_identifier_name;
6760  int txt_conv_type;
6761  bool sym_loc_name_found = false;
6762 
6763  assert (lld != NULL);
6764  assert (lib_handle != NULL);
6765  assert (lf != NULL);
6766  assert (lf->locale_name != NULL);
6767 
/* the locale name is the first symbol looked up; sym_loc_name_found lets the error path tell "locale not in
 * library" apart from any later missing symbol */
6768  SHLIB_GET_ADDR (lld->lang_name, "locale_name", char *, lib_handle, lf->locale_name);
6769  sym_loc_name_found = true;
6770 
6771  SHLIB_GET_ADDR (lld->checksum, "locale_checksum", char *, lib_handle, lf->locale_name);
/* the checksum string must be exactly 32 characters long */
6772  if (strlen (lld->checksum) != 32)
6773  {
6774  snprintf (err_msg, sizeof (err_msg) - 1, "invalid checksum in locale" " library %s", lf->lib_file);
6775  err_status = ER_LOC_INIT;
6776  LOG_LOCALE_ERROR (err_msg, err_status, false);
6777  goto exit;
6778  }
6779 
/* from here on symbols are suffixed with the locale name read from the library (lld->lang_name) */
6780  SHLIB_GET_ADDR (lld->date_format, "date_format", char *, lib_handle, lld->lang_name);
6781  SHLIB_GET_ADDR (lld->time_format, "time_format", char *, lib_handle, lld->lang_name);
6782  SHLIB_GET_ADDR (lld->datetime_format, "datetime_format", char *, lib_handle, lld->lang_name);
6783  SHLIB_GET_ADDR (lld->timestamp_format, "timestamp_format", char *, lib_handle, lld->lang_name);
6784  SHLIB_GET_ADDR (lld->datetimetz_format, "datetimetz_format", char *, lib_handle, lld->lang_name);
6785  SHLIB_GET_ADDR (lld->timestamptz_format, "timestamptz_format", char *, lib_handle, lld->lang_name);
6786 
/* calendar names: the string pointers of each library array are copied into the locale data */
6787  SHLIB_GET_ADDR (temp_array_sym, "month_names_abbreviated", char **, lib_handle, lld->lang_name);
6788  for (i = 0; i < CAL_MONTH_COUNT; i++)
6789  {
6790  lld->month_short_name[i] = temp_array_sym[i];
6791  }
6792 
6793  SHLIB_GET_ADDR (temp_array_sym, "month_names_wide", char **, lib_handle, lld->lang_name);
6794  for (i = 0; i < CAL_MONTH_COUNT; i++)
6795  {
6796  lld->month_name[i] = temp_array_sym[i];
6797  }
6798 
6799  SHLIB_GET_ADDR (temp_array_sym, "day_names_abbreviated", char **, lib_handle, lld->lang_name);
6800  for (i = 0; i < CAL_DAY_COUNT; i++)
6801  {
6802  lld->day_short_name[i] = temp_array_sym[i];
6803  }
6804 
6805  SHLIB_GET_ADDR (temp_array_sym, "day_names_wide", char **, lib_handle, lld->lang_name);
6806  for (i = 0; i < CAL_DAY_COUNT; i++)
6807  {
6808  lld->day_name[i] = temp_array_sym[i];
6809  }
6810 
6811  SHLIB_GET_ADDR (temp_array_sym, "am_pm", char **, lib_handle, lld->lang_name);
6812  for (i = 0; i < CAL_AM_PM_COUNT; i++)
6813  {
6814  lld->am_pm[i] = temp_array_sym[i];
6815  }
6816 
6817  SHLIB_GET_ADDR (lld->day_short_parse_order, "day_names_abbr_parse_order", char *, lib_handle, lld->lang_name);
6818 
6819  SHLIB_GET_ADDR (lld->day_parse_order, "day_names_wide_parse_order", char *, lib_handle, lld->lang_name);
6820 
6821  SHLIB_GET_ADDR (lld->month_short_parse_order, "month_names_abbr_parse_order", char *, lib_handle, lld->lang_name);
6822 
6823  SHLIB_GET_ADDR (lld->month_parse_order, "month_names_wide_parse_order", char *, lib_handle, lld->lang_name);
6824 
6825  SHLIB_GET_ADDR (lld->am_pm_parse_order, "am_pm_parse_order", char *, lib_handle, lld->lang_name);
6826 
/* number formatting symbols and default currency are stored in the library by value */
6827  SHLIB_GET_VAL (lld->number_decimal_sym, "number_decimal_sym", char, lib_handle, lld->lang_name);
6828 
6829  SHLIB_GET_VAL (lld->number_group_sym, "number_group_sym", char, lib_handle, lld->lang_name);
6830 
6831  int currency_code;
6832  SHLIB_GET_VAL (currency_code, "default_currency_code", int, lib_handle, lld->lang_name);
6833 
6834  lld->default_currency_code = (DB_CURRENCY) currency_code;
6835 
6836  /* alphabet */
6837  SHLIB_GET_ADDR (temp_num_sym, "alphabet_a_type", int *, lib_handle, lld->lang_name);
6838  assert (*temp_num_sym >= ALPHABET_UNICODE && *temp_num_sym <= ALPHABET_TAILORED);
6839  lld->alphabet.a_type = (ALPHABET_TYPE) (*temp_num_sym);
6840 
/* the alphabet type selects the suffix handed to the alphabet loader: "unicode", "ascii" or the locale name */
6841  if (lld->alphabet.a_type == ALPHABET_UNICODE)
6842  {
6843  alpha_suffix = "unicode";
6844  }
6845  else if (lld->alphabet.a_type == ALPHABET_ASCII)
6846  {
6847  alpha_suffix = "ascii";
6848  }
6849  else
6850  {
6851  alpha_suffix = lld->lang_name;
6852  }
6853  err_status = lang_locale_load_alpha_from_lib (&(lld->alphabet), false, alpha_suffix, lib_handle, lf);
6854  if (err_status != NO_ERROR)
6855  {
6856  goto exit;
6857  }
6858 
6859  /* identifier alphabet */
6860  SHLIB_GET_ADDR (temp_num_sym, "ident_alphabet_a_type", int *, lib_handle, lld->lang_name);
6861  assert (*temp_num_sym >= ALPHABET_UNICODE && *temp_num_sym <= ALPHABET_TAILORED);
6862  lld->ident_alphabet.a_type = (ALPHABET_TYPE) (*temp_num_sym);
6863 
6864  load_w_identifier_name = false;
/* NOTE(review): listing line 6865 is absent from this extract; judging by the else-if below it presumably held
 * the `if` testing lld->ident_alphabet.a_type against ALPHABET_UNICODE - confirm against the full file */
6866  {
6867  alpha_suffix = "unicode";
6868  }
6869  else if (lld->ident_alphabet.a_type == ALPHABET_ASCII)
6870  {
6871  alpha_suffix = "ascii";
6872  }
6873  else
6874  {
6875  alpha_suffix = lld->lang_name;
6876  load_w_identifier_name = true;
6877  }
6878 
6879  err_status =
6880  lang_locale_load_alpha_from_lib (&(lld->ident_alphabet), load_w_identifier_name, alpha_suffix, lib_handle, lf);
6881  if (err_status != NO_ERROR)
6882  {
6883  goto exit;
6884  }
6885 
6886  /* console conversion */
6887  SHLIB_GET_VAL (txt_conv_type, "tc_conv_type", int, lib_handle, lld->lang_name);
6888 
/* the two built-in conversions reuse static objects, "no conversion" is a NULL pointer, any other type gets a
 * freshly allocated TEXT_CONVERSION filled from library symbols */
6889  if (txt_conv_type == TEXT_CONV_ISO_88591_BUILTIN)
6890  {
6891  lld->txt_conv = &con_Iso_8859_1_conv;
6892  }
6893  else if (txt_conv_type == TEXT_CONV_ISO_88599_BUILTIN)
6894  {
6895  lld->txt_conv = &con_Iso_8859_9_conv;
6896  }
6897  else if (txt_conv_type == TEXT_CONV_NO_CONVERSION)
6898  {
6899  lld->txt_conv = NULL;
6900  }
6901  else
6902  {
6903  unsigned char *is_lead_byte;
6904  assert (txt_conv_type == TEXT_CONV_GENERIC_1BYTE || txt_conv_type == TEXT_CONV_GENERIC_2BYTE);
6905 
6906  lld->txt_conv = (TEXT_CONVERSION *) malloc (sizeof (TEXT_CONVERSION));
6907  if (lld->txt_conv == NULL)
6908  {
/* NOTE(review): listing line 6909 is absent from this extract */
6910  err_status = ER_OUT_OF_VIRTUAL_MEMORY;
6911  goto exit;
6912  }
6913  memset (lld->txt_conv, 0, sizeof (TEXT_CONVERSION));
6914 
6915  lld->txt_conv->conv_type = (TEXT_CONV_TYPE) txt_conv_type;
6916 
/* NOTE(review): if one of the lookups below fails, control jumps to error_loading_symbol, which does not free
 * lld->txt_conv inside this function - confirm that the caller releases it */
6917  SHLIB_GET_ADDR (is_lead_byte, "tc_is_lead_byte", unsigned char *, lib_handle, lld->lang_name);
6918  memcpy (lld->txt_conv->byte_flag, is_lead_byte, 256);
6919 
6920  SHLIB_GET_VAL (lld->txt_conv->utf8_first_cp, "tc_utf8_first_cp", unsigned int, lib_handle, lld->lang_name);
6921 
6922  SHLIB_GET_VAL (lld->txt_conv->utf8_last_cp, "tc_utf8_last_cp", unsigned int, lib_handle, lld->lang_name);
6923 
6924  SHLIB_GET_VAL (lld->txt_conv->text_first_cp, "tc_text_first_cp", unsigned int, lib_handle, lld->lang_name);
6925 
6926  SHLIB_GET_VAL (lld->txt_conv->text_last_cp, "tc_text_last_cp", unsigned int, lib_handle, lld->lang_name);
6927 
6928  SHLIB_GET_ADDR (lld->txt_conv->win_codepages, "tc_win_codepages", char *, lib_handle, lld->lang_name);
6929 
6930  SHLIB_GET_ADDR (lld->txt_conv->nl_lang_str, "tc_nl_lang_str", char *, lib_handle, lld->lang_name);
6931 
6932  SHLIB_GET_ADDR (lld->txt_conv->utf8_to_text, "tc_utf8_to_text", CONV_CP_TO_BYTES *, lib_handle, lld->lang_name);
6933 
6934  SHLIB_GET_ADDR (lld->txt_conv->text_to_utf8, "tc_text_to_utf8", CONV_CP_TO_BYTES *, lib_handle, lld->lang_name);
6935  }
6936 
6937  err_status = lang_locale_load_normalization_from_lib (&(lld->unicode_norm), lib_handle, lf);
6938  if (err_status != NO_ERROR)
6939  {
6940  goto exit;
6941  }
6942 
6943  /* collation data */
/* the dump tool does not need collations: stop here with the current (NO_ERROR) status */
6944  if (is_load_for_dump)
6945  {
6946  goto exit;
6947  }
6948 
6949  err_status = lang_load_count_coll_from_lib (&count_coll_to_load, lib_handle, lf);
6950  if (err_status != NO_ERROR)
6951  {
6952  goto exit;
6953  }
6954 
6955  for (i = 0; i < count_coll_to_load; i++)
6956  {
6957  /* get name of collation */
6958  char *collation_name = NULL;
6959  LANG_COLLATION *lang_coll = NULL;
6960  COLL_DATA *coll = NULL;
6961 
6962  err_status = lang_load_get_coll_name_from_lib (i, &collation_name, lib_handle, lf);
6963  if (err_status != NO_ERROR)
6964  {
6965  goto exit;
6966  }
6967 
6968  if (lang_get_collation_by_name (collation_name) != NULL)
6969  {
6970  /* collation already loaded */
6971  continue;
6972  }
6973 
6974  lang_coll = (LANG_COLLATION *) malloc (sizeof (LANG_COLLATION));
6975  if (lang_coll == NULL)
6976  {
6977  LOG_LOCALE_ERROR ("memory allocation failed", ER_LOC_INIT, false);
6978  err_status = ER_LOC_INIT;
6979  goto exit;
6980  }
6981  memset (lang_coll, 0, sizeof (LANG_COLLATION));
6982 
/* the structure was zero-filled above, so copying at most size - 1 bytes leaves the name terminated */
6983  assert (strlen (collation_name) < (int) sizeof (lang_coll->coll.coll_name));
6984  strncpy (lang_coll->coll.coll_name, collation_name, sizeof (lang_coll->coll.coll_name) - 1);
6985 
6986  coll = &(lang_coll->coll);
6987  err_status = lang_load_coll_from_lib (coll, lib_handle, lf);
6988  if (err_status != NO_ERROR)
6989  {
6990  assert (lang_coll != NULL);
6991  free (lang_coll);
6992  goto exit;
6993  }
6994 
/* collations loaded from a library are set up as UTF-8, non built-in collations */
6995  lang_coll->codeset = INTL_CODESET_UTF8;
6996  lang_coll->built_in = 0;
6997 
6998  /* by default enable optimizations */
6999  lang_coll->options.allow_like_rewrite = true;
7000  lang_coll->options.allow_index_opt = true;
7001  lang_coll->options.allow_prefix_index = true;
7002 
/* NOTE(review): listing line 7003 (the condition guarding the block below) is absent from this extract */
7004  {
7005  lang_coll->options.allow_index_opt = false;
7006  lang_coll->options.allow_like_rewrite = false;
7007  }
7008 
/* pick the function set by collation kind: with expansions, with contractions, or plain.
 * NOTE(review): listing lines 7013, 7023 and 7031 (one line in each branch) are absent from this extract */
7009  if (coll->uca_exp_num > 1)
7010  {
7011  lang_coll->fastcmp = lang_strcmp_utf8_uca;
7012  lang_coll->strmatch = lang_strmatch_utf8_uca;
7014  lang_coll->split_key = lang_split_key_w_exp;
7015  lang_coll->mht2str = lang_mht2str_utf8_exp;
7016  lang_coll->options.allow_like_rewrite = false;
7017  lang_coll->options.allow_prefix_index = false;
7018  }
7019  else if (coll->count_contr > 0)
7020  {
7021  lang_coll->fastcmp = lang_strcmp_utf8_w_contr;
7022  lang_coll->strmatch = lang_strmatch_utf8_w_contr;
7024  lang_coll->split_key = lang_split_key_utf8;
7025  lang_coll->mht2str = lang_mht2str_utf8;
7026  }
7027  else
7028  {
7029  lang_coll->fastcmp = lang_strcmp_utf8;
7030  lang_coll->strmatch = lang_strmatch_utf8;
7032  lang_coll->split_key = lang_split_key_utf8;
7033  lang_coll->mht2str = lang_mht2str_utf8;
7034  }
7035 
7036  err_status = register_collation (lang_coll);
7037  if (err_status != NO_ERROR)
7038  {
7039  assert (lang_coll != NULL);
7040  free (lang_coll);
7041  goto exit;
7042  }
7043 
7044  lang_coll->default_lang = lld;
7045 
7046  /* first collation in locale is the default collation of locale */
7047  if (lld->default_lang_coll == NULL)
7048  {
7049  lld->default_lang_coll = lang_coll;
7050  }
7051  }
7052 
7053 
7054 exit:
7055  return err_status;
7056 
/* reached only through the goto hidden in the SHLIB_GET_* macros; sym_name holds the last symbol name built */
7057 error_loading_symbol:
7058  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7059  sym_name, lf->lib_file, lf->locale_name);
7060  if (!sym_loc_name_found)
7061  {
7062  strcat (err_msg,
7063  "\n Locale might not be compiled into the selected " "library.\n Check configuration and recompile locale"
7064  ", if necessary,\n using the make_locale script");
7065  }
7066  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, is_load_for_dump);
7067 
7068  return ER_LOC_INIT;
7069 }
7070 
7071 /*
7072  * lang_load_count_coll_from_lib() - reads and returns the number of
7073  * collations in library
7074  *
7075  * return: error code
7076  * count_coll(out): number of collations in lib associated with locale
7077  * lib_handle(in):
7078  * lf(in): locale file info
7079  */
7080 int
7081 lang_load_count_coll_from_lib (int *count_coll, void *lib_handle, const LOCALE_FILE * lf)
7082 {
7083  char err_msg[ERR_MSG_SIZE + PATH_MAX];
7084  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
7085 
7086  assert (count_coll != NULL);
7087  assert (lib_handle != NULL);
7088  assert (lf != NULL);
7089  assert (lf->locale_name != NULL);
7090 
7091  SHLIB_GET_VAL (*count_coll, "count_coll", int, lib_handle, lf->locale_name);
7092 
7093  return NO_ERROR;
7094 
7095 error_loading_symbol:
7096  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7097  sym_name, lf->lib_file, lf->locale_name);
7098  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7099 
7100  return ER_LOC_INIT;
7101 }
7102 
7103 /*
7104  * lang_load_get_coll_name_from_lib() - reads and returns the name of n-th
7105  * collation in library
7106  *
7107  * return: error code
7108  * coll_pos(in): collation index to return
7109  * coll_name(out): name of collation
7110  * lib_handle(in):
7111  * lf(in): locale file info
7112  */
7113 int
7114 lang_load_get_coll_name_from_lib (const int coll_pos, char **coll_name, void *lib_handle, const LOCALE_FILE * lf)
7115 {
7116  char err_msg[ERR_MSG_SIZE + PATH_MAX];
7117  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
7118  char coll_suffix[COLL_NAME_SIZE + LANG_MAX_LANGNAME + 5];
7119 
7120  assert (coll_name != NULL);
7121  assert (lib_handle != NULL);
7122  assert (lf != NULL);
7123  assert (lf->locale_name != NULL);
7124 
7125  *coll_name = NULL;
7126  snprintf (coll_suffix, sizeof (coll_suffix) - 1, "%d_%s", coll_pos, lf->locale_name);
7127  SHLIB_GET_ADDR (*coll_name, "collation", char *, lib_handle, coll_suffix);
7128 
7129  return NO_ERROR;
7130 
7131 error_loading_symbol:
7132  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7133  sym_name, lf->lib_file, lf->locale_name);
7134  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7135 
7136  return ER_LOC_INIT;
7137 }
7138 
7139 /*
7140  * lang_load_coll_from_lib() - loads collation data from library
7141  *
7142  * return: error code (NO_ERROR on success; ER_LOC_INIT on a name mismatch or when a symbol cannot be loaded)
7143  * cd(out): collation data; cd->coll_name is read on entry and used as suffix of every symbol name
7144  * lib_handle(in): library handle handed to the symbol lookup macros
7145  * lf(in): locale file info
 *
 * Note: every SHLIB_GET_* macro below jumps to the error_loading_symbol label when a symbol cannot be resolved.
 * NOTE(review): listing line 7224 is absent from this extract (see the remark placed where it belongs).
7146  */
7147 int
7148 lang_load_coll_from_lib (COLL_DATA * cd, void *lib_handle, const LOCALE_FILE * lf)
7149 {
7150  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
7151  char *temp_char_sym;
7152  int *temp_num_sym;
7153  char err_msg[ERR_MSG_SIZE + PATH_MAX];
7154  int err_status = NO_ERROR;
7155  char *coll_checksum = NULL;
7156 
7157  assert (cd != NULL);
7158  assert (lib_handle != NULL);
7159  assert (lf != NULL);
7160  assert (lf->locale_name != NULL);
7161 
/* the name stored in the library must be identical to the name the caller asked for */
7162  SHLIB_GET_ADDR (temp_char_sym, "coll_name", char *, lib_handle, cd->coll_name);
7163 
7164  if (strcmp (temp_char_sym, cd->coll_name))
7165  {
7166  err_status = ER_LOC_INIT;
7167  snprintf (err_msg, sizeof (err_msg) - 1, "Collation %s not found in shared library %s", cd->coll_name,
7168  lf->lib_file);
7169  LOG_LOCALE_ERROR (err_msg, err_status, false);
7170  goto exit;
7171  }
7172 
/* copy at most 32 checksum characters and terminate the string at index 32 */
7173  SHLIB_GET_ADDR (coll_checksum, "coll_checksum", char *, lib_handle, cd->coll_name);
7174  strncpy (cd->checksum, coll_checksum, 32);
7175  cd->checksum[32] = '\0';
7176 
7177  SHLIB_GET_VAL (cd->coll_id, "coll_id", int, lib_handle, cd->coll_name);
7178 
/* collation settings (stored in cd->uca_opt) */
7179  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_strength", int *, lib_handle, cd->coll_name);
7180  assert (*temp_num_sym >= TAILOR_UNDEFINED && *temp_num_sym <= TAILOR_IDENTITY);
7181  cd->uca_opt.sett_strength = (T_LEVEL) * temp_num_sym;
7182 
7183  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_backwards", int *, lib_handle, cd->coll_name);
7184  cd->uca_opt.sett_backwards = (bool) * temp_num_sym;
7185 
7186  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_caseLevel", int *, lib_handle, cd->coll_name);
7187  cd->uca_opt.sett_caseLevel = (bool) * temp_num_sym;
7188 
7189  SHLIB_GET_VAL (cd->uca_opt.sett_caseFirst, "coll_sett_caseFirst", int, lib_handle, cd->coll_name);
7190 
7191  SHLIB_GET_ADDR (temp_num_sym, "coll_sett_expansions", int *, lib_handle, cd->coll_name);
7192  cd->uca_opt.sett_expansions = (bool) * temp_num_sym;
7193 
7194  SHLIB_GET_VAL (cd->uca_opt.sett_contr_policy, "coll_sett_contr_policy", int, lib_handle, cd->coll_name);
7195 
7196  SHLIB_GET_VAL (cd->w_count, "coll_w_count", int, lib_handle, cd->coll_name);
7197 
7198  SHLIB_GET_VAL (cd->uca_exp_num, "coll_uca_exp_num", int, lib_handle, cd->coll_name);
7199 
7200  SHLIB_GET_VAL (cd->count_contr, "coll_count_contr", int, lib_handle, cd->coll_name);
7201 
7202  SHLIB_GET_ADDR (temp_num_sym, "coll_match_contr", int *, lib_handle, cd->coll_name);
7203  cd->uca_opt.sett_match_contr = (COLL_MATCH_CONTR) * temp_num_sym;
7204 
/* contraction data is looked up only when the collation has contractions; the *_W_REF lookups first read a
 * "<name>_ref_<collation>" symbol holding the name of the symbol that carries the data */
7205  if (cd->count_contr > 0)
7206  {
7207  SHLIB_GET_ADDR_W_REF (cd->contr_list, "coll_contr_list", COLL_CONTRACTION *, lib_handle, cd->coll_name);
7208 
7209  SHLIB_GET_VAL (cd->contr_min_size, "coll_contr_min_size", int, lib_handle, cd->coll_name);
7210 
7211  SHLIB_GET_VAL (cd->cp_first_contr_offset, "coll_cp_first_contr_offset", int, lib_handle, cd->coll_name);
7212 
7213  SHLIB_GET_VAL (cd->cp_first_contr_count, "coll_cp_first_contr_count", int, lib_handle, cd->coll_name);
7214 
7215  SHLIB_GET_ADDR_W_REF (cd->cp_first_contr_array, "coll_cp_first_contr_array", int *, lib_handle, cd->coll_name);
7216  }
7217 
/* weights: UCA tables for collations with expansions, a single weight array otherwise */
7218  if (cd->uca_opt.sett_expansions)
7219  {
7220  assert (cd->uca_exp_num > 1);
7221 
7222  SHLIB_GET_ADDR_W_REF (cd->uca_w_l13, "coll_uca_w_l13", UCA_L13_W *, lib_handle, cd->coll_name);
7223 
/* NOTE(review): listing line 7224 (the condition guarding the load of the level 4 weights) is absent from this
 * extract */
7225  {
7226  SHLIB_GET_ADDR_W_REF (cd->uca_w_l4, "coll_uca_w_l4", UCA_L4_W *, lib_handle, cd->coll_name);
7227  }
7228 
7229  SHLIB_GET_ADDR_W_REF (cd->uca_num, "coll_uca_num", char *, lib_handle, cd->coll_name);
7230  }
7231  else
7232  {
7233  SHLIB_GET_ADDR_W_REF (cd->weights, "coll_weights", unsigned int *, lib_handle, cd->coll_name);
7234  }
7235 
7236  SHLIB_GET_ADDR_W_REF (cd->next_cp, "coll_next_cp", unsigned int *, lib_handle, cd->coll_name);
7237 
7238 exit:
7239  return err_status;
7240 
/* reached only through the goto hidden in the SHLIB_GET_* macros; sym_name holds the last symbol name built */
7241 error_loading_symbol:
7242  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7243  sym_name, lf->lib_file, lf->locale_name);
7244  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7245 
7246  return ER_LOC_INIT;
7247 }
7248 
7249 /*
7250  * lang_locale_load_alpha_from_lib() - loads locale data from shared libray
7251  *
7252  * return: error code
7253  * a(in/out): alphabet to load
7254  * load_w_identifier_name(in): true if alphabet is to be load as "identifier"
7255  * name
7256  * lib_handle(in):
7257  * lf(in): locale file info
7258  */
7259 static int
7260 lang_locale_load_alpha_from_lib (ALPHABET_DATA * a, bool load_w_identifier_name, const char *alpha_suffix,
7261  void *lib_handle, const LOCALE_FILE * lf)
7262 {
7263  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
7264  char err_msg[ERR_MSG_SIZE + PATH_MAX];
7265  int err_status = NO_ERROR;
7266 
7267  assert (a != NULL);
7268  assert (lib_handle != NULL);
7269  assert (lf != NULL);
7270  assert (lf->locale_name != NULL);
7271 
7272  if (load_w_identifier_name)
7273  {
7274  SHLIB_GET_VAL (a->l_count, "ident_alphabet_l_count", int, lib_handle, alpha_suffix);
7275 
7276  SHLIB_GET_VAL (a->lower_multiplier, "ident_alphabet_lower_multiplier", int, lib_handle, alpha_suffix);
7277 
7278  SHLIB_GET_VAL (a->upper_multiplier, "ident_alphabet_upper_multiplier", int, lib_handle, alpha_suffix);
7279 
7280  SHLIB_GET_ADDR (a->lower_cp, "ident_alphabet_lower_cp", unsigned int *, lib_handle, alpha_suffix);
7281 
7282  SHLIB_GET_ADDR (a->upper_cp, "ident_alphabet_upper_cp", unsigned int *, lib_handle, alpha_suffix);
7283  }
7284  else
7285  {
7286  SHLIB_GET_VAL (a->l_count, "alphabet_l_count", int, lib_handle, alpha_suffix);
7287 
7288  SHLIB_GET_VAL (a->lower_multiplier, "alphabet_lower_multiplier", int, lib_handle, alpha_suffix);
7289 
7290  SHLIB_GET_VAL (a->upper_multiplier, "alphabet_upper_multiplier", int, lib_handle, alpha_suffix);
7291 
7292  SHLIB_GET_ADDR (a->lower_cp, "alphabet_lower_cp", unsigned int *, lib_handle, alpha_suffix);
7293 
7294  SHLIB_GET_ADDR (a->upper_cp, "alphabet_upper_cp", unsigned int *, lib_handle, alpha_suffix);
7295  }
7296 
7297  return err_status;
7298 
7299 error_loading_symbol:
7300  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7301  sym_name, lf->lib_file, lf->locale_name);
7302  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7303 
7304  return ER_LOC_INIT;
7305 }
7306 
7307 /*
7308  * lang_load_library - loads the locale specific DLL/so
7309  * Returns : error code - ER_LOC_INIT if library load fails
7310  * - NO_ERROR if success
7311  * lib_file(in) : path to library
7312  * handle(out) : handle to the loaded library
7313  */
7314 int
7315 lang_load_library (const char *lib_file, void **handle)
7316 {
7317  int err_status = NO_ERROR;
7318  char err_msg[ERR_MSG_SIZE];
7319 #if defined(WINDOWS)
7320  DWORD loading_err;
7321  char *lpMsgBuf;
7322  UINT error_mode = 0;
7323 #else
7324  char *error;
7325 #endif
7326 
7327  assert (lib_file != NULL);
7328 
7329 #if defined(WINDOWS)
7330  error_mode = SetErrorMode (SEM_NOOPENFILEERRORBOX | SEM_FAILCRITICALERRORS);
7331  *handle = LoadLibrary (lib_file);
7332  SetErrorMode (error_mode);
7333  loading_err = GetLastError ();
7334 #else
7335  dlerror (); /* Clear any existing error */
7336  *handle = dlopen (lib_file, RTLD_NOW);
7337 #endif
7338 
7339  if (*handle == NULL)
7340  {
7341  err_status = ER_LOC_INIT;
7342 #if defined(WINDOWS)
7343  FormatMessage (FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ARGUMENT_ARRAY, NULL,
7344  loading_err, MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT), (char *) &lpMsgBuf, 1,
7345  (va_list *) & lib_file);
7346  snprintf (err_msg, sizeof (err_msg) - 1,
7347  "Library file is invalid or not accessible.\n" " Unable to load %s !\n %s", lib_file, lpMsgBuf);
7348  LocalFree (lpMsgBuf);
7349 #else
7350  error = dlerror ();
7351  snprintf (err_msg, sizeof (err_msg) - 1,
7352  "Library file is invalid or not accessible.\n" " Unable to load %s !\n %s", lib_file, error);
7353 #endif
7354  LOG_LOCALE_ERROR (err_msg, err_status, false);
7355  }
7356 
7357  return err_status;
7358 }
7359 
7360 /*
7361  * lang_unload_libraries - unloads the loaded locale libraries (DLL/so)
7362  * and frees additional data.
 *
 * Closes the first loclib_Handle_count entries of loclib_Handle (FreeLibrary
 * on Windows, dlclose elsewhere) and clears each slot, then frees the handle
 * array itself and resets loclib_Handle to NULL and loclib_Handle_count to 0.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent between "static void" and "{" in this listing.
7363  */
7364 static void
7366 {
7367  int i;
7368 
7369  for (i = 0; i < loclib_Handle_count; i++)
7370  {
 /* every counted slot is expected to hold a live handle */
7371  assert (loclib_Handle[i] != NULL);
7372 #if defined(WINDOWS)
7373  FreeLibrary ((HMODULE) loclib_Handle[i]);
7374 #else
7375  dlclose (loclib_Handle[i]);
7376 #endif
7377  loclib_Handle[i] = NULL;
7378  }
 /* release the array that held the handles */
7379  free (loclib_Handle);
7380  loclib_Handle = NULL;
7381  loclib_Handle_count = 0;
7382 }
7383 
7384 /*
7385  * destroy_user_locales - frees the memory holding the locales already loaded
7386  * from the locale libraries (DLL/so)
 *
 * Passes each of the first lang_Count_locales entries of lang_Loaded_locales
 * to free_lang_locale_data, clears the slot, and finally resets
 * lang_Count_locales to 0.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent between "static void" and "{" in this listing.
7387  */
7388 static void
7390 {
7391  int i;
7392 
7393  for (i = 0; i < lang_Count_locales; i++)
7394  {
 /* every counted slot is expected to be populated */
7395  assert (lang_Loaded_locales[i] != NULL);
7396 
7397  free_lang_locale_data (lang_Loaded_locales[i]);
7398  lang_Loaded_locales[i] = NULL;
7399  }
7400 
7401  lang_Count_locales = 0;
7402 }
7403 
7404 /*
7405  * lang_locale_load_normalization_from_lib - loads normalization data from
7406  * the locale library
 *
 * return: NO_ERROR, or ER_LOC_INIT when the error_loading_symbol path is taken
 * norm(out): normalization structure; zeroed first, then its
 *            unicode_mappings, unicode_mappings_count, unicode_mapping_index
 *            and list_full_decomp members are set
 * lib_handle(in): handle passed to the SHLIB_GET_* lookups
 * lf(in): locale file info; only read when building the error message
 *
 * NOTE(review): in this listing the declarator line and the closing argument
 * line of the first three SHLIB_GET_* invocations are absent.
7407  */
7408 static int
7410 {
 /* NOTE(review): sym_name is not written explicitly here; presumably the SHLIB_GET_* macros fill it - confirm */
7411  char sym_name[LOC_LIB_SYMBOL_NAME_SIZE + 1];
7412  char err_msg[ERR_MSG_SIZE + PATH_MAX];
7413 
7414  assert (norm != NULL);
7415 
 /* start from an all-zero structure so nothing stale survives */
7416  memset (norm, 0, sizeof (UNICODE_NORMALIZATION));
7417 
 /* presumably each lookup jumps to error_loading_symbol when the symbol is missing - confirm in the macro definitions */
7418  SHLIB_GET_ADDR (norm->unicode_mappings, "unicode_mappings", UNICODE_MAPPING *, lib_handle,
7420  SHLIB_GET_VAL (norm->unicode_mappings_count, "unicode_mappings_count", int, lib_handle,
7422  SHLIB_GET_ADDR (norm->unicode_mapping_index, "unicode_mapping_index", int *, lib_handle,
7424  SHLIB_GET_ADDR (norm->list_full_decomp, "list_full_decomp", int *, lib_handle, UNICODE_NORMALIZATION_DECORATOR);
7425 
7426  return NO_ERROR;
7427 
7428 error_loading_symbol:
 /* report which symbol, library file and locale were involved */
7429  snprintf (err_msg, sizeof (err_msg) - 1, "Cannot load symbol %s from the library file %s " "for the %s locale!",
7430  sym_name, lf->lib_file, lf->locale_name);
7431  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7432 
7433  return ER_LOC_INIT;
7434 }
7435 
7436 /*
7437  * lang_get_generic_unicode_norm - gets the global unicode
7438  * normalization structure
7439  * Returns: the pointer currently stored in generic_Unicode_norm
 *
 * NOTE(review): the return-type and declarator lines are absent before "{"
 * in this listing.
7440  */
7443 {
7444  return generic_Unicode_norm;
7445 }
7446 
7447 /*
7448  * lang_set_generic_unicode_norm - sets the global unicode
7449  * normalization structure
 *
 * norm(in): pointer stored as-is in generic_Unicode_norm; no copy is made
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent between "void" and "{" in this listing.
7450  */
7451 void
7453 {
7454  generic_Unicode_norm = norm;
7455 }
7456 
7457 /*
7458  * lang_free_collations - frees all collation data
 *
 * Does nothing when lang_Count_collations is zero or negative. Otherwise
 * walks all LANG_MAX_COLLATIONS slots of lang_Collations, calls free () on
 * every entry whose built_in flag is not set, and clears each slot.
 *
 * NOTE(review): in this listing the declarator line is absent between
 * "static void" and "{", and one line is absent just before the closing
 * brace (listing line 7479).
7459  */
7460 static void
7462 {
7463  int i;
7464 
7465  if (lang_Count_collations <= 0)
7466  {
7467  return;
7468  }
 /* the loop covers every slot, not only the first lang_Count_collations ones */
7469  for (i = 0; i < LANG_MAX_COLLATIONS; i++)
7470  {
7471  assert (lang_Collations[i] != NULL);
 /* built-in collations are not passed to free () */
7472  if (!(lang_Collations[i]->built_in))
7473  {
7474  free (lang_Collations[i]);
7475  }
7476  lang_Collations[i] = NULL;
7477  }
7478 
7480 }
7481 
7482 /*
7483  * lang_check_coll_compat - checks compatibility of current collations (of
7484  * running process) with a reference set of
7485  * collations
7486  * Returns : error code
7487  * coll_array(in): reference collations
7488  * coll_cnt(in):
7489  * client_text(in): text to display in message error for client (this can be
7490  * "server" when checking server vs database)
7491  * server_text(in): text to display in message error for server (this can be
7492  * "database" when checking server vs database)
7493  */
7494 int
7495 lang_check_coll_compat (const LANG_COLL_COMPAT * coll_array, const int coll_cnt, const char *client_text,
7496  const char *server_text)
7497 {
7498  char err_msg[ERR_MSG_SIZE];
7499  int i;
7500  int er_status = NO_ERROR;
7501 
7502  assert (coll_array != NULL);
7503  assert (coll_cnt > 0);
7504  assert (client_text != NULL);
7505  assert (server_text != NULL);
7506 
7507  if (lang_Count_collations != coll_cnt)
7508  {
7509  snprintf (err_msg, sizeof (err_msg) - 1,
7510  "Number of collations do not match : " "%s has %d collations, %s has %d collations", client_text,
7511  lang_Count_collations, server_text, coll_cnt);
7512  er_status = ER_LOC_INIT;
7513  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7514  goto exit;
7515  }
7516 
7517  for (i = 0; i < coll_cnt; i++)
7518  {
7519  const LANG_COLL_COMPAT *ref_c;
7520  LANG_COLLATION *lc;
7521 
7522  ref_c = &(coll_array[i]);
7523 
7524  assert (ref_c->coll_id >= 0 && ref_c->coll_id < LANG_MAX_COLLATIONS);
7525  /* collation id is valid, check if same collation */
7526  lc = lang_get_collation (ref_c->coll_id);
7527 
7528  if (lc->coll.coll_id != ref_c->coll_id)
7529  {
7530  snprintf (err_msg, sizeof (err_msg) - 1,
7531  "Collation '%s' with id %d from %s not found with the " "same id on %s", ref_c->coll_name,
7532  ref_c->coll_id, server_text, client_text);
7533  er_status = ER_LOC_INIT;
7534  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7535  goto exit;
7536  }
7537 
7538  if (strcmp (lc->coll.coll_name, ref_c->coll_name))
7539  {
7540  snprintf (err_msg, sizeof (err_msg) - 1,
7541  "Names of collation with id %d do not match : " "on %s, is '%s'; on %s, is '%s'", ref_c->coll_id,
7542  client_text, ref_c->coll_name, server_text, lc->coll.coll_name);
7543  er_status = ER_LOC_INIT;
7544  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7545  goto exit;
7546  }
7547 
7548  if (lc->codeset != ref_c->codeset)
7549  {
7550  snprintf (err_msg, sizeof (err_msg) - 1,
7551  "Codesets of collation '%s' with id %d do not match : "
7552  "on %s, codeset is %d; on %s, codeset is %d", ref_c->coll_name, ref_c->coll_id, client_text,
7553  ref_c->codeset, server_text, lc->codeset);
7554  er_status = ER_LOC_INIT;
7555  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7556  goto exit;
7557  }
7558 
7559  if (strcasecmp (lc->coll.checksum, ref_c->checksum))
7560  {
7561  snprintf (err_msg, sizeof (err_msg) - 1,
7562  "Collation '%s' with id %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
7563  ref_c->coll_name, ref_c->coll_id, client_text, ref_c->checksum, server_text, lc->coll.checksum);
7564  er_status = ER_LOC_INIT;
7565  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7566  goto exit;
7567  }
7568  }
7569 exit:
7570  return er_status;
7571 }
7572 
7573 /*
7574  * lang_check_locale_compat - checks compatibility of current locales (of
7575  * running process) with a reference set of
7576  * locales
7577  * Returns : error code
7578  * loc_array(in): reference locales
7579  * loc_cnt(in):
7580  * client_text(in): text to display in message error for client
7581  * server_text(in): text to display in message error for server
7582  */
7583 int
7584 lang_check_locale_compat (const LANG_LOCALE_COMPAT * loc_array, const int loc_cnt, const char *client_text,
7585  const char *server_text)
7586 {
7587  char err_msg[ERR_MSG_SIZE];
7588  int i, j;
7589  int er_status = NO_ERROR;
7590 
7591  assert (loc_array != NULL);
7592  assert (loc_cnt > 0);
7593 
7594  /* check that each locale from client is defined by server */
7595  for (i = 0; i < lang_Count_locales; i++)
7596  {
7597  LANG_LOCALE_DATA *lld = lang_Loaded_locales[i];
7598  const LANG_LOCALE_COMPAT *ref_loc = NULL;
7599 
7600  do
7601  {
7602  bool ref_found = false;
7603 
7604  for (j = 0; j < loc_cnt; j++)
7605  {
7606  ref_loc = &(loc_array[j]);
7607 
7608  if (lld->codeset == ref_loc->codeset && strcasecmp (lld->lang_name, ref_loc->lang_name) == 0)
7609  {
7610  ref_found = true;
7611  break;
7612  }
7613  }
7614 
7615  if (!ref_found)
7616  {
7617  snprintf (err_msg, sizeof (err_msg) - 1, "Locale '%s' with codeset %d loaded by %s " "not found on %s",
7618  lld->lang_name, lld->codeset, client_text, server_text);
7619  er_status = ER_LOC_INIT;
7620  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7621  goto exit;
7622  }
7623 
7624  assert (ref_found);
7625 
7626  if (strcasecmp (ref_loc->checksum, lld->checksum))
7627  {
7628  snprintf (err_msg, sizeof (err_msg) - 1,
7629  "Locale '%s' with codeset %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
7630  ref_loc->lang_name, ref_loc->codeset, server_text, ref_loc->checksum, client_text,
7631  lld->checksum);
7632  er_status = ER_LOC_INIT;
7633  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7634  goto exit;
7635  }
7636  lld = lld->next_lld;
7637 
7638  }
7639  while (lld != NULL);
7640  }
7641 
7642  /* check that each locale from server is loaded by client */
7643  for (j = 0; j < loc_cnt; j++)
7644  {
7645  bool loc_found = false;
7646  const LANG_LOCALE_COMPAT *ref_loc = NULL;
7647  LANG_LOCALE_DATA *lld = NULL;
7648 
7649  ref_loc = &(loc_array[j]);
7650 
7651  for (i = 0; i < lang_Count_locales && !loc_found; i++)
7652  {
7653  lld = lang_Loaded_locales[i];
7654 
7655  do
7656  {
7657  if (lld->codeset == ref_loc->codeset && strcasecmp (lld->lang_name, ref_loc->lang_name) == 0)
7658  {
7659  loc_found = true;
7660  break;
7661  }
7662  lld = lld->next_lld;
7663  }
7664  while (lld != NULL);
7665  }
7666 
7667  if (!loc_found)
7668  {
7669  snprintf (err_msg, sizeof (err_msg) - 1, "Locale '%s' with codeset %d defined on %s " "is not loaded by %s",
7670  ref_loc->lang_name, ref_loc->codeset, server_text, client_text);
7671  er_status = ER_LOC_INIT;
7672  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7673  goto exit;
7674  }
7675 
7676  assert (loc_found && lld != NULL);
7677 
7678  if (strcasecmp (ref_loc->checksum, lld->checksum))
7679  {
7680  snprintf (err_msg, sizeof (err_msg) - 1,
7681  "Locale '%s' with codeset %d has changed : " "on %s, checksum is '%s'; on %s, checksum is '%s'",
7682  ref_loc->lang_name, ref_loc->codeset, server_text, ref_loc->checksum, client_text, lld->checksum);
7683  er_status = ER_LOC_INIT;
7684  LOG_LOCALE_ERROR (err_msg, ER_LOC_INIT, false);
7685  goto exit;
7686  }
7687  }
7688 
7689 exit:
7690  return er_status;
7691 }
7692 
/* drop the character-constant macros that were defined near the top of this file */
7693 #undef EUC_SPACE
7694 #undef ASCII_SPACE
7695 
7696 #undef SPACE
7697 #undef PAD
7698 #undef ZERO
int lang_set_charset(const INTL_CODESET codeset)
void(* init_coll)(LANG_COLLATION *lang_coll)
COLL_MATCH_CONTR
const char * month_short_parse_order
static unsigned int lang_Weight_EN_ci[LANG_CHAR_COUNT_EN]
static TEXT_CONVERSION * console_Conv
#define LANG_MAX_COLLATIONS
#define INTL_IS_NEXT_CONTR(v)
static void lang_free_collations(void)
#define CAL_AM_PM_COUNT
const char * charset_cubrid_name
const char * month_name[CAL_MONTH_COUNT]
int lang_set_language(const char *lang_str)
void * handle
int(* split_key)(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
static bool lang_Builtin_initialized
#define NO_ERROR
Definition: error_code.h:46
static bool lang_Parser_use_client_charset
#define AU_DISABLE(save)
Definition: authenticate.h:106
const char * lang_currency_symbol(DB_CURRENCY curr)
#define LANG_SYS_COLLATION
static int lang_back_strmatch_utf8_uca_w_level(const COLL_DATA *coll_data, bool is_match, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char *escape, const bool has_last_escape, int *offset_next_level, int *str1_match_size, bool ignore_trailing_space)
CONV_CP_TO_BYTES * utf8_to_text
static unsigned int lang_Weight_EN_cs_ti[LANG_CHAR_COUNT_EN]
const char * date_format
int lang_init(void)
#define LANG_NAME_KOREAN
int lang_db_put_charset(void)
const char * lang_get_msg_Loc_name(void)
int lang_locale_data_load_from_lib(LANG_LOCALE_DATA *lld, void *lib_handle, const LOCALE_FILE *lf, bool is_load_for_dump)
#define LANG_NO_NORMALIZATION
char lang_name[LANG_MAX_LANGNAME]
static int lang_strmatch_utf8_w_contr(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, int size1, const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
int intl_toupper_iso8859(unsigned char *s, int length)
Definition: intl_support.c:747
#define LANG_COLL_GENERIC_SORT_OPT
static LANG_LOCALE_DATA * lang_Loaded_locales[LANG_MAX_LOADED_LOCALES]
static LANG_COLLATION coll_Utf8_ko_cs
int lang_load_coll_from_lib(COLL_DATA *cd, void *lib_handle, const LOCALE_FILE *lf)
bool lang_get_parser_use_client_charset(void)
static int lang_next_alpha_char_iso88591(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
#define SHLIB_GET_ADDR(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)
static void lang_initloc_en_utf8(LANG_LOCALE_DATA *ld)
bool sett_expansions
#define LOCALE_NULL_CALENDAR_NAMES
static bool lang_is_codeset_allowed(const INTL_LANG intl_id, const INTL_CODESET codeset)
static void lang_init_coll_Utf8_tr_cs(LANG_COLLATION *lang_coll)
bool lang_check_identifier(const char *name, int length)
MOP Au_root
Definition: authenticate.c:300
static char lang_timestamptz_format_TR[]
#define CAL_MONTH_COUNT
int(* next_coll_seq)(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
char * locale_name
int lang_get_lang_id_from_name(const char *lang_name, INTL_LANG *lang_id)
void(* init_conv_func)(void)
#define LANG_VARIABLE_CHARSET(x)
static void ** loclib_Handle
static unsigned int lang_upper_i_TR[LANG_CHAR_COUNT_TR]
DB_TYPE
Definition: dbtype_def.h:670
#define ER_FAILED
Definition: error_code.h:47
unsigned char size
int intl_text_single_byte_to_utf8(const char *in_buf, const int in_size, char **out_buf, int *out_size)
int lang_charset_name_to_id(const char *name, INTL_CODESET *codeset)
static int lang_strmatch_binary(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, int size1, const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
static int lang_next_coll_byte(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
#define UCA_GET_L1_W(v)
COLL_MATCH_CONTR sett_match_contr
int lang_locales_count(bool check_codeset)
int lang_load_library(const char *lib_file, void **handle)
#define DEFAULT_COLL_OPTIONS
static int lang_split_key_byte(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
int lang_check_locale_compat(const LANG_LOCALE_COMPAT *loc_array, const int loc_cnt, const char *client_text, const char *server_text)
static int lang_strmatch_utf8_uca_w_level(const COLL_DATA *coll_data, const int level, bool is_match, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char *escape, const bool has_last_escape, int *offset_next_level, int *str1_match_size, bool ignore_trailing_space)
#define LANG_CHARSET_ISO88591_ALIAS2
static int lang_fastcmp_ko(const LANG_COLLATION *lang_coll, const unsigned char *string1, int size1, const unsigned char *string2, int size2, bool ignore_trailing_space)
static void free_lang_locale_data(LANG_LOCALE_DATA *lld)
const char * lang_date_format_parse(const INTL_LANG lang_id, const INTL_CODESET codeset, const DB_TYPE type, INTL_CODESET *format_codeset)
static int lang_split_key_euckr(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
UCA_L13_W uca_w_l13[MAX_UCA_EXP_CE]
int(* text_to_utf8_func)(const char *, const int, char **, int *)
#define assert_release(e)
Definition: error_manager.h:96
ALPHABET_DATA alphabet
const char * am_pm[CAL_AM_PM_COUNT]
static int check_env_lang_val(char *env_val, char *lang_name, char **charset_ptr, INTL_CODESET *codeset)
#define ZERO
INTL_LANG lang_get_lang_id_from_flag(const int flag, bool *has_user_format, bool *has_user_lang)
#define ADD_TO_HASH(pseudo_key, w)
ALPHABET_TYPE a_type
static UNICODE_NORMALIZATION * generic_Unicode_norm
static LANG_COLLATION coll_Iso_binary
#define LANG_COLL_NO_CONTR
T_LEVEL
static char lang_datetimetz_format_TR[]
const char * month_short_name[CAL_MONTH_COUNT]
static bool lang_Charset_initialized
#define LANG_COLL_NO_EXP
#define bool
Definition: dbi_compat.h:31
#define SPACE
unsigned int * weights_ti
static int set_current_locale(void)
static LANG_LOCALE_DATA lc_Turkish_iso88591
static int lang_fastcmp_binary(const LANG_COLLATION *lang_coll, const unsigned char *string1, const int size1, const unsigned char *string2, const int size2, bool ignore_trailing_space)
char lang_digit_fractional_symbol(const INTL_LANG lang_id)
static bool lang_Initialized
static int lang_strmatch_utf8_uca(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
const char * datetime_format
static int loclib_Handle_count
int intl_cp_to_utf8(const unsigned int codepoint, unsigned char *utf8_seq)
const unsigned char * intl_nextchar_euc(const unsigned char *s, int *curr_char_length)
Definition: intl_support.c:777
#define LANG_CHARSET_EUCKR_ALIAS1
static unsigned int lang_lower_TR[LANG_CHAR_COUNT_TR]
#define LOC_LIB_SYMBOL_NAME_SIZE
static void set_default_lang(void)
void db_date_locale_init(void)
Definition: db_date.c:1531
#define GET_UCA_WEIGHT(l, i, l13w, l4w)
const char * timestamptz_format
static int init_user_locales(void)
LANG_LOCALE_DATA * next_lld
const INTL_LANG lang
UCA_OPTIONS uca_opt
TEXT_CONV_TYPE
static int lang_strmatch_ko(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, int size1, const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
static unsigned int lang_Weight_EN_cs[LANG_CHAR_COUNT_EN]
const LANG_LOCALE_DATA * lang_locale(void)
static unsigned int lang_mht2str_byte(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
#define LOCALE_NULL_DATE_FORMATS
INTL_CODESET lang_Loc_charset
static INTL_LANG lang_Lang_id
static unsigned int lang_mht2str_ko(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
const char * lang_get_collation_name(const int coll_id)
TEXT_CONV_TYPE conv_type
void lang_init_builtin(void)
static int lang_split_key_iso(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
const char * charset_name
#define COLL_NAME_SIZE
int db_make_string(DB_VALUE *value, DB_CONST_C_CHAR str)
#define ASCII_SPACE
#define LANG_NAME_DEFAULT
unsigned char uca_num
unsigned int intl_back_utf8_to_cp(const unsigned char *utf8_start, const unsigned char *utf8_last, unsigned char **last_byte__prev_char)
INTL_CODESET lang_charset(void)
DB_CURRENCY
Definition: dbtype_def.h:799
const LANG_LOCALE_DATA * lang_get_first_locale_for_lang(const INTL_LANG lang)
LANG_COLLATION * lang_Collations[LANG_MAX_COLLATIONS]
static int lang_str_utf8_trail_zero_weights(const LANG_COLLATION *lang_coll, const unsigned char *str, int size)
static UCA_L4_W uca_l4_max_weight
UNICODE_MAPPING * unicode_mappings
void er_set(int severity, const char *file_name, const int line_no, int err_id, int num_args,...)
static bool lang_Language_initialized
static unsigned int lang_Next_alpha_char_EN_ci[LANG_CHAR_COUNT_EN]
bool allow_like_rewrite
INTL_CODESET codeset
LANG_COLLATION * lang_get_collation(const int coll_id)
CONV_CP_TO_BYTES * text_to_utf8
static unsigned int lang_Next_alpha_char_TR_ti[LANG_CHAR_COUNT_TR]
static LANG_COLLATION coll_Iso88591_en_ci
static LANG_COLLATION * built_In_collations[]
int intl_text_utf8_to_dbcs(const char *in_buf, const int in_size, char **out_buf, int *out_size)
ALPHABET_DATA ident_alphabet
int intl_text_utf8_to_single_byte(const char *in_buf, const int in_size, char **out_buf, int *out_size)
static int lang_w_map_EN[LANG_W_MAP_COUNT_EN]
int lang_strmatch_utf8_uca_w_coll_data(const COLL_DATA *coll_data, bool is_match, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
#define assert(x)
char lang_digit_grouping_symbol(const INTL_LANG lang_id)
static int lang_strcmp_utf8_uca(const LANG_COLLATION *lang_coll, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, bool ignore_trailing_space)
static void lang_init_common_en_cs(COLL_DATA *coll_data)
unsigned int text_first_cp
#define ER_OUT_OF_VIRTUAL_MEMORY
Definition: error_code.h:50
#define UCA_GET_L2_W(v)
#define INTL_CONTR_FOUND(v)
static int lang_split_key_binary(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
static void lang_initloc_tr_iso(LANG_LOCALE_DATA *ld)
LANG_COLLATION * lang_get_collation_by_name(const char *coll_name)
#define LOCALE_DUMMY_ALPHABET(codeset)
void lang_set_parser_use_client_charset(bool use)
#define LANG_MAX_LOADED_LOCALES
#define LANG_MAX_BUILTIN_COLLATIONS
static unsigned int lang_mht2str_utf8(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
static LANG_LOCALE_DATA * find_lang_locale_data(const char *name, const INTL_CODESET codeset, LANG_LOCALE_DATA **last_lang_locale)
const char * timestamp_format
const char * day_parse_order
static unsigned int lang_Next_alpha_char_TR[LANG_CHAR_COUNT_TR]
static LANG_COLLATION coll_Euckr_bin
INTL_CODESET lang_charset_cubrid_name_to_id(const char *name)
UCA_L4_W uca_w_l4[MAX_UCA_EXP_CE]
static LANG_COLLATION coll_Iso88591_en_cs
#define LANG_CHARSET_EUCKR
unsigned int utf8_first_cp
#define SHLIB_GET_VAL(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)
static LANG_COLLATION coll_Utf8_binary
const char * day_short_parse_order
const char * am_pm_parse_order
int locale_get_cfg_locales(LOCALE_FILE **p_locale_files, int *p_num_locales, bool is_lang_init)
int lang_collation_count(void)
#define LANG_CHARSET_UTF8_ALIAS1
#define CAL_DAY_COUNT
const char * time_format
static LANG_COLLATION coll_Utf8_en_ci
static void lang_initloc_tr_utf8(LANG_LOCALE_DATA *ld)
unsigned int cp_first_contr_count
TEXT_CONVERSION con_Iso_8859_9_conv
Definition: intl_support.c:116
void lang_final(void)
static int lang_locale_load_normalization_from_lib(UNICODE_NORMALIZATION *norm, void *lib_handle, const LOCALE_FILE *lf)
UNICODE_NORMALIZATION * lang_get_generic_unicode_norm(void)
#define NULL
Definition: freelistheap.h:34
static void lang_init_coll_en_ci(LANG_COLLATION *lang_coll)
#define strncpy_bufsize(buf, str)
Definition: porting.h:340
static char lang_date_format_TR[]
char checksum[32+1]
unsigned int intl_utf8_to_cp(const unsigned char *utf8, const int size, unsigned char **next_char)
static int lang_Count_locales
void lang_init_console_txt_conv(void)
static int register_lang_locale_data(LANG_LOCALE_DATA *lld)
unsigned int * lower_cp
static LANG_COLLATION coll_Utf8_en_cs
#define ER_LOC_INIT
Definition: error_code.h:1370
bool allow_prefix_index
#define LANG_CHARSET_UTF8
ALPHABET_TYPE
const char * day_name[CAL_DAY_COUNT]
static LANG_LOCALE_DATA lc_Turkish_utf8
static unsigned int lang_Next_alpha_char_EN_cs[LANG_CHAR_COUNT_EN]
static int lang_fastcmp_byte(const LANG_COLLATION *lang_coll, const unsigned char *string1, const int size1, const unsigned char *string2, const int size2, bool ignore_trailing_space)
int * cp_first_contr_array
unsigned int utf8_last_cp
static int lang_next_coll_seq_utf8_w_contr(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
static int lang_next_coll_char_utf8(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
static unsigned int lang_Weight_EN_ci_ti[LANG_CHAR_COUNT_EN]
static int lang_str_utf8_trail_zero_weights_w_exp(const COLL_DATA *coll_data, const int level, const unsigned char *str, int size)
int intl_char_size(const unsigned char *src, int length_in_chars, INTL_CODESET src_codeset, int *byte_count)
static void lang_initloc_ko_utf8(LANG_LOCALE_DATA *ld)
static int lang_strcmp_utf8_w_contr(const LANG_COLLATION *lang_coll, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, bool ignore_trailing_space)
int db_put_internal(DB_OBJECT *obj, const char *name, DB_VALUE *value)
Definition: db_obj.c:347
INTL_LANG lang_id(void)
static void lang_initloc_ko_iso(LANG_LOCALE_DATA *ld)
const char * month_parse_order
static int set_msg_lang_from_env(void)
int count(int &result, const cub_regex_object &reg, const std::string &src, const int position, const INTL_CODESET codeset)
#define cmp
Definition: mprec.h:351
int pr_clear_value(DB_VALUE *value)
const LANG_LOCALE_DATA * lang_get_specific_locale(const INTL_LANG lang, const INTL_CODESET codeset)
static LANG_LOCALE_DATA lc_English_utf8
#define UNICODE_NORMALIZATION_DECORATOR
INTL_CODESET charset_id
DB_CURRENCY lang_currency()
static char lang_datetime_format_TR[]
#define LANG_CHAR_COUNT_EN
const char * lang_charset_cubrid_name(const INTL_CODESET codeset)
void(* initloc)(LANG_LOCALE_DATA *ld)
char c_buf[LOC_MAX_UCA_CHARS_SEQ *INTL_UTF8_MAX_CHAR_SIZE]
const char * lang_name
static void destroy_user_locales(void)
UNICODE_NORMALIZATION unicode_norm
#define EUC_SPACE
void tp_apply_sys_charset(void)
static unsigned int lang_upper_EN[LANG_CHAR_COUNT_EN]
#define CAST_BUFLEN
Definition: porting.h:471
const char * envvar_get(const char *name)
static UCA_L13_W uca_l13_max_weight
static COLL_CONTRACTION * lang_get_contr_for_string(const COLL_DATA *coll_data, const unsigned char *str, const int str_size, unsigned int cp)
static unsigned int lang_Weight_TR_ti[LANG_CHAR_COUNT_TR]
static void error(const char *msg)
Definition: gencat.c:331
int lang_set_charset_lang(const char *lang_charset)
static int lang_split_key_utf8(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
static LANG_LOCALE_DATA lc_Korean_iso88591
unsigned char byte_flag[256]
static unsigned int lang_lower_i_TR[LANG_CHAR_COUNT_TR]
#define LANG_CHARSET_ISO88591
static int lang_locale_load_alpha_from_lib(ALPHABET_DATA *a, bool load_w_identifier_name, const char *alpha_suffix, void *lib_handle, const LOCALE_FILE *lf)
static DB_CURRENCY lang_Loc_currency
static int lang_next_alpha_char_ko(const LANG_COLLATION *lang_coll, const unsigned char *seq, const int size, unsigned char *next_seq, int *len_next, bool ignore_trailing_space)
static LANG_COLLATION coll_Binary
static LANG_COLLATION coll_Utf8_tr_cs
static unsigned int lang_Next_alpha_char_EN_cs_ti[LANG_CHAR_COUNT_EN]
static int lang_get_builtin_lang_id_from_name(const char *lang_name, INTL_LANG *lang_id)
static int loclib_Handle_size
static char lang_timestamp_format_TR[]
COLL_CONTRACTION * contr_list
#define ARG_FILE_LINE
Definition: error_manager.h:44
int lang_get_charset_env_string(char *buf, int buf_size, const char *lang_name, const INTL_CODESET codeset)
int(* strmatch)(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *string1, int size1, const unsigned char *string2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
static int register_collation(LANG_COLLATION *coll)
#define snprintf_dots_truncate(dest, max_len,...)
Definition: porting.h:323
unsigned int INTL_LANG
Definition: intl_support.h:132
const char * lang_get_codeset_name(int codeset_id)
static void lang_initloc_ko_euc(LANG_LOCALE_DATA *ld)
UCA_L13_W * uca_w_l13
static LANG_LOCALE_DATA lc_Korean_utf8
#define AU_ENABLE(save)
Definition: authenticate.h:113
TEXT_CONVERSION * txt_conv
static LANG_LOCALE_DATA * lang_Loc_data
UCA_L4_W * uca_w_l4
static int lang_strmatch_utf8(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, int size1, const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
#define INTL_MASK_CONTR
void init_builtin_calendar_names(LANG_LOCALE_DATA *lld)
#define LANG_NAME_ENGLISH
bool allow_index_opt
static char lang_Lang_name[LANG_MAX_LANGNAME]
#define free_and_init(ptr)
Definition: memory_alloc.h:147
#define strlen(s1)
Definition: intl_support.c:43
static unsigned int lang_get_w_first_el(const COLL_DATA *coll, const unsigned char *str, const int str_size, unsigned char **next_char, bool ignore_trailing_space)
LANG_COLLATION * default_lang_coll
unsigned int(* mht2str)(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
char * prm_get_string_value(PARAM_ID prm_id)
TEXT_CONVERSION con_Iso_8859_1_conv
Definition: intl_support.c:128
INTL_CODESET codeset
LANG_LOCALE_DATA * default_lang
#define LANG_CHARSET_ISO88591_ALIAS1
const char * lang_get_lang_name_from_id(const INTL_LANG lang_id)
char * intl_get_money_symbol_console(const DB_CURRENCY currency)
unsigned int * next_cp
static LANG_LOCALE_DATA lc_Korean_euckr
enum intl_codeset INTL_CODESET
Definition: intl_support.h:190
int lang_set_flag_from_lang(const char *lang_str, bool has_user_format, bool has_user_lang, int *flag)
const ALPHABET_DATA * lang_user_alphabet_w_coll(const int collation_id)
static const DB_CHARSET lang_Db_charsets[]
int lang_set_flag_from_lang_id(const INTL_LANG lang, bool has_user_format, bool has_user_lang, int *flag)
static int lang_strmatch_byte(const LANG_COLLATION *lang_coll, bool is_match, const unsigned char *str1, int size1, const unsigned char *str2, int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
INTL_CODESET codeset
static void lang_get_uca_w_l13(const COLL_DATA *coll_data, const bool use_contractions, const unsigned char *str, const int size, UCA_L13_W **uca_w_l13, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
const char * datetimetz_format
int intl_cmp_char(const unsigned char *s1, const unsigned char *s2, INTL_CODESET codeset, int *char_size)
int char_isalnum(int c)
Definition: chartype.c:97
static void lang_get_uca_w_l4(const COLL_DATA *coll_data, const bool use_contractions, const unsigned char *str, const int size, UCA_L4_W **uca_w_l4, int *num_ce, unsigned char **str_next, unsigned int *cp_out)
const char * lang_name
static unsigned int lang_Weight_TR[LANG_CHAR_COUNT_TR]
T_LEVEL sett_strength
const char * introducer
int i
Definition: dynamic_load.c:954
static int lang_split_key_w_exp(const LANG_COLLATION *lang_coll, const bool is_desc, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size, bool ignore_trailing_space)
#define LOG_LOCALE_ERROR(msg, er_status, do_print)
while(1)
Definition: cnvlex.c:816
unsigned int UCA_L13_W
static char lang_time_format_TR[]
static bool lang_Init_w_error
for(p=libs;*p;p++)
Definition: dynamic_load.c:968
static char lang_Loc_name[LANG_MAX_LANGNAME]
unsigned short int UCA_L4_W
INTL_CODESET lang_get_client_charset(void)
static void lang_get_uca_back_weight_l13(const COLL_DATA *coll_data, const bool use_contractions, const unsigned char *str_start, const unsigned char *str_last, UCA_L13_W **uca_w_l13, int *num_ce, unsigned char **str_prev, unsigned int *cp_out)
#define LANG_W_MAP_COUNT_EN
const INTL_CODESET codeset
static unsigned int lang_upper_TR[LANG_CHAR_COUNT_TR]
#define LANG_NAME_TURKISH
int intl_text_dbcs_to_utf8(const char *in_buf, const int in_size, char **out_buf, int *out_size)
const char * lang_charset_introducer(const INTL_CODESET codeset)
char coll_name[COLL_NAME_SIZE]
static INTL_CODESET lang_get_default_codeset(const INTL_LANG intl_id)
int(* utf8_to_text_func)(const char *, const int, char **, int *)
static void lang_init_coll_en_cs(LANG_COLLATION *lang_coll)
unsigned int mht_2str_pseudo_key(const void *key, int key_size)
Definition: memory_hash.c:175
int db_make_int(DB_VALUE *value, const int num)
static LANG_LOCALE_DATA lc_English_binary
static unsigned int lang_mht2str_utf8_exp(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
int(* fastcmp)(const LANG_COLLATION *lang_coll, const unsigned char *string1, const int size1, const unsigned char *string2, const int size2, bool ignore_trailing_space)
static bool lang_Msg_env_initialized
static void lang_initloc_en_binary(LANG_LOCALE_DATA *ld)
TEXT_CONVERSION * lang_get_txt_conv(void)
int lang_check_coll_compat(const LANG_COLL_COMPAT *coll_array, const int coll_cnt, const char *client_text, const char *server_text)
void lang_set_generic_unicode_norm(UNICODE_NORMALIZATION *norm)
#define LANG_SYS_CODESET
int lang_load_count_coll_from_lib(int *count_coll, void *lib_handle, const LOCALE_FILE *lf)
unsigned int text_last_cp
int intl_tolower_iso8859(unsigned char *s, int length)
Definition: intl_support.c:721
#define LANG_MAX_LANGNAME
static void lang_unload_libraries(void)
unsigned int * next_cp_ti
unsigned int * weights
const char * lang_charset_name(const INTL_CODESET codeset)
DB_CURRENCY lang_locale_currency(const char *locale_str)
DB_CURRENCY default_currency_code
static unsigned int lang_mht2str_default(const LANG_COLLATION *lang_coll, const unsigned char *str, const int size)
static int lang_Count_collations
#define INTL_GET_NEXT_CONTR_ID(v)
const char * lang_get_Lang_name(void)
#define CI_COLL_OPTIONS
static int lang_strcmp_utf8(const LANG_COLLATION *lang_coll, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, bool ignore_trailing_space)
static char lang_Msg_loc_name[LANG_MAX_LANGNAME]
static void lang_initloc_en_iso88591(LANG_LOCALE_DATA *ld)
static unsigned int lang_lower_EN[LANG_CHAR_COUNT_EN]
char coll_name[COLL_NAME_SIZE]
bool lang_is_coll_name_allowed(const char *name)
void envvar_trim_char(char *env_val, const int c)
static LANG_LOCALE_DATA lc_English_iso88591
const char ** p
Definition: dynamic_load.c:945
int locale_check_and_set_default_files(LOCALE_FILE *lf, bool is_lang_init)
int contr_min_size
LANG_DEFAULTS builtin_Langs[]
#define SHLIB_GET_ADDR_W_REF(v, SYM_NAME, SYM_TYPE, lh, LOC_NAME)
char * uca_num
#define ERR_MSG_SIZE
static unsigned int lang_Next_alpha_char_EN_ci_ti[LANG_CHAR_COUNT_EN]
int lang_get_client_collation(void)
int char_isalpha(int c)
Definition: chartype.c:61
unsigned int cp_first_contr_offset
const char * day_short_name[CAL_DAY_COUNT]
const unsigned char *const intl_Len_utf8_char
unsigned int * upper_cp
#define LANG_CHAR_COUNT_TR
static void lang_init_common_en_ci(COLL_DATA *coll_data)
int lang_load_get_coll_name_from_lib(const int coll_pos, char **coll_name, void *lib_handle, const LOCALE_FILE *lf)