CUBRID Engine  latest
language_support.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 
20 /*
21  * language_support.h : Multi-language and character set support
22  *
23  */
24 
25 #ifndef _LANGUAGE_SUPPORT_H_
26 #define _LANGUAGE_SUPPORT_H_
27 
28 #ident "$Id$"
29 
30 #include <stddef.h>
31 
32 #include "intl_support.h"
33 #include "locale_support.h"
34 
35 /*
36  * currently built-in language names, charset names and charset aliases.
 * LANG_NAME_DEFAULT is LANG_NAME_ENGLISH; LANG_CHARSET_DEFAULT is assembled by
 * string-literal concatenation from LANG_NAME_ENGLISH, "." and LANG_CHARSET_ISO88591.
37  */
38 #define LANG_NAME_KOREAN "ko_KR"
39 #define LANG_NAME_ENGLISH "en_US"
40 #define LANG_NAME_TURKISH "tr_TR"
41 #define LANG_CHARSET_UTF8 "utf8"
42 #define LANG_CHARSET_UTF8_ALIAS1 "utf-8"
43 #define LANG_CHARSET_EUCKR "euckr"
44 #define LANG_CHARSET_EUCKR_ALIAS1 "euc-kr"
45 #define LANG_CHARSET_ISO88591 "iso88591"
46 #define LANG_CHARSET_ISO88591_ALIAS1 "iso-88591"
47 #define LANG_CHARSET_ISO88591_ALIAS2 "iso-8859-1"
48 #define LANG_NAME_DEFAULT LANG_NAME_ENGLISH
49 #define LANG_CHARSET_DEFAULT LANG_NAME_ENGLISH "." LANG_CHARSET_ISO88591
50 
51 #define LANG_MAX_COLLATIONS 256
52 #define LANG_MAX_BUILTIN_COLLATIONS 32
53 #define LANG_MAX_LOADED_LOCALES 32
54 
/* the coercible collation and codeset are plain aliases of LANG_SYS_COLLATION and LANG_SYS_CODESET */
55 #define LANG_COERCIBLE_COLL LANG_SYS_COLLATION
56 #define LANG_COERCIBLE_CODESET LANG_SYS_CODESET
57 
/* Non-zero when collation id (c) equals LANG_COLL_ISO_BINARY, LANG_COLL_UTF8_BINARY or
 * LANG_COLL_EUCKR_BINARY. (c) is expanded up to three times, so it must have no side effects. */
58 #define LANG_IS_COERCIBLE_COLL(c) \
59  ((c) == LANG_COLL_ISO_BINARY || (c) == LANG_COLL_UTF8_BINARY \
60  || (c) == LANG_COLL_EUCKR_BINARY)
61 
62 /* common collation to be used at runtime */
/* Assigns to (coll) the collation chosen for the pair (c1), (c2); (coll) is first reset to -1 and
 * keeps that value when no branch selects a collation:
 *  - (c1) == (c2): that id;
 *  - (c1) satisfies LANG_IS_COERCIBLE_COLL and (c2) does not: (c2);
 *  - both satisfy it but differ: (c2) only when (c2) is LANG_COLL_ISO_BINARY, otherwise -1
 *    (note the result depends on argument order in this case);
 *  - only (c2) satisfies it: (c1);
 *  - neither satisfies it and they differ: -1.
 * (c1) and (c2) are expanded several times and must have no side effects; (coll) is used
 * unparenthesized as an assignment target, so pass a plain lvalue. */
63 #define LANG_RT_COMMON_COLL(c1, c2, coll) \
64  do { \
65  coll = -1; \
66  if ((c1) == (c2)) \
67  { \
68  coll = (c1); \
69  } \
70  else if (LANG_IS_COERCIBLE_COLL (c1)) \
71  { \
72  if (!LANG_IS_COERCIBLE_COLL (c2)) \
73  { \
74  coll = (c2); \
75  } \
76  else \
77  { \
78  if ((c2) == LANG_COLL_ISO_BINARY) \
79  { \
80  coll = (c2); \
81  } \
82  } \
83  } \
84  else if (LANG_IS_COERCIBLE_COLL (c2)) \
85  { \
86  coll = (c1); \
87  } \
88  } while (0)
89 
90 /*
91  * message for fundamental errors that occur before any message catalogs
92  * can be accessed or opened.
93  */
94 #define LANG_ERR_NO_CUBRID "The `%s' environment variable is not set.\n"
95 
96 #define LANG_MAX_LANGNAME 256
97 
/* Non-zero when codeset (x) is none of INTL_CODESET_ASCII, INTL_CODESET_RAW_BITS,
 * INTL_CODESET_RAW_BYTES and INTL_CODESET_ISO88591. NOTE(review): going by the macro name, the
 * excluded codesets are presumably the fixed-width ones - confirm against intl_support.h.
 * (x) is expanded up to four times, so it must have no side effects. */
98 #define LANG_VARIABLE_CHARSET(x) ((x) != INTL_CODESET_ASCII && \
99  (x) != INTL_CODESET_RAW_BITS && \
100  (x) != INTL_CODESET_RAW_BYTES && \
101  (x) != INTL_CODESET_ISO88591)
102 
103 enum
104 {
117 };
118 
/* Maps codeset (c) to a collation id: INTL_CODESET_UTF8 -> LANG_COLL_UTF8_BINARY,
 * INTL_CODESET_KSC5601_EUC -> LANG_COLL_EUCKR_BINARY, INTL_CODESET_ISO88591 -> LANG_COLL_ISO_BINARY,
 * any other value -> LANG_COLL_BINARY. (c) is expanded up to three times. */
119 #define LANG_GET_BINARY_COLLATION(c) (((c) == INTL_CODESET_UTF8) \
120  ? LANG_COLL_UTF8_BINARY : \
121  (((c) == INTL_CODESET_KSC5601_EUC) ? LANG_COLL_EUCKR_BINARY : \
122  (((c) == INTL_CODESET_ISO88591) ? LANG_COLL_ISO_BINARY : \
123  LANG_COLL_BINARY)))
124 
125 typedef struct db_charset DB_CHARSET;
127 {
128  const char *charset_name;
129  const char *charset_desc;
130  const char *space_char;
131  const char *introducer;
132  const char *charset_cubrid_name;
135 };
136 
137 /* collation optimizations */
138 typedef struct coll_opt COLL_OPT;
139 struct coll_opt
140 {
141  /* enabled by default; disabled for case insensitive collations and collations with expansions */
143 
144  /* enabled by default; disabled for collations having identical sort key for different strings (case insensitive
145  * collations). In order to produce specific sort keys, a UCA collation should be configured with maximum sorting
146  * level. But, even in this case there are some acceptable codepoints which have the same weight. These codepoints
147  * usually represent the same graphic symbol. */
149 
150  /* enabled by default; disabled for collations with expansions */
152 };
153 
155 
158 {
160  int built_in;
161  bool need_init;
162  COLL_OPT options; /* collation options */
163 
164  /* default language to use for this collation (for casing functions) */
166 
167  COLL_DATA coll; /* collation data */
168  /* string compare */
169  int (*fastcmp) (const LANG_COLLATION * lang_coll, const unsigned char *string1, const int size1,
170  const unsigned char *string2, const int size2, bool ignore_trailing_space);
171  int (*strmatch) (const LANG_COLLATION * lang_coll, bool is_match, const unsigned char *string1, int size1,
172  const unsigned char *string2, int size2, const unsigned char *escape, const bool has_last_escape,
173  int *str1_match_size, bool ignore_trailing_space);
174  /* function to get collatable character sequence (in sort order) */
175  int (*next_coll_seq) (const LANG_COLLATION * lang_coll, const unsigned char *seq, const int size,
176  unsigned char *next_seq, int *len_next, bool ignore_trailing_space);
177  /* find common key where str1 <= key < str2 (BTREE string prefix) */
178  int (*split_key) (const LANG_COLLATION * lang_coll, const bool is_desc, const unsigned char *str1, const int size1,
179  const unsigned char *str2, const int size2, const unsigned char **key, int *byte_size,
180  bool ignore_trailing_space);
181  /* compute hash value pseudokey (mht_2str_pseudo_key) */
182  unsigned int (*mht2str) (const LANG_COLLATION * lang_coll, const unsigned char *str, const int size);
183  /* collation data init function */
184  void (*init_coll) (LANG_COLLATION * lang_coll);
185 };
186 
187 /* Language locale data */
189 {
190  /* next locale with same lang id, but different codeset */
192 
193  const char *lang_name;
196 
197  ALPHABET_DATA alphabet; /* data for lower / upper */
198  ALPHABET_DATA ident_alphabet; /* data for lower / upper for identifiers */
199 
200  LANG_COLLATION *default_lang_coll; /* default collation for this locale */
201 
202  TEXT_CONVERSION *txt_conv; /* console text conversion */
203 
204  bool is_initialized; /* init status */
205 
206  const char *time_format; /* default time format */
207  const char *date_format; /* default date format */
208  const char *datetime_format; /* default datetime format */
209  const char *timestamp_format; /* default timestamp format */
210  const char *datetimetz_format; /* default datetime_tz format */
211  const char *timestamptz_format; /* default timestamp_tz format */
212 
213  const char *day_short_name[CAL_DAY_COUNT];
214  const char *day_name[CAL_DAY_COUNT];
215  const char *month_short_name[CAL_MONTH_COUNT];
216  const char *month_name[CAL_MONTH_COUNT];
217  const char *am_pm[CAL_AM_PM_COUNT];
218 
220  const char *day_parse_order;
222  const char *month_parse_order;
223  const char *am_pm_parse_order;
224 
228 
230 
231  char *checksum;
232 
233  void (*initloc) (LANG_LOCALE_DATA * ld); /* locale data init function */
234  bool is_user_data; /* TRUE if lang data is loaded from DLL/so FALSE if built-in */
235 };
236 
239 {
240  int coll_id;
241  char coll_name[COLL_NAME_SIZE];
243  char checksum[32 + 1];
244 };
245 
248 {
249  char lang_name[LANG_MAX_LANGNAME];
251  char checksum[32 + 1];
252 };
253 
254 /* collation and charset to be used by the system: */
/* LANG_GET_COLLATION(i): with NDEBUG defined it indexes lang_Collations[] directly; otherwise it
 * calls lang_get_collation (i). In the NDEBUG form (i) is not parenthesized inside the subscript. */
255 #if defined(NDEBUG)
256 #define LANG_GET_COLLATION(i) lang_Collations[i]
257 #else /* DEBUG */
258 #define LANG_GET_COLLATION(i) lang_get_collation(i)
259 #endif /* NDEBUG */
260 
/* LANG_SYS_CODESET is the system codeset: lang_Loc_charset read directly when NDEBUG is defined,
 * the result of lang_charset () otherwise. LANG_SYS_COLLATION is LANG_GET_BINARY_COLLATION applied
 * to that same codeset. */
261 #if defined(NDEBUG)
262 #define LANG_SYS_COLLATION (LANG_GET_BINARY_COLLATION(lang_Loc_charset))
263 #define LANG_SYS_CODESET lang_Loc_charset
264 #else /* DEBUG */
265 #define LANG_SYS_COLLATION (LANG_GET_BINARY_COLLATION(lang_charset()))
266 #define LANG_SYS_CODESET lang_charset()
267 #endif /* NDEBUG */
268 
269 #ifdef __cplusplus
270 extern "C"
271 {
272 #endif
275  extern INTL_CODESET lang_charset (void);
276  extern void lang_init_builtin (void);
277  extern int lang_init (void);
278  extern void lang_init_console_txt_conv (void);
279  extern int lang_set_charset_lang (const char *lang_charset);
280  extern int lang_set_charset (const INTL_CODESET codeset);
281  extern int lang_set_language (const char *lang_str);
282  extern void lang_final (void);
283  extern int lang_locales_count (bool check_codeset);
284  extern const char *lang_get_msg_Loc_name (void);
285  extern const char *lang_get_Lang_name (void);
286  extern INTL_LANG lang_id (void);
287  extern DB_CURRENCY lang_currency (void);
288  extern DB_CURRENCY lang_locale_currency (const char *locale_str);
289  extern const char *lang_currency_symbol (DB_CURRENCY curr);
290 #if defined(ENABLE_UNUSED_FUNCTION)
291  extern int lang_char_mem_size (const char *p);
292  extern int lang_char_screen_size (const char *p);
293  extern int lang_wchar_mem_size (const wchar_t * p);
294  extern int lang_wchar_screen_size (const wchar_t * p);
295 #endif
296  extern bool lang_check_identifier (const char *name, int length);
297  extern const LANG_LOCALE_DATA *lang_locale (void);
298  extern const LANG_LOCALE_DATA *lang_get_specific_locale (const INTL_LANG lang, const INTL_CODESET codeset);
299  extern const LANG_LOCALE_DATA *lang_get_first_locale_for_lang (const INTL_LANG lang);
300  extern int lang_get_lang_id_from_name (const char *lang_name, INTL_LANG * lang_id);
301  extern const char *lang_get_lang_name_from_id (const INTL_LANG lang_id);
302  extern int lang_set_flag_from_lang (const char *lang_str, bool has_user_format, bool has_user_lang, int *flag);
303  extern int lang_set_flag_from_lang_id (const INTL_LANG lang, bool has_user_format, bool has_user_lang, int *flag);
304  extern INTL_LANG lang_get_lang_id_from_flag (const int flag, bool * has_user_format, bool * has_user_lang);
305  extern const char *lang_date_format_parse (const INTL_LANG lang_id, const INTL_CODESET codeset, const DB_TYPE type,
306  INTL_CODESET * format_codeset);
307  extern char lang_digit_grouping_symbol (const INTL_LANG lang_id);
308  extern char lang_digit_fractional_symbol (const INTL_LANG lang_id);
309  extern bool lang_is_coll_name_allowed (const char *name);
310  extern LANG_COLLATION *lang_get_collation (const int coll_id);
311  extern const char *lang_get_collation_name (const int coll_id);
312  extern LANG_COLLATION *lang_get_collation_by_name (const char *coll_name);
313  extern int lang_collation_count (void);
314  extern const char *lang_get_codeset_name (int codeset_id);
315  extern const ALPHABET_DATA *lang_user_alphabet_w_coll (const int collation_id);
316  extern TEXT_CONVERSION *lang_get_txt_conv (void);
317  extern const char *lang_charset_name (const INTL_CODESET codeset);
318  extern const char *lang_charset_cubrid_name (const INTL_CODESET codeset);
319 
320  extern int lang_strmatch_utf8_uca_w_coll_data (const COLL_DATA * coll_data, bool is_match, const unsigned char *str1,
321  const int size1, const unsigned char *str2, const int size2,
322  const unsigned char *escape, const bool has_last_escape,
323  int *str1_match_size, bool ignore_trailing_space);
324  extern int lang_get_charset_env_string (char *buf, int buf_size, const char *lang_name, const INTL_CODESET charset);
325 #if !defined (SERVER_MODE)
326  extern int lang_charset_name_to_id (const char *name, INTL_CODESET * codeset);
327  extern int lang_db_put_charset (void);
329  extern int lang_get_client_collation (void);
330  extern void lang_set_parser_use_client_charset (bool use);
331  extern bool lang_get_parser_use_client_charset (void);
332 #endif /* !SERVER_MODE */
333 
334  extern INTL_CODESET lang_charset_cubrid_name_to_id (const char *name);
335  extern const char *lang_charset_introducer (const INTL_CODESET codeset);
336  extern int lang_load_library (const char *lib_file, void **handle);
337  extern int lang_locale_data_load_from_lib (LANG_LOCALE_DATA * lld, void *lib_handle, const LOCALE_FILE * lf,
338  bool is_load_for_dump);
339  extern int lang_load_count_coll_from_lib (int *count_coll, void *lib_handle, const LOCALE_FILE * lf);
340  extern int lang_load_get_coll_name_from_lib (const int coll_pos, char **coll_name, void *lib_handle,
341  const LOCALE_FILE * lf);
342  extern int lang_load_coll_from_lib (COLL_DATA * cd, void *lib_handle, const LOCALE_FILE * lf);
343 
346  extern int lang_check_coll_compat (const LANG_COLL_COMPAT * coll_array, const int coll_cnt, const char *client_text,
347  const char *server_text);
348  extern int lang_check_locale_compat (const LANG_LOCALE_COMPAT * loc_array, const int loc_cnt, const char *client_text,
349  const char *server_text);
350 #ifdef __cplusplus
351 }
352 #endif
353 
354 #endif /* _LANGUAGE_SUPPORT_H_ */
const char * month_short_parse_order
#define LANG_MAX_COLLATIONS
#define CAL_AM_PM_COUNT
const char * charset_cubrid_name
void * handle
const char * date_format
INTL_CODESET lang_charset_cubrid_name_to_id(const char *name)
int lang_collation_count(void)
#define CAL_MONTH_COUNT
const LANG_LOCALE_DATA * lang_locale(void)
void lang_init_builtin(void)
DB_TYPE
Definition: dbtype_def.h:670
const char * lang_charset_name(const INTL_CODESET codeset)
void lang_init_console_txt_conv(void)
bool lang_is_coll_name_allowed(const char *name)
int lang_get_charset_env_string(char *buf, int buf_size, const char *lang_name, const INTL_CODESET charset)
const ALPHABET_DATA * lang_user_alphabet_w_coll(const int collation_id)
ALPHABET_DATA alphabet
const char * space_char
int lang_set_charset(const INTL_CODESET codeset)
INTL_CODESET lang_get_client_charset(void)
const char * lang_currency_symbol(DB_CURRENCY curr)
int lang_init(void)
const char * lang_get_msg_Loc_name(void)
const char * datetime_format
int lang_check_coll_compat(const LANG_COLL_COMPAT *coll_array, const int coll_cnt, const char *client_text, const char *server_text)
int lang_load_count_coll_from_lib(int *count_coll, void *lib_handle, const LOCALE_FILE *lf)
bool lang_get_parser_use_client_charset(void)
const char * timestamptz_format
LANG_LOCALE_DATA * next_lld
const char * charset_name
#define COLL_NAME_SIZE
DB_CURRENCY
Definition: dbtype_def.h:799
int lang_get_client_collation(void)
INTL_LANG lang_id(void)
bool allow_like_rewrite
INTL_CODESET codeset
const LANG_LOCALE_DATA * lang_get_specific_locale(const INTL_LANG lang, const INTL_CODESET codeset)
ALPHABET_DATA ident_alphabet
const char * lang_charset_cubrid_name(const INTL_CODESET codeset)
int lang_set_language(const char *lang_str)
const char * lang_charset_introducer(const INTL_CODESET codeset)
int lang_set_charset_lang(const char *lang_charset)
TEXT_CONVERSION * lang_get_txt_conv(void)
const char * timestamp_format
void lang_set_generic_unicode_norm(UNICODE_NORMALIZATION *norm)
const char * day_parse_order
const char * lang_get_codeset_name(int codeset_id)
int lang_load_coll_from_lib(COLL_DATA *cd, void *lib_handle, const LOCALE_FILE *lf)
const char * day_short_parse_order
const char * am_pm_parse_order
const char * lang_get_Lang_name(void)
#define CAL_DAY_COUNT
const char * time_format
int lang_set_flag_from_lang(const char *lang_str, bool has_user_format, bool has_user_lang, int *flag)
int lang_set_flag_from_lang_id(const INTL_LANG lang, bool has_user_format, bool has_user_lang, int *flag)
bool allow_prefix_index
int lang_load_library(const char *lib_file, void **handle)
const char * charset_desc
int lang_load_get_coll_name_from_lib(const int coll_pos, char **coll_name, void *lib_handle, const LOCALE_FILE *lf)
const char * month_parse_order
INTL_LANG lang_get_lang_id_from_flag(const int flag, bool *has_user_format, bool *has_user_lang)
INTL_CODESET charset_id
int lang_strmatch_utf8_uca_w_coll_data(const COLL_DATA *coll_data, bool is_match, const unsigned char *str1, const int size1, const unsigned char *str2, const int size2, const unsigned char *escape, const bool has_last_escape, int *str1_match_size, bool ignore_trailing_space)
const char * lang_name
int lang_db_put_charset(void)
int lang_locale_data_load_from_lib(LANG_LOCALE_DATA *lld, void *lib_handle, const LOCALE_FILE *lf, bool is_load_for_dump)
char lang_digit_grouping_symbol(const INTL_LANG lang_id)
UNICODE_NORMALIZATION unicode_norm
DB_CURRENCY lang_currency(void)
DB_CURRENCY lang_locale_currency(const char *locale_str)
void lang_set_parser_use_client_charset(bool use)
unsigned int INTL_LANG
Definition: intl_support.h:132
int lang_get_lang_id_from_name(const char *lang_name, INTL_LANG *lang_id)
TEXT_CONVERSION * txt_conv
int lang_charset_name_to_id(const char *name, INTL_CODESET *codeset)
bool allow_index_opt
int lang_locales_count(bool check_codeset)
LANG_COLLATION * default_lang_coll
INTL_CODESET codeset
LANG_LOCALE_DATA * default_lang
void lang_final(void)
int lang_check_locale_compat(const LANG_LOCALE_COMPAT *loc_array, const int loc_cnt, const char *client_text, const char *server_text)
UNICODE_NORMALIZATION * lang_get_generic_unicode_norm(void)
enum intl_codeset INTL_CODESET
Definition: intl_support.h:190
INTL_CODESET lang_charset(void)
INTL_CODESET codeset
const char * datetimetz_format
LANG_COLLATION * lang_get_collation(const int coll_id)
const char * introducer
bool lang_check_identifier(const char *name, int length)
LANG_COLLATION * lang_get_collation_by_name(const char *coll_name)
char lang_digit_fractional_symbol(const INTL_LANG lang_id)
#define LANG_MAX_LANGNAME
DB_CURRENCY default_currency_code
INTL_CODESET lang_Loc_charset
const char * lang_get_collation_name(const int coll_id)
const char ** p
Definition: dynamic_load.c:945
const char * lang_date_format_parse(const INTL_LANG lang_id, const INTL_CODESET codeset, const DB_TYPE type, INTL_CODESET *format_codeset)
const LANG_LOCALE_DATA * lang_get_first_locale_for_lang(const INTL_LANG lang)
LANG_COLLATION * lang_Collations[LANG_MAX_COLLATIONS]
const char * lang_get_lang_name_from_id(const INTL_LANG lang_id)