cubrid-doxygen/locale__support_8h_source.html

 /*
  * Copyright 2008 Search Solution Corporation
  * Copyright 2016 CUBRID Corporation
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  *
  */


 /*
  * locale_support.h : Locale support using LDML files
  *
  */

 #ifndef _LOCALE_SUPPORT_H_
 #define _LOCALE_SUPPORT_H_

 #ident "$Id$"

 #include <stddef.h>
 #include "porting.h"
 #include "dbtype_def.h"
 #include "locale_lib_common.h"

 /* Maximum number of Unicode characters handled.
  * Do not raise this above 65536. */
 #define MAX_UNICODE_CHARS 65536

 /* Allowed multiplier for data string casing:
  * how many times a string can grow or shrink (in characters) when performing
  * lower / upper. */
 #define INTL_CASING_EXPANSION_MULTIPLIER 2

 /* Allowed multiplier for identifier casing:
  * how many times a string identifier can grow (in byte size) when
  * performing lower / upper on DB identifiers.
  * This growth can occur only in the UTF-8 charset (see Unicode data; for
  * example, the lower case of U+023A is U+2C65 - 2 bytes become 3 bytes).
  * This restriction does not apply to user strings.
  * The Turkish casing rules do not apply to identifiers, only to user
  * strings. Identifier casing rules are the same across locales, provided they
  * use the same charset and Unicode data (built-in en_US.utf8, ko_KR.utf8 and
  * tr_TR.utf8 have different Unicode data than LDML de_DE.utf8, hence they
  * produce different identifier casing rules).
  */
 #define INTL_IDENTIFIER_CASING_SIZE_MULTIPLIER 2

 /* Flag bit of a 'next' sequence value. The 'next' value is used to determine
  * the next string in sorting order for the LIKE operator.
  * Bit set   : the next sequence is a contraction and the remaining low bits
  *             hold the contraction id.
  * Bit clear : the 'next' value is a Unicode codepoint. */
 #define INTL_MASK_CONTR  0x80000000

 /* Non-zero when the 'next' value 'v' designates a contraction */
 #define INTL_IS_NEXT_CONTR(v) \
   (((v) & INTL_MASK_CONTR) != 0)

 /* Contraction id carried by 'v' : everything below the flag bit */
 #define INTL_GET_NEXT_CONTR_ID(v) ((v) & (INTL_MASK_CONTR - 1))

 /*
  * Layout of the L1-L3 UCA weights packed in a 32 bit unsigned int
  * (most significant bit first):
  * 33333332 22222222 11111111 11111111
  * L1 = 0000-ffff (bits  0-15)
  * L2 = 0000-01ff (bits 16-24)
  * L3 = 0000-007f (bits 25-31)
  */
 #define UCA_GET_L1_W(v) ((v) & 0xffff)
 #define UCA_GET_L2_W(v) (((v) >> 16) & 0x01ff)
 #define UCA_GET_L3_W(v) (((v) >> 25) & 0x7fu)

 /* Size of the locale name buffer (see locale_data.locale_name) */
 #define LOC_LOCALE_STR_SIZE 10
 /* Size of generic text buffers (see locale_data.data_buffer and the
  * codepoint buffers of cubrid_tailor_rule) */
 #define LOC_DATA_BUFF_SIZE  256

 /* Size of a collation name buffer (see coll_data.coll_name) */
 #define COLL_NAME_SIZE 32
 /* Size of a symbol name buffer (see the members of coll_data_ref) */
 #define LOC_LIB_SYMBOL_NAME_SIZE 64


 /* constants for Gregorian calendar */
 #define CAL_MONTH_COUNT  12
 #define CAL_DAY_COUNT  7
 #define CAL_AM_PM_COUNT  12

 /* Length in character of abbreviated format text for month : "Mon" */
 #define LOC_CAL_FMT_MONTH_ABBR_LEN  3
 /* Length in character of wide format text for month : "Month" */
 #define LOC_CAL_FMT_MONTH_WIDE_LEN  5
 /* Length in character of abbreviated format text for day : "Dy" */
 #define LOC_CAL_FMT_DAY_ABBR_LEN  2
 /* Length in character of wide format text for day : "Day" */
 #define LOC_CAL_FMT_DAY_WIDE_LEN  3
 /* Length in character of wide format text for day : "AM" */
 #define LOC_CAL_FMT_AM_LEN  2

 /* Multiplier for number of characters that a calendar token can have;
  * It applies for each token format text.
  * Current value is set based on the 'Day' format text and longest day name
  * in km_KH which is 14 chars */
 #define LOC_PARSE_FRMT_TO_TOKEN_MULT  5

 #define LOC_DATA_MONTH_ABBR_SIZE (LOC_CAL_FMT_MONTH_ABBR_LEN) * \
                  (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                  (INTL_UTF8_MAX_CHAR_SIZE)
 #define LOC_DATA_MONTH_WIDE_SIZE (LOC_CAL_FMT_MONTH_WIDE_LEN) * \
                  (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                  (INTL_UTF8_MAX_CHAR_SIZE)

 #define LOC_DATA_DAY_ABBR_SIZE (LOC_CAL_FMT_DAY_ABBR_LEN) * \
                    (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                    (INTL_UTF8_MAX_CHAR_SIZE)

 #define LOC_DATA_DAY_WIDE_SIZE (LOC_CAL_FMT_DAY_WIDE_LEN) * \
                    (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                    (INTL_UTF8_MAX_CHAR_SIZE)

 #define LOC_DATA_AM_PM_SIZE (LOC_CAL_FMT_AM_LEN) * \
                 (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                 (INTL_UTF8_MAX_CHAR_SIZE)

 /* Length of a currency ISO code (three characters) */
 #define LOC_DATA_CURRENCY_ISO_CODE_LEN 3

 /* Size of the nul-terminated buffers holding one or two UTF-8 chars
  * (see tailor_rule.anchor_buf and locale_data.last_anchor_buf) */
 #define LOC_DATA_COLL_TWO_CHARS 13
 /* NOTE(review): presumably the allocation growth steps (in elements) for the
  * 'rules' and 'cub_rules' arrays of coll_tailoring - confirm in the .c file */
 #define LOC_DATA_TAILOR_RULES_COUNT_GROW 128
 #define LOC_DATA_COLL_CUBRID_TAILOR_COUNT_GROW 8
 /* Size of the text buffers holding weights / steps in cubrid_tailor_rule */
 #define MAX_STRLEN_FOR_COLLATION_ELEMENT 136

 /* Single-bit flags, one per locale section that can be dumped.
  * NOTE(review): presumably OR-ed together into the 'dl_settings' argument of
  * locale_dump () / locale_dump_lib_collations () - confirm against callers. */
 #define DUMPLOCALE_IS_CALENDAR                  0x0001
 #define DUMPLOCALE_IS_NUMBERING                 0x0002
 #define DUMPLOCALE_IS_ALPHABET                  0x0004
 #define DUMPLOCALE_IS_ALPHABET_LOWER            0x0008
 #define DUMPLOCALE_IS_ALPHABET_UPPER            0x0010
 #define DUMPLOCALE_IS_IDENTIFIER_ALPHABET       0x0020
 #define DUMPLOCALE_IS_IDENTIFIER_ALPHABET_LOWER 0x0040
 #define DUMPLOCALE_IS_IDENTIFIER_ALPHABET_UPPER 0x0080
 #define DUMPLOCALE_IS_COLLATION_CP_ORDER        0x0100
 #define DUMPLOCALE_IS_COLLATION_WEIGHT_ORDER    0x0200
 #define DUMPLOCALE_IS_NORMALIZATION             0x0400
 #define DUMPLOCALE_IS_TEXT_CONV                 0x0800

 /* NOTE(review): presumably the size of the buffers used to build error
  * messages - confirm against the users of this constant */
 #define ERR_MSG_SIZE 512

 /*
  * LOG_LOCALE_ERROR - report an error raised while processing locales
  *   msg(in)       : message text (passed as a "%s" argument)
  *   er_status(in) : error code handed to er_set ()
  *   do_print(in)  : when true, the message is also written to stderr and
  *                   passed to util_log_write_errstr ()
  *
  * Note : 'msg' appears up to three times in the expansion, so it must not
  * have side effects.
  */
 #define LOG_LOCALE_ERROR(msg, er_status, do_print) \
   do { \
       er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 1, msg); \
       if (do_print) \
     { \
       fprintf (stderr, "Error processing locales: %s\n", msg); \
       util_log_write_errstr ("Error processing locales: %s\n", msg); \
     } \
     } while (0)

 #define MAPPING_INDEX_MASK  0x100000

 #define SET_MAPPING_INDEX(val, is_used, offset)   \
   do {                        \
     val = (offset);               \
     if (is_used)                  \
       {                       \
     val |= MAPPING_INDEX_MASK;        \
       }                       \
   } while (0);

 #define CP_HAS_MAPPINGS(val)              \
   (((val) & MAPPING_INDEX_MASK) == MAPPING_INDEX_MASK)

 #define GET_MAPPING_OFFSET(val) ((val) & ~MAPPING_INDEX_MASK)

 /* UCA_CP : presumably a codepoint value in UCA data (TODO confirm);
  * UCA_W  : a UCA weight value */
 typedef unsigned short UCA_CP;
 typedef unsigned short UCA_W;

 /* Names of the files associated with one locale */
 typedef struct locale_file LOCALE_FILE;
 struct locale_file
 {
   char *locale_name;        /* name of the locale */
   char *ldml_file;          /* LDML file of the locale (presumably a path - confirm) */
   char *lib_file;           /* library file of the locale (presumably a path - confirm) */
 };

 /* Position inside an LDML file.
  * NOTE(review): presumably kept for error reporting during parsing - confirm */
 typedef struct ldml_context LDML_CONTEXT;
 struct ldml_context
 {
   char *ldml_file;          /* LDML file name */
   int line_no;              /* line number inside the LDML file */
 };

 /* Collation structures */
 /* Tailoring level : weight level of a rule, also used as collation strength */
 typedef enum
 {
   TAILOR_UNDEFINED = 0,
   TAILOR_PRIMARY = 1,
   TAILOR_SECONDARY = 2,
   TAILOR_TERTIARY = 3,
   TAILOR_QUATERNARY = 4,
   TAILOR_IDENTITY = 5
 } T_LEVEL;

 /* Direction in which a tailoring rule is applied relative to its anchor */
 typedef enum
 {
   TAILOR_AFTER = 0,
   TAILOR_BEFORE = 1
 } TAILOR_DIR;

 /* Type of char data held in a buffer :
  *  - BUF_TYPE_CODE when the tag is cp, ecp
  *  - BUF_TYPE_CHAR when the tag is ch, ech */
 typedef enum
 {
   BUF_TYPE_CHAR = 0,
   BUF_TYPE_CODE = 1
 } CP_BUF_TYPE;

 /* Tailoring position : either the reference is taken from a buffer, or it is
  * one of the logical positions below (first / last of each class) */
 typedef enum
 {
   RULE_POS_BUFFER = 0,          /* Non-logical position, use buffer */

   /* variable */
   RULE_POS_FIRST_VAR = 1,       /* Logical first variable */
   RULE_POS_LAST_VAR = 2,        /* Logical last variable */

   /* primary ignorable */
   RULE_POS_FIRST_PRI_IGN = 3,   /* Logical first primary ignorable */
   RULE_POS_LAST_PRI_IGN = 4,    /* Logical last primary ignorable */

   /* secondary ignorable */
   RULE_POS_FIRST_SEC_IGN = 5,   /* Logical first secondary ignorable */
   RULE_POS_LAST_SEC_IGN = 6,    /* Logical last secondary ignorable */

   /* tertiary ignorable */
   RULE_POS_FIRST_TERT_IGN = 7,  /* Logical first tertiary ignorable */
   RULE_POS_LAST_TERT_IGN = 8,   /* Logical last tertiary ignorable */

   /* non-ignorable */
   RULE_POS_FIRST_NON_IGN = 9,   /* Logical first non-ignorable */
   RULE_POS_LAST_NON_IGN = 10,   /* Logical last non-ignorable */

   /* trailing */
   RULE_POS_FIRST_TRAIL = 11,    /* Logical first trailing */
   RULE_POS_LAST_TRAIL = 12      /* Logical last trailing */
 } RULE_POS_TYPE;

 /* One collation tailoring rule : positions the characters of 't_buf' relative
  * to a reference (buffer or logical position) at a given weight level */
 typedef struct tailor_rule TAILOR_RULE;
 struct tailor_rule
 {
   T_LEVEL level;        /* weight level : primary, .. identity */

   /* Anchor (reference) buffer for which the rule is defined. It may contain
    * one or two (for an expansion rule) UTF-8 chars. Buffer is nul-terminated. */
   char anchor_buf[LOC_DATA_COLL_TWO_CHARS];

   /* Reference : */
   RULE_POS_TYPE r_pos_type; /* processing flag : logical position or buffer value for reference */
   char *r_buf;          /* Buffer containing UTF-8 characters of reference */
   int r_buf_size;       /* size of 'r_buf' (presumably in bytes - confirm) */

   TAILOR_DIR direction;     /* direction for applying rule : after, before */

   /* Buffer containing UTF-8 characters to be tailored. */
   /* Buffer is NOT nul-terminated; its size is kept in 't_buf_size'. */
   char *t_buf;
   int t_buf_size;

   bool multiple_chars;      /* true  : rule for tailoring multiple chars
                  * false : rule for a single character */
 };


 /*
  * CUBRID_TAILOR_RULE - Structure used for representing the rules for
  *          absolute tailoring, e.g. manually setting the weights
  *          and collation elements for a Unicode character or for
  *          character ranges.
  */
 typedef struct cubrid_tailor_rule CUBRID_TAILOR_RULE;
 struct cubrid_tailor_rule
 {
   /* The first and last (inclusive) codepoints of the codepoint range to be
    * tailored, kept in text format for later validation and parsing. */
   char start_cp_buf[LOC_DATA_BUFF_SIZE];
   char end_cp_buf[LOC_DATA_BUFF_SIZE];
   /* Type of data held in each of the two buffers above (char or code) */
   CP_BUF_TYPE start_cp_buf_type;
   CP_BUF_TYPE end_cp_buf_type;

   char start_weight[MAX_STRLEN_FOR_COLLATION_ELEMENT];
   /* 'start_weight' : buffer containing the weight value to use in the rule.
    * Buffer is NOT NULL-terminated. Example : [100.0.0.0][0.0.0.2]...etc. */

   char step[MAX_STRLEN_FOR_COLLATION_ELEMENT];
   /* 'step' : the step (per level) with which the weight range is increased.
    * Default value is 0 for all levels, so single-codepoint and identical
    * tailoring can be easily implemented. */
 };

 /* Contraction handling policy. The values are single bits, since the
  * 'sett_contr_policy' setting is to be regarded as a bit-field flag. */
 typedef enum
 {
   CONTR_IGNORE = 0,
   CONTR_TAILORING_USE = 1 << 0,
   CONTR_DUCET_USE = 1 << 1
 } COLL_CONTR_POLICY;

 /* How a pattern whose last character is a contraction starter is matched.
  * Assuming "ch" is a contraction :
  *  - MATCH_CONTR_BOUND_FORBID : "bac" is not matched in "bachxxx"
  *  - MATCH_CONTR_BOUND_ALLOW  : "bac" is matched in "bachxxx" */
 typedef enum
 {
   MATCH_CONTR_BOUND_FORBID = 0,
   MATCH_CONTR_BOUND_ALLOW
 } COLL_MATCH_CONTR;

 /* UCA sort options : the settings that drive how a collation compares */
 typedef struct uca_options UCA_OPTIONS;
 struct uca_options
 {
   /* collation settings */
   T_LEVEL sett_strength;    /* collation strength (primary, .. identity) */
   bool sett_backwards;      /* backwards on/off */
   bool sett_caseLevel;      /* caseLevel on/off */
   int sett_caseFirst;       /* 0=off; 1=upper ; 2=lower */
   bool sett_expansions;     /* use expansions */

   /* how to handle contractions; should be regarded as a bit-field flag
    * (see COLL_CONTR_POLICY for the bit values) */
   int sett_contr_policy;

   /* set only when sorting for 'next' with expansions : not serialized */
   bool use_only_first_ce;

   /* how to handle string matching when a contraction spans over the boundary */
   COLL_MATCH_CONTR sett_match_contr;
 };

 /* The members below contain the symbol names from which certain weight
  * arrays are loaded. By default, a symbol name is the name of the
  * corresponding exported weight array. However, if two collations have
  * identical weight arrays after compiling, the symbol name corresponding to
  * one of the arrays is set to the name of the other, and the actual
  * array is not exported into the shared library. */
 typedef struct coll_data_ref COLL_DATA_REF;
 struct coll_data_ref
 {
   char coll_weights_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_next_cp_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_uca_num_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_uca_w_l13_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_uca_w_l4_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_contr_list_ref[LOC_LIB_SYMBOL_NAME_SIZE];
   char coll_cp_first_contr_array_ref[LOC_LIB_SYMBOL_NAME_SIZE];
 };

 /* Collation data : weights, 'next' codepoints, UCA expansion weights and
  * contractions of one collation */
 typedef struct coll_data COLL_DATA;
 struct coll_data
 {
   int coll_id;          /* collation id */
   char coll_name[COLL_NAME_SIZE];   /* collation name */

   UCA_OPTIONS uca_opt;      /* UCA sort options of this collation */

   unsigned int *weights;    /* array of weights (one weight per CP) */
   unsigned int *next_cp;    /* next CP (in order defined by collation) */
   unsigned int *weights_ti; /* array of weights for ignore trailing space */
   unsigned int *next_cp_ti; /* next CP (for ignore trailing space) */

   int w_count;          /* # of codepoints in this collation */

   /* Size of uca_w = 'w_count' X 'uca_exp_num' X 'sizeof (UCA_W)' */
   /* For each codepoint entry in uca_w only the corresponding uca_num weights are used */
   int uca_exp_num;      /* max number of CE per codepoint */
   char *uca_num;        /* number of CE for each codepoint */
   UCA_L13_W *uca_w_l13;     /* weight array L1, L2, L3 */
   UCA_L4_W *uca_w_l4;       /* presumably the weight array for L4 - confirm */

   COLL_CONTRACTION *contr_list; /* contractions list; contractions are stored in binary ascending order of the
                  * UTF-8 buffer */
   int count_contr;          /* presumably # of entries in 'contr_list' - confirm */
   int contr_min_size;       /* size of smallest contraction buffer (in bytes) */

   /* Array of first contraction index for each codepoint; contains 'w_count' elements :
    *  - value -1 means the CP is not a contraction starter
    *  - any other value is the index of the contraction in the contractions list ('contr_list') */
   int *cp_first_contr_array;
   /* codepoint value from which 'cp_first_contr_array' can be used */
   unsigned int cp_first_contr_offset;
   /* # of codepoints in 'cp_first_contr_array' */
   unsigned int cp_first_contr_count;

   char checksum[32 + 1];    /* 32 characters plus one (presumably for the nul-terminator) */
 };

 /* Collation tailoring : settings and rules of one collation */
 typedef struct coll_tailoring COLL_TAILORING;
 struct coll_tailoring
 {
   char coll_name[COLL_NAME_SIZE];   /* collation name */

   int coll_id;              /* collation id */

   UCA_OPTIONS uca_opt;      /* UCA sort options */

   /* Number of codepoints to take into account for collation;
    * -1 means unlimited (we support up to MAX_UNICODE_CHARS) */
   int sett_max_cp;

   /* collation tailoring rules */
   int count_rules;      /* # of tailorings */
   int max_rules;        /* # of max (allocated tailorings) */
   TAILOR_RULE *rules;       /* tailoring rules */

   CUBRID_TAILOR_RULE *cub_rules;    /* absolute tailoring rules */
   int cub_count_rules;      /* # of absolute tailorings */
   int cub_max_rules;        /* # of max (allocated absolute tailorings) */
   LDML_CONTEXT ldml_context;    /* LDML file and line (see LDML_CONTEXT) */
 };


 /* Alphabet usage mode :
  *  - ALPHABET_USE_BUILTIN : built-in, codeset specific routines
  *                           (lower_cp, upper_cp not used)
  *  - ALPHABET_USE_DATA    : using data in lower_cp, upper_cp arrays
  */
 typedef enum
 {
   ALPHABET_USE_BUILTIN = 0,
   ALPHABET_USE_DATA = 1
 } ALPHABET_USE_MODE;

 /* Alphabet generation type.
  * When several locales use the same UNICODE or ASCII mode, only one
  * reference copy is loaded. */
 typedef enum
 {
   ALPHABET_UNICODE = 0,
   ALPHABET_ASCII = 1,
   ALPHABET_TAILORED = 2
 } ALPHABET_TYPE;

 /* Alphabet data : lower and upper case mappings of codepoints */
 typedef struct alphabet_data ALPHABET_DATA;
 struct alphabet_data
 {
   ALPHABET_TYPE a_type;     /* alphabet generation type */
   int codeset;          /* codeset of alphabet : not serialized */
   int l_count;          /* number of elements */

   int lower_multiplier;     /* how many codepoints each lower entry contains */
   unsigned int *lower_cp;   /* lower CP */

   int upper_multiplier;     /* how many codepoints each upper entry contains */
   unsigned int *upper_cp;   /* upper CP */

   bool do_not_save;     /* used by genlocale if shared alphabet */
 };

 /* Kind of casing transformation described by a TRANSFORM_RULE */
 typedef enum
 {
   TR_UPPER = 0,
   TR_LOWER = 1
 } TRANSFORM_TYPE;

 /* Describes how a text transforms into another text.
  * Used for lower / upper casing rule description. */
 typedef struct transform_rule TRANSFORM_RULE;
 struct transform_rule
 {
   TRANSFORM_TYPE type;      /* upper or lower casing rule */

   char *src;                /* source text */
   int src_size;             /* size of 'src' (presumably in bytes - confirm) */

   char *dest;               /* resulting text */
   int dest_size;            /* size of 'dest' (presumably in bytes - confirm) */
 };


 /* Alphabet tailoring : settings and casing rules for the alphabet */
 typedef struct alphabet_tailoring ALPHABET_TAILORING;
 struct alphabet_tailoring
 {
   /* Number of codepoints the optimization process will take into account
    * for casing : -1 means unlimited (we support up to MAX_UNICODE_CHARS) */
   int sett_max_letters;

   int alphabet_mode;        /* 0 : default UnicodeData
                  * 1 : UnicodeData with specified file
                  * 2 : ASCII letter and casing */
   /* file path for Unicode data (if 'alphabet_mode' == 1) */
   char unicode_data_file[PATH_MAX];

   int count_rules;      /* # of tailorings */
   int max_rules;        /* # of max (allocated tailorings) */
   TRANSFORM_RULE *rules;    /* casing (transform) rules */
   LDML_CONTEXT ldml_context;    /* LDML file and line (see LDML_CONTEXT) */
 };

 /* Text conversion kinds */
 typedef enum
 {
   TEXT_CONV_NO_CONVERSION = 0,
   TEXT_CONV_ISO_88591_BUILTIN = 1,
   TEXT_CONV_ISO_88599_BUILTIN = 2,
   TEXT_CONV_GENERIC_1BYTE = 3,  /* user defined UTF-8 to single byte codepage */
   TEXT_CONV_GENERIC_2BYTE = 4   /* user defined UTF-8 to double byte codepage */
 } TEXT_CONV_TYPE;

 /* Size of the system identifier strings of TEXT_CONVERSION_PRM */
 #define TXT_CONV_SYSTEM_STR_SIZE    256
 /* Text conversion between UTF-8 and another text encoding */
 typedef struct text_conversion TEXT_CONVERSION;
 struct text_conversion
 {
   TEXT_CONV_TYPE conv_type; /* kind of conversion */

   /* both identifiers are used to ensure locale binary files portability */
   char *win_codepages;      /* Windows codepage identifier */
   char *nl_lang_str;        /* Linux language string */

   unsigned char byte_flag[256]; /* used in DBCS encoding schemes :
                  * 0 : single byte character
                  * 1 : leading byte for double byte char
                  * 2 : invalid byte */
   /* UTF-8 to text */
   unsigned int utf8_first_cp;
   unsigned int utf8_last_cp;
   CONV_CP_TO_BYTES *utf8_to_text;

   /* text to UTF-8 */
   unsigned int text_first_cp;
   unsigned int text_last_cp;
   CONV_CP_TO_BYTES *text_to_utf8;

   /* Conversion and initialization routines.
    * NOTE(review): the four conversion arguments look like (source, source
    * size, destination buffer, destination size) - confirm against the
    * implementations. */
   int (*utf8_to_text_func) (const char *, const int, char **, int *);
   int (*text_to_utf8_func) (const char *, const int, char **, int *);
   void (*init_conv_func) (void);
 };

 /* Text conversion parameters, with fixed-size buffers for the identifiers
  * and for the conversion file.
  * NOTE(review): presumably filled while parsing LDML - confirm */
 typedef struct text_conversion_prm TEXT_CONVERSION_PRM;
 struct text_conversion_prm
 {
   TEXT_CONV_TYPE conv_type; /* kind of conversion */

   char win_codepages[TXT_CONV_SYSTEM_STR_SIZE]; /* Windows codepage identifier */
   char nl_lang_str[TXT_CONV_SYSTEM_STR_SIZE];   /* Linux language string */

   char conv_file[PATH_MAX]; /* conversion file (presumably its path - confirm) */
 };

 /* NOTE(review): presumably a name decoration for the exported normalization
  * symbols - confirm where this is used */
 #define UNICODE_NORMALIZATION_DECORATOR "std"

 /* Unicode normalization data */
 typedef struct unicode_normalization UNICODE_NORMALIZATION;
 struct unicode_normalization
 {
   UNICODE_MAPPING *unicode_mappings;
   int unicode_mappings_count;   /* total number of mappings, fully, partially or not decomposed. */
   int *unicode_mapping_index;   /* presumably values built with SET_MAPPING_INDEX - confirm */
   int *list_full_decomp;

   bool do_not_save;
 };

 /* Sizes of the calendar format buffers of locale_data :
  *  - simple      : dateFormat, timeFormat
  *  - composed    : datetimeFormat, timestampFormat
  *  - simple TZ   : timetzFormat
  *  - composed TZ : datetimetzFormat, timestamptzFormat */
 #define CAL_SIMPLE_DATE_FORMAT_SIZE  30
 #define CAL_COMP_DATE_FORMAT_SIZE  48
 #define CAL_SIMPLE_DATE_TZ_FORMAT_SIZE  52
 #define CAL_COMP_DATE_TZ_FORMAT_SIZE 70

 /* user defined LOCALE DATA */
 /* One collation of a locale : tailoring info, optimized data and export names */
 typedef struct locale_collation LOCALE_COLLATION;
 struct locale_collation
 {
   COLL_TAILORING tail_coll; /* collation info gathered from LDML */
   COLL_DATA opt_coll;       /* optimized collation data */
   COLL_DATA_REF coll_ref;   /* collation array export identifiers */
   bool do_not_save;     /* set true if collation is shared and already processed */
 };

 /* All the data of one locale : calendar, numeric symbols, collations,
  * alphabets, normalization, text conversion, plus the state used while the
  * locale is being processed */
 typedef struct locale_data LOCALE_DATA;
 struct locale_data
 {
   /* name of locale : used for validation; should be set by application, before LDML parsing */
   char locale_name[LOC_LOCALE_STR_SIZE];

   /* calendar info : only Gregorian calendar is supported */
   char dateFormat[CAL_SIMPLE_DATE_FORMAT_SIZE]; /* date format */
   char timeFormat[CAL_SIMPLE_DATE_FORMAT_SIZE]; /* time format */

   char datetimeFormat[CAL_COMP_DATE_FORMAT_SIZE];   /* datetime format */
   char timestampFormat[CAL_COMP_DATE_FORMAT_SIZE];  /* timestamp format */

   char timetzFormat[CAL_SIMPLE_DATE_TZ_FORMAT_SIZE];    /* timetz format */
   char datetimetzFormat[CAL_COMP_DATE_TZ_FORMAT_SIZE];  /* datetimetz format */
   char timestamptzFormat[CAL_COMP_DATE_TZ_FORMAT_SIZE]; /* timestamptz format */

   /* names of months, week days, day periods */
   char month_names_abbreviated[CAL_MONTH_COUNT][LOC_DATA_MONTH_ABBR_SIZE];
   char month_names_wide[CAL_MONTH_COUNT][LOC_DATA_MONTH_WIDE_SIZE];
   char day_names_abbreviated[CAL_DAY_COUNT][LOC_DATA_DAY_ABBR_SIZE];
   char day_names_wide[CAL_DAY_COUNT][LOC_DATA_DAY_WIDE_SIZE];
   char am_pm[CAL_AM_PM_COUNT][LOC_DATA_AM_PM_SIZE];

   /* NOTE(review): presumably the order in which the names above are tried
    * when parsing (one index per name) - confirm */
   char month_names_abbr_parse_order[CAL_MONTH_COUNT];
   char month_names_wide_parse_order[CAL_MONTH_COUNT];
   char day_names_abbr_parse_order[CAL_DAY_COUNT];
   char day_names_wide_parse_order[CAL_DAY_COUNT];
   char am_pm_parse_order[CAL_AM_PM_COUNT];

   /* numeric symbols : digit grouping, decimal */
   char number_decimal_sym;
   char number_group_sym;
   DB_CURRENCY default_currency_code;    /* ISO code for default locale currency. */

   LOCALE_COLLATION *collations;     /* collations of this locale */
   int coll_cnt;                     /* presumably # of entries in 'collations' - confirm */

   ALPHABET_TAILORING alpha_tailoring;
   ALPHABET_DATA alphabet;   /* data for user lower / upper */
   ALPHABET_DATA identif_alphabet;   /* data for lower / upper for identifiers */

   /* unicode data file used for alphabets and normalization */
   int unicode_mode;     /* 0 : default UnicodeData 1 : UnicodeData with specified file */
   /* file path for Unicode data (NOTE(review): the original comment said
    * "if 'alphabet_mode' == 1"; 'unicode_mode' == 1 looks intended - confirm) */
   char unicode_data_file[PATH_MAX];

   /* normalization */
   UNICODE_NORMALIZATION unicode_normalization;

   /* console text conversion */
   TEXT_CONVERSION txt_conv;
   TEXT_CONVERSION_PRM txt_conv_prm;

   /* data members used during processing : */
   int curr_period;      /* processing index for calendar : 0-11 : months 0-6 : week days 0-12 : AM, PM period
                  * names (NOTE(review): CAL_AM_PM_COUNT is 12, so 0-11 looks intended - confirm) */
   int name_type;        /* processing flag for calendar name : 1 - abbr 2 - wide; 0 - uninitialized */

   /* processing : last anchor : used when building a new collation rule */
   /* buffer is nul-terminated */
   char last_anchor_buf[LOC_DATA_COLL_TWO_CHARS];
   RULE_POS_TYPE last_rule_pos_type; /* processing flag : logical position or buffer */
   TAILOR_DIR last_rule_dir; /* processing flag : after, before */
   T_LEVEL last_rule_level;  /* processing flag : weight level : primary, .. identity (used for validation) */

   /* processing : last tailoring reference : used when building collation rules.
    * Pointer to a buffer : either a tailoring buffer (not nul-terminated) in a rule
    * or an anchor buffer (last_anchor_buf) */
   char *last_r_buf_p;
   int last_r_buf_size;

   /* processing : used for intermediary (partial) content data in LDML; buffer is nul-terminated */
   char data_buffer[LOC_DATA_BUFF_SIZE];
   int data_buf_count;

   char checksum[32 + 1];    /* 32 characters plus one (presumably for the nul-terminator) */

   LDML_CONTEXT ldml_context;    /* LDML file and line (see LDML_CONTEXT) */
 };

 /* Functions exported by the locale support module; they get C linkage when
  * the header is included from C++.
  * The notes below only restate what the names and signatures show; see the
  * implementation file for the exact contracts. */
 #ifdef __cplusplus
 extern "C"
 {
 #endif

   /* initialization / destruction of locale data */
   void locale_init_data (LOCALE_DATA * ld, const char *locale_name);
   void locale_destroy_data (LOCALE_DATA * ld);
   void locale_destroy_alphabet_data (const ALPHABET_DATA * a);
   void locale_destroy_normalization_data (UNICODE_NORMALIZATION * norm);
   /* locale files; 'p_locale_files' and 'p_num_locales' are output arguments
    * (NOTE(review): ownership of the returned array is not visible here) */
   int locale_get_cfg_locales (LOCALE_FILE ** p_locale_files, int *p_num_locales, bool is_lang_init);
   int locale_check_and_set_default_files (LOCALE_FILE * lf, bool is_lang_init);
   /* compiling locales and saving them to a C file */
   int locale_prepare_C_file (void);
   int locale_compile_locale (LOCALE_FILE * lf, LOCALE_DATA * ld, bool is_verbose);
   void locale_mark_duplicate_collations (LOCALE_DATA ** ld, int start_index, int end_index, bool is_verbose);
   int locale_save_all_to_C_file (LOCALE_DATA ** ld, int start_index, int end_index, LOCALE_FILE * lf);
   /* dumping; 'dl_settings' presumably combines the DUMPLOCALE_IS_* flags - confirm */
   int locale_dump (void *data, LOCALE_FILE * lf, int dl_settings, int start_value, int end_value);
   int locale_dump_lib_collations (void *lib_handle, const LOCALE_FILE * lf, int dl_settings, int start_value,
                   int end_value);
   void locale_free_shared_data (void);

 #ifdef __cplusplus
 }
 #endif

 #endif              /* _LOCALE_SUPPORT_H_ */
locale_data::unicode_mode
int unicode_mode
Definition: locale_support.h:610

COLL_MATCH_CONTR
COLL_MATCH_CONTR
Definition: locale_support.h:308

locale_destroy_normalization_data
void locale_destroy_normalization_data(UNICODE_NORMALIZATION *norm)
Definition: locale_support.c:7000

locale_data::curr_period
int curr_period
Definition: locale_support.h:622

TAILOR_DIR
TAILOR_DIR
Definition: locale_support.h:208

locale_free_shared_data
void locale_free_shared_data(void)
Definition: locale_support.c:6921

uca_options::sett_caseLevel
bool sett_caseLevel
Definition: locale_support.h:321

CAL_AM_PM_COUNT
#define CAL_AM_PM_COUNT
Definition: locale_support.h:91

CAL_COMP_DATE_TZ_FORMAT_SIZE
#define CAL_COMP_DATE_TZ_FORMAT_SIZE
Definition: locale_support.h:555

TAILOR_AFTER
Definition: locale_support.h:210

text_conversion::utf8_to_text
CONV_CP_TO_BYTES * utf8_to_text
Definition: locale_support.h:516

coll_tailoring::uca_opt
UCA_OPTIONS uca_opt
Definition: locale_support.h:398

RULE_POS_FIRST_PRI_IGN
Definition: locale_support.h:230

uca_options::sett_expansions
bool sett_expansions
Definition: locale_support.h:323

COLL_CONTR_POLICY
COLL_CONTR_POLICY
Definition: locale_support.h:297

coll_tailoring::count_rules
int count_rules
Definition: locale_support.h:404

TAILOR_PRIMARY
Definition: locale_support.h:200

locale_data::data_buf_count
int data_buf_count
Definition: locale_support.h:640

LOC_DATA_MONTH_ABBR_SIZE
#define LOC_DATA_MONTH_ABBR_SIZE
Definition: locale_support.h:110

locale_data::ldml_context
LDML_CONTEXT ldml_context
Definition: locale_support.h:644

CAL_MONTH_COUNT
#define CAL_MONTH_COUNT
Definition: locale_support.h:89

locale_file::locale_name
char * locale_name
Definition: locale_support.h:183

RULE_POS_LAST_PRI_IGN
Definition: locale_support.h:231

alphabet_data::codeset
int codeset
Definition: locale_support.h:440

cubrid_tailor_rule::start_cp_buf_type
CP_BUF_TYPE start_cp_buf_type
Definition: locale_support.h:285

locale_data::identif_alphabet
ALPHABET_DATA identif_alphabet
Definition: locale_support.h:607

text_conversion_prm::conv_type
TEXT_CONV_TYPE conv_type
Definition: locale_support.h:531

ALPHABET_USE_MODE
ALPHABET_USE_MODE
Definition: locale_support.h:419

uca_options::sett_match_contr
COLL_MATCH_CONTR sett_match_contr
Definition: locale_support.h:332

TEXT_CONV_ISO_88591_BUILTIN
Definition: locale_support.h:495

tailor_rule::t_buf
char * t_buf
Definition: locale_support.h:264

TEXT_CONV_GENERIC_2BYTE
Definition: locale_support.h:498

TAILOR_UNDEFINED
Definition: locale_support.h:199

ldml_context::ldml_file
char * ldml_file
Definition: locale_support.h:191

locale_file::lib_file
char * lib_file
Definition: locale_support.h:185

porting.h

RULE_POS_TYPE
RULE_POS_TYPE
Definition: locale_support.h:223

RULE_POS_LAST_VAR
Definition: locale_support.h:228

locale_data::last_r_buf_p
char * last_r_buf_p
Definition: locale_support.h:635

unicode_mapping
Definition: locale_lib_common.h:93

RULE_POS_FIRST_VAR
Definition: locale_support.h:227

alphabet_tailoring::alphabet_mode
int alphabet_mode
Definition: locale_support.h:480

coll_data::coll_id
int coll_id
Definition: locale_support.h:356

coll_tailoring::coll_id
int coll_id
Definition: locale_support.h:396

uca_options::sett_caseFirst
int sett_caseFirst
Definition: locale_support.h:322

alphabet_data::do_not_save
bool do_not_save
Definition: locale_support.h:449

CONTR_TAILORING_USE
Definition: locale_support.h:300

ldml_context::line_no
int line_no
Definition: locale_support.h:192

TR_LOWER
Definition: locale_support.h:455

locale_collation::opt_coll
COLL_DATA opt_coll
Definition: locale_support.h:562

locale_data::last_rule_pos_type
RULE_POS_TYPE last_rule_pos_type
Definition: locale_support.h:629

alphabet_tailoring::max_rules
int max_rules
Definition: locale_support.h:486

alphabet_data::a_type
ALPHABET_TYPE a_type
Definition: locale_support.h:439

MATCH_CONTR_BOUND_FORBID
Definition: locale_support.h:310

T_LEVEL
T_LEVEL
Definition: locale_support.h:197

coll_tailoring::cub_max_rules
int cub_max_rules
Definition: locale_support.h:410

tailor_rule::r_pos_type
RULE_POS_TYPE r_pos_type
Definition: locale_support.h:256

locale_collation
Definition: locale_support.h:559

coll_data::weights_ti
unsigned int * weights_ti
Definition: locale_support.h:363

locale_mark_duplicate_collations
void locale_mark_duplicate_collations(LOCALE_DATA **ld, int start_index, int end_index, bool is_verbose)
Definition: locale_support.c:4952

LOC_LIB_SYMBOL_NAME_SIZE
#define LOC_LIB_SYMBOL_NAME_SIZE
Definition: locale_support.h:85

locale_data::alphabet
ALPHABET_DATA alphabet
Definition: locale_support.h:606

RULE_POS_LAST_NON_IGN
Definition: locale_support.h:240

LOC_DATA_DAY_ABBR_SIZE
#define LOC_DATA_DAY_ABBR_SIZE
Definition: locale_support.h:117

tailor_rule
Definition: locale_support.h:247

coll_data::uca_opt
UCA_OPTIONS uca_opt
Definition: locale_support.h:359

TEXT_CONV_TYPE
TEXT_CONV_TYPE
Definition: locale_support.h:492

uca_options
Definition: locale_support.h:316

LOC_DATA_AM_PM_SIZE
#define LOC_DATA_AM_PM_SIZE
Definition: locale_support.h:125

RULE_POS_FIRST_TRAIL
Definition: locale_support.h:242

text_conversion::conv_type
TEXT_CONV_TYPE conv_type
Definition: locale_support.h:505

COLL_NAME_SIZE
#define COLL_NAME_SIZE
Definition: locale_support.h:84

uca_options::use_only_first_ce
bool use_only_first_ce
Definition: locale_support.h:329

locale_save_all_to_C_file
int locale_save_all_to_C_file(LOCALE_DATA **ld, int start_index, int end_index, LOCALE_FILE *lf)
Definition: locale_support.c:5096

locale_file::ldml_file
char * ldml_file
Definition: locale_support.h:184

CAL_SIMPLE_DATE_FORMAT_SIZE
#define CAL_SIMPLE_DATE_FORMAT_SIZE
Definition: locale_support.h:552

DB_CURRENCY
DB_CURRENCY
Definition: dbtype_def.h:799

locale_data::number_group_sym
char number_group_sym
Definition: locale_support.h:599

RULE_POS_FIRST_SEC_IGN
Definition: locale_support.h:233

locale_collation::coll_ref
COLL_DATA_REF coll_ref
Definition: locale_support.h:563

locale_get_cfg_locales
int locale_get_cfg_locales(LOCALE_FILE **p_locale_files, int *p_num_locales, bool is_lang_init)
Definition: locale_support.c:5126

unicode_normalization::unicode_mappings
UNICODE_MAPPING * unicode_mappings
Definition: locale_support.h:544

TEXT_CONV_GENERIC_1BYTE
Definition: locale_support.h:497

text_conversion::text_to_utf8
CONV_CP_TO_BYTES * text_to_utf8
Definition: locale_support.h:521

coll_tailoring::cub_count_rules
int cub_count_rules
Definition: locale_support.h:409

locale_data::collations
LOCALE_COLLATION * collations
Definition: locale_support.h:602

CP_BUF_TYPE
CP_BUF_TYPE
Definition: locale_support.h:216

ALPHABET_USE_DATA
Definition: locale_support.h:422

locale_lib_common.h

unicode_normalization
Definition: locale_support.h:542

tailor_rule::t_buf_size
int t_buf_size
Definition: locale_support.h:265

UCA_W
unsigned short UCA_W
Definition: locale_support.h:178

text_conversion::text_first_cp
unsigned int text_first_cp
Definition: locale_support.h:519

ldml_context
Definition: locale_support.h:189

transform_rule::dest_size
int dest_size
Definition: locale_support.h:469

MAX_STRLEN_FOR_COLLATION_ELEMENT
#define MAX_STRLEN_FOR_COLLATION_ELEMENT
Definition: locale_support.h:134

coll_data::count_contr
int count_contr
Definition: locale_support.h:377

transform_rule
Definition: locale_support.h:461

unicode_normalization::do_not_save
bool do_not_save
Definition: locale_support.h:549

TAILOR_BEFORE
Definition: locale_support.h:211

CONTR_IGNORE
Definition: locale_support.h:299

text_conversion::utf8_first_cp
unsigned int utf8_first_cp
Definition: locale_support.h:514

RULE_POS_LAST_TRAIL
Definition: locale_support.h:243

RULE_POS_BUFFER
Definition: locale_support.h:225

unicode_normalization::unicode_mapping_index
int * unicode_mapping_index
Definition: locale_support.h:546

ALPHABET_TAILORED
Definition: locale_support.h:432

tailor_rule::multiple_chars
bool multiple_chars
Definition: locale_support.h:267

coll_contraction
Definition: locale_lib_common.h:44

locale_collation::tail_coll
COLL_TAILORING tail_coll
Definition: locale_support.h:561

text_conversion::nl_lang_str
char * nl_lang_str
Definition: locale_support.h:509

LOC_DATA_MONTH_WIDE_SIZE
#define LOC_DATA_MONTH_WIDE_SIZE
Definition: locale_support.h:113

tailor_rule::r_buf
char * r_buf
Definition: locale_support.h:257

ALPHABET_USE_BUILTIN
Definition: locale_support.h:421

CAL_DAY_COUNT
#define CAL_DAY_COUNT
Definition: locale_support.h:90

uca_options::sett_backwards
bool sett_backwards
Definition: locale_support.h:320

coll_tailoring::cub_rules
CUBRID_TAILOR_RULE * cub_rules
Definition: locale_support.h:408

coll_data::cp_first_contr_count
unsigned int cp_first_contr_count
Definition: locale_support.h:386

TAILOR_SECONDARY
Definition: locale_support.h:201

dbtype_def.h

locale_destroy_data
void locale_destroy_data(LOCALE_DATA *ld)
Definition: locale_support.c:4298

TEXT_CONV_ISO_88599_BUILTIN
Definition: locale_support.h:496

transform_rule::src_size
int src_size
Definition: locale_support.h:466

text_conversion::win_codepages
char * win_codepages
Definition: locale_support.h:508

transform_rule::src
char * src
Definition: locale_support.h:465

tailor_rule::level
T_LEVEL level
Definition: locale_support.h:249

alphabet_data::lower_cp
unsigned int * lower_cp
Definition: locale_support.h:444

cubrid_tailor_rule::end_cp_buf_type
CP_BUF_TYPE end_cp_buf_type
Definition: locale_support.h:286

CAL_COMP_DATE_FORMAT_SIZE
#define CAL_COMP_DATE_FORMAT_SIZE
Definition: locale_support.h:553

coll_tailoring
Definition: locale_support.h:392

ALPHABET_ASCII
Definition: locale_support.h:431

locale_data::number_decimal_sym
char number_decimal_sym
Definition: locale_support.h:598

ALPHABET_TYPE
ALPHABET_TYPE
Definition: locale_support.h:428

text_conversion_prm
Definition: locale_support.h:529

coll_data::cp_first_contr_array
int * cp_first_contr_array
Definition: locale_support.h:382

text_conversion::utf8_last_cp
unsigned int utf8_last_cp
Definition: locale_support.h:515

coll_tailoring::rules
TAILOR_RULE * rules
Definition: locale_support.h:406

TEXT_CONV_NO_CONVERSION
Definition: locale_support.h:494

locale_data::last_rule_level
T_LEVEL last_rule_level
Definition: locale_support.h:631

transform_rule::type
TRANSFORM_TYPE type
Definition: locale_support.h:463

RULE_POS_FIRST_NON_IGN
Definition: locale_support.h:239

coll_data::w_count
int w_count
Definition: locale_support.h:366

transform_rule::dest
char * dest
Definition: locale_support.h:468

cubrid_tailor_rule
Definition: locale_support.h:279

locale_init_data
void locale_init_data(LOCALE_DATA *ld, const char *locale_name)
Definition: locale_support.c:4275

ALPHABET_UNICODE
Definition: locale_support.h:430

locale_data::unicode_normalization
UNICODE_NORMALIZATION unicode_normalization
Definition: locale_support.h:615

coll_data::contr_list
COLL_CONTRACTION * contr_list
Definition: locale_support.h:375

locale_collation::do_not_save
bool do_not_save
Definition: locale_support.h:564

alphabet_data::l_count
int l_count
Definition: locale_support.h:441

coll_data::uca_w_l13
UCA_L13_W * uca_w_l13
Definition: locale_support.h:372

coll_data::uca_w_l4
UCA_L4_W * uca_w_l4
Definition: locale_support.h:373

locale_data::name_type
int name_type
Definition: locale_support.h:624

locale_compile_locale
int locale_compile_locale(LOCALE_FILE *lf, LOCALE_DATA *ld, bool is_verbose)
Definition: locale_support.c:4557

locale_check_and_set_default_files
int locale_check_and_set_default_files(LOCALE_FILE *lf, bool is_lang_init)
Definition: locale_support.c:5260

CAL_SIMPLE_DATE_TZ_FORMAT_SIZE
#define CAL_SIMPLE_DATE_TZ_FORMAT_SIZE
Definition: locale_support.h:554

text_conversion
Definition: locale_support.h:503

locale_dump
int locale_dump(void *data, LOCALE_FILE *lf, int dl_settings, int start_value, int end_value)
Definition: locale_support.c:6269

BUF_TYPE_CHAR
Definition: locale_support.h:218

alphabet_data::lower_multiplier
int lower_multiplier
Definition: locale_support.h:443

alphabet_tailoring::count_rules
int count_rules
Definition: locale_support.h:485

LOC_LOCALE_STR_SIZE
#define LOC_LOCALE_STR_SIZE
Definition: locale_support.h:81

TAILOR_IDENTITY
Definition: locale_support.h:204

coll_data
Definition: locale_support.h:354

coll_data::uca_exp_num
int uca_exp_num
Definition: locale_support.h:370

tailor_rule::direction
TAILOR_DIR direction
Definition: locale_support.h:260

alphabet_data
Definition: locale_support.h:437

coll_data::next_cp
unsigned int * next_cp
Definition: locale_support.h:362

alphabet_tailoring::rules
TRANSFORM_RULE * rules
Definition: locale_support.h:487

coll_tailoring::max_rules
int max_rules
Definition: locale_support.h:405

alphabet_tailoring::sett_max_letters
int sett_max_letters
Definition: locale_support.h:478

unicode_normalization::unicode_mappings_count
int unicode_mappings_count
Definition: locale_support.h:545

locale_data::default_currency_code
DB_CURRENCY default_currency_code
Definition: locale_support.h:600

locale_data::txt_conv_prm
TEXT_CONVERSION_PRM txt_conv_prm
Definition: locale_support.h:619

locale_data::last_r_buf_size
int last_r_buf_size
Definition: locale_support.h:636

BUF_TYPE_CODE
Definition: locale_support.h:219

RULE_POS_LAST_TERT_IGN
Definition: locale_support.h:237

TXT_CONV_SYSTEM_STR_SIZE
#define TXT_CONV_SYSTEM_STR_SIZE
Definition: locale_support.h:501

coll_tailoring::ldml_context
LDML_CONTEXT ldml_context
Definition: locale_support.h:411

locale_data::coll_cnt
int coll_cnt
Definition: locale_support.h:603

RULE_POS_LAST_SEC_IGN
Definition: locale_support.h:234

CONTR_DUCET_USE
Definition: locale_support.h:301

uca_options::sett_strength
T_LEVEL sett_strength
Definition: locale_support.h:319

coll_tailoring::sett_max_cp
int sett_max_cp
Definition: locale_support.h:401

unicode_normalization::list_full_decomp
int * list_full_decomp
Definition: locale_support.h:547

UCA_L13_W
unsigned int UCA_L13_W
Definition: locale_lib_common.h:39

RULE_POS_FIRST_TERT_IGN
Definition: locale_support.h:236

conv_cp_to_bytes
Definition: locale_lib_common.h:76

locale_destroy_alphabet_data
void locale_destroy_alphabet_data(const ALPHABET_DATA *a)
Definition: locale_support.c:4341

UCA_L4_W
unsigned short int UCA_L4_W
Definition: locale_lib_common.h:40

TAILOR_TERTIARY
Definition: locale_support.h:202

locale_dump_lib_collations
int locale_dump_lib_collations(void *lib_handle, const LOCALE_FILE *lf, int dl_settings, int start_value, int end_value)
Definition: locale_support.c:6454

locale_prepare_C_file
int locale_prepare_C_file(void)
Definition: locale_support.c:5427

alphabet_tailoring::ldml_context
LDML_CONTEXT ldml_context
Definition: locale_support.h:488

MATCH_CONTR_BOUND_ALLOW
Definition: locale_support.h:311

UCA_CP
unsigned short UCA_CP
Definition: locale_support.h:177

LOC_DATA_DAY_WIDE_SIZE
#define LOC_DATA_DAY_WIDE_SIZE
Definition: locale_support.h:121

locale_file
Definition: locale_support.h:181

text_conversion::text_last_cp
unsigned int text_last_cp
Definition: locale_support.h:520

TAILOR_QUATERNARY
Definition: locale_support.h:203

locale_data::alpha_tailoring
ALPHABET_TAILORING alpha_tailoring
Definition: locale_support.h:605

coll_data::next_cp_ti
unsigned int * next_cp_ti
Definition: locale_support.h:364

coll_data::weights
unsigned int * weights
Definition: locale_support.h:361

locale_data::last_rule_dir
TAILOR_DIR last_rule_dir
Definition: locale_support.h:630

locale_data
Definition: locale_support.h:568

tailor_rule::r_buf_size
int r_buf_size
Definition: locale_support.h:258

locale_data::txt_conv
TEXT_CONVERSION txt_conv
Definition: locale_support.h:618

alphabet_tailoring
Definition: locale_support.h:474

TR_UPPER
Definition: locale_support.h:454

LOC_DATA_BUFF_SIZE
#define LOC_DATA_BUFF_SIZE
Definition: locale_support.h:82

coll_data_ref
Definition: locale_support.h:342

uca_options::sett_contr_policy
int sett_contr_policy
Definition: locale_support.h:326

LOC_DATA_COLL_TWO_CHARS
#define LOC_DATA_COLL_TWO_CHARS
Definition: locale_support.h:131

coll_data::contr_min_size
int contr_min_size
Definition: locale_support.h:378

TRANSFORM_TYPE
TRANSFORM_TYPE
Definition: locale_support.h:452

coll_data::uca_num
char * uca_num
Definition: locale_support.h:371

alphabet_data::upper_multiplier
int upper_multiplier
Definition: locale_support.h:446

coll_data::cp_first_contr_offset
unsigned int cp_first_contr_offset
Definition: locale_support.h:384

alphabet_data::upper_cp
unsigned int * upper_cp
Definition: locale_support.h:447