File locale_support.h¶

File List > base > locale_support.h
/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */


/*
 * locale_support.h : Locale support using LDML files
 *
 */

#ifndef _LOCALE_SUPPORT_H_
#define _LOCALE_SUPPORT_H_

#ident "$Id$"

#include <stddef.h>
#include "porting.h"
#include "dbtype_def.h"
#include "locale_lib_common.h"

/* Maximum Unicode characters
 * Do not change this above 65536 */
#define MAX_UNICODE_CHARS 65536

/* Allowed multiplier for data string casing.
 * How many times a string can grow or shrink (in characters) when performing
 * lower / upper */
#define INTL_CASING_EXPANSION_MULTIPLIER 2

/* Allowed multiplier for identifier casing.
 * How many times a string identifier can grow (in bytes size) when
 * performing lower / upper on DB identifiers.
 * This growing can occur only in UTF-8 charset (see Unicode data, for example
 * lower case for U+023A is U+2C65 - 2 bytes to 3 bytes).
 * This restriction does not apply to user strings.
 * The Turkish rules for casing do not apply on identifiers, only for user
 * strings. Identifier casing rules are the same for locales, assuming they
 * use the same charset and Unicode data (built-in en_US.utf8,ko_KR.utf8 and
 * tr_TR.utf8 have different Unicode data then LDML de_DE.utf8, hence they
 * produce different identifier casing rules).
 */
#define INTL_IDENTIFIER_CASING_SIZE_MULTIPLIER 2

/* Mask for next sequence. Used to determine next string in sorting order
 * in LIKE operator.
 * If the value of 'next' has this bit set, then the next sequence is a
 * contraction, and the lower part of value indicates the contraction id
 * Otherwise, the 'next' value indicates a Unicode codepoint */
#define INTL_MASK_CONTR  0x80000000

#define INTL_IS_NEXT_CONTR(v) \
  (((v) & INTL_MASK_CONTR) == INTL_MASK_CONTR)

#define INTL_GET_NEXT_CONTR_ID(v) ((v) & (~INTL_MASK_CONTR))

/*
 * Encoding of L1-L3 UCA weights on 32 bit unsigned int:
 * 33333332 22222222 1111111 1111111
 * L1 = 0000-ffff
 * L2 = 0000-01ff
 * L3 = 0000-007f
 */
#define UCA_GET_L1_W(v) ((v) & 0x0000ffff)
#define UCA_GET_L2_W(v) (((v) & 0x01ff0000) >> 16)
#define UCA_GET_L3_W(v) (((v) & 0xfe000000) >> 25)

#define LOC_LOCALE_STR_SIZE 10
#define LOC_DATA_BUFF_SIZE  256

#define COLL_NAME_SIZE 32
#define LOC_LIB_SYMBOL_NAME_SIZE 64


/* constants for Gregorian calendar */
#define CAL_MONTH_COUNT  12
#define CAL_DAY_COUNT  7
#define CAL_AM_PM_COUNT  12

/* Length in character of abbreviated format text for month : "Mon" */
#define LOC_CAL_FMT_MONTH_ABBR_LEN  3
/* Length in character of wide format text for month : "Month" */
#define LOC_CAL_FMT_MONTH_WIDE_LEN  5
/* Length in character of abbreviated format text for day : "Dy" */
#define LOC_CAL_FMT_DAY_ABBR_LEN  2
/* Length in character of wide format text for day : "Day" */
#define LOC_CAL_FMT_DAY_WIDE_LEN  3
/* Length in character of wide format text for day : "AM" */
#define LOC_CAL_FMT_AM_LEN  2

/* Multiplier for number of characters that a calendar token can have;
 * It applies for each token format text.
 * Current value is set based on the 'Day' format text and longest day name
 * in km_KH which is 14 chars */
#define LOC_PARSE_FRMT_TO_TOKEN_MULT  5

#define LOC_DATA_MONTH_ABBR_SIZE (LOC_CAL_FMT_MONTH_ABBR_LEN) * \
                 (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                 (INTL_UTF8_MAX_CHAR_SIZE)
#define LOC_DATA_MONTH_WIDE_SIZE (LOC_CAL_FMT_MONTH_WIDE_LEN) * \
                 (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                 (INTL_UTF8_MAX_CHAR_SIZE)

#define LOC_DATA_DAY_ABBR_SIZE (LOC_CAL_FMT_DAY_ABBR_LEN) * \
                   (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                   (INTL_UTF8_MAX_CHAR_SIZE)

#define LOC_DATA_DAY_WIDE_SIZE (LOC_CAL_FMT_DAY_WIDE_LEN) * \
                   (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                   (INTL_UTF8_MAX_CHAR_SIZE)

#define LOC_DATA_AM_PM_SIZE (LOC_CAL_FMT_AM_LEN) * \
                (LOC_PARSE_FRMT_TO_TOKEN_MULT) * \
                (INTL_UTF8_MAX_CHAR_SIZE)

#define LOC_DATA_CURRENCY_ISO_CODE_LEN 3

#define LOC_DATA_COLL_TWO_CHARS 13
#define LOC_DATA_TAILOR_RULES_COUNT_GROW 128
#define LOC_DATA_COLL_CUBRID_TAILOR_COUNT_GROW 8
#define MAX_STRLEN_FOR_COLLATION_ELEMENT 136

#define DUMPLOCALE_IS_CALENDAR          1
#define DUMPLOCALE_IS_NUMBERING         (1 << 1)
#define DUMPLOCALE_IS_ALPHABET          (1 << 2)
#define DUMPLOCALE_IS_ALPHABET_LOWER        (1 << 3)
#define DUMPLOCALE_IS_ALPHABET_UPPER        (1 << 4)
#define DUMPLOCALE_IS_IDENTIFIER_ALPHABET   (1 << 5)
#define DUMPLOCALE_IS_IDENTIFIER_ALPHABET_LOWER (1 << 6)
#define DUMPLOCALE_IS_IDENTIFIER_ALPHABET_UPPER (1 << 7)
#define DUMPLOCALE_IS_COLLATION_CP_ORDER    (1 << 8)
#define DUMPLOCALE_IS_COLLATION_WEIGHT_ORDER    (1 << 9)
#define DUMPLOCALE_IS_NORMALIZATION     (1 << 10)
#define DUMPLOCALE_IS_TEXT_CONV         (1 << 11)

#define ERR_MSG_SIZE 512

#define LOG_LOCALE_ERROR(msg, er_status, do_print) \
  do { \
      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, er_status, 1, msg); \
      if (do_print) \
    { \
      fprintf (stderr, "Error processing locales: %s\n", msg); \
      util_log_write_errstr ("Error processing locales: %s\n", msg); \
    } \
    } while (0)

#define MAPPING_INDEX_MASK  0x100000

#define SET_MAPPING_INDEX(val, is_used, offset)   \
  do {                        \
    val = (offset);               \
    if (is_used)                  \
      {                       \
    val |= MAPPING_INDEX_MASK;        \
      }                       \
  } while (0);

#define CP_HAS_MAPPINGS(val)              \
  (((val) & MAPPING_INDEX_MASK) == MAPPING_INDEX_MASK)

#define GET_MAPPING_OFFSET(val) ((val) & ~MAPPING_INDEX_MASK)

typedef unsigned short UCA_CP;
typedef unsigned short UCA_W;

typedef struct locale_file LOCALE_FILE;
struct locale_file
{
  char *locale_name;
  char *ldml_file;
  char *lib_file;
};

typedef struct ldml_context LDML_CONTEXT;
struct ldml_context
{
  char *ldml_file;
  int line_no;
};

/* Collation structures */
/* Tailoring level */
typedef enum
{
  TAILOR_UNDEFINED = 0,
  TAILOR_PRIMARY,
  TAILOR_SECONDARY,
  TAILOR_TERTIARY,
  TAILOR_QUATERNARY,
  TAILOR_IDENTITY
} T_LEVEL;

/* Tailoring anchoring direction */
typedef enum
{
  TAILOR_AFTER = 0,
  TAILOR_BEFORE
} TAILOR_DIR;

/* Type of char data. If tag is cp, ecp, buffer type ill be BUF_TYPE_CODE
 * If tag is ch, ech, buffer type will be BUF_TYPE_CHAR. */
typedef enum
{
  BUF_TYPE_CHAR,
  BUF_TYPE_CODE
} CP_BUF_TYPE;

/* Tailoring position */
typedef enum
{
  RULE_POS_BUFFER = 0,      /* Non-logical position, use buffer */

  RULE_POS_FIRST_VAR,       /* Logical first variable */
  RULE_POS_LAST_VAR,        /* Logical last variable */

  RULE_POS_FIRST_PRI_IGN,   /* Logical first primary ignorable */
  RULE_POS_LAST_PRI_IGN,    /* Logical last primary ignorable */

  RULE_POS_FIRST_SEC_IGN,   /* Logical first secondary ignorable */
  RULE_POS_LAST_SEC_IGN,    /* Logical last secondary ignorable */

  RULE_POS_FIRST_TERT_IGN,  /* Logical first tertiary ignorable */
  RULE_POS_LAST_TERT_IGN,   /* Logical last tertiary ignorable */

  RULE_POS_FIRST_NON_IGN,   /* Logical first non-ignorable */
  RULE_POS_LAST_NON_IGN,    /* Logical last non-ignorable */

  RULE_POS_FIRST_TRAIL,     /* Logical first trailing */
  RULE_POS_LAST_TRAIL       /* Logical last trailing */
} RULE_POS_TYPE;

typedef struct tailor_rule TAILOR_RULE;
struct tailor_rule
{
  T_LEVEL level;        /* weight level : primary, .. identity */

  /* anchor (reference) buffer, for which the rule is defined it may contain one or two (for expansion rule) UTF-8
   * chars buffer is nul-terminated */
  char anchor_buf[LOC_DATA_COLL_TWO_CHARS];

  /* Reference : */
  RULE_POS_TYPE r_pos_type; /* processing flag : logical position or buffer value for reference */
  char *r_buf;          /* Buffer containing UTF-8 characters of reference */
  int r_buf_size;

  TAILOR_DIR direction;     /* direction for applying rule : after, before */

  /* Buffer containing UTF-8 characters to be tailored */
  /* buffer is NOT nul-terminated */
  char *t_buf;
  int t_buf_size;

  bool multiple_chars;      /* true : indicates a rule for tailoring multiple chars false : rule for a single
                 * character */
};


/*
 * CUBRID_TAILOR_RULE - Structure used for representing the rules for
 *          absolute tailoring e.g. manually setting the weights
 *          and collation elements for unicode character or
 *              character ranges.
*/
typedef struct cubrid_tailor_rule CUBRID_TAILOR_RULE;
struct cubrid_tailor_rule
{
  /* The first and last (incl.) codepoints of the codepoint range to be tailored, in text format for later validation
   * and parsing. */
  char start_cp_buf[LOC_DATA_BUFF_SIZE];
  char end_cp_buf[LOC_DATA_BUFF_SIZE];
  CP_BUF_TYPE start_cp_buf_type;
  CP_BUF_TYPE end_cp_buf_type;

  char start_weight[MAX_STRLEN_FOR_COLLATION_ELEMENT];
  /* Buffer containing the weight value to use in the rule. Buffer is NOT NULL-terminated. Example :
   * [100.0.0.0][0.0.0.2]...etc. */

  char step[MAX_STRLEN_FOR_COLLATION_ELEMENT];
  /* The step (per level) with which we increase the weight range. Default value is 0 for all levels, so
   * single-codepoint and identical tailoring can be easily implemented. */
};

typedef enum
{
  CONTR_IGNORE = 0x0,
  CONTR_TAILORING_USE = 0x1,
  CONTR_DUCET_USE = 0x2
} COLL_CONTR_POLICY;

/* Matching of a pattern containing a contraction starter on last position:
 * if "ch" is a contraction, then :
 * "bac" is not matched in "bachxxx", if MATCH_CONTR_BOUND_FORBID
 * "bac" is matched in "bachxxx", if MATCH_CONTR_BOUND_ALLOW */
typedef enum
{
  MATCH_CONTR_BOUND_FORBID = 0,
  MATCH_CONTR_BOUND_ALLOW = 1
} COLL_MATCH_CONTR;

/* UCA sort options */
typedef struct uca_options UCA_OPTIONS;
struct uca_options
{
  /* collation settings */
  T_LEVEL sett_strength;    /* collation strength (primary, .. identity) */
  bool sett_backwards;      /* backwards on/off */
  bool sett_caseLevel;      /* caseLevel on/off */
  int sett_caseFirst;       /* 0=off; 1=upper ; 2=lower */
  bool sett_expansions;     /* use expansions */

  /* how to handle contractions, should be regarded as bit-field flag */
  int sett_contr_policy;

  /* set only when sorting for 'next' with expansions : not serialized */
  bool use_only_first_ce;

  /* how to handle string matching when contractions spans over the boundary */
  COLL_MATCH_CONTR sett_match_contr;
};

/* Below there are members containing the symbol name from where to load
   * certain weight arrays. By default, the symbol name is the corresponding
   * name of the exported weight array. However, if 2 collations have
   * identical weight arrays after compiling, the symbol name corresponding to
   * one of the arrays will be set to the name of the other, and the actual
   * array will not be exported into the shared library */
typedef struct coll_data_ref COLL_DATA_REF;
struct coll_data_ref
{
  char coll_weights_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_next_cp_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_uca_num_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_uca_w_l13_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_uca_w_l4_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_contr_list_ref[LOC_LIB_SYMBOL_NAME_SIZE];
  char coll_cp_first_contr_array_ref[LOC_LIB_SYMBOL_NAME_SIZE];
};

typedef struct coll_data COLL_DATA;
struct coll_data
{
  int coll_id;          /* collation id */
  char coll_name[COLL_NAME_SIZE];   /* collation name */

  UCA_OPTIONS uca_opt;

  unsigned int *weights;    /* array of weight (one weight per CP) */
  unsigned int *next_cp;    /* next CP (in order defined by collation) */
  unsigned int *weights_ti; /* array of weight for ignore trailing space */
  unsigned int *next_cp_ti; /* next CP (for ignore trailing space) */

  int w_count;          /* # of codepoints in this collation */

  /* Size of uca_w = 'w_count' X 'uca_exp_num' X 'sizeof (UCA_W)' */
  /* For each codepoint entry in uca_w only the corresponding uca_num weights are used */
  int uca_exp_num;      /* max number of CE per codepoint */
  char *uca_num;        /* number of CE for each codepoint */
  UCA_L13_W *uca_w_l13;     /* weight array L1, L2, L3 */
  UCA_L4_W *uca_w_l4;

  COLL_CONTRACTION *contr_list; /* contactions lists; contractions are stored in binary ascending order of UTF-8 buffer
                 */
  int count_contr;
  int contr_min_size;       /* size of smallest contraction buffer (in bytes) */

  /* array of first contraction index for each codepoint contains 'w_count' elements : value -1 means CP is not a
   * contraction starter other value = index of contraction in contractions list ('contr_list') */
  int *cp_first_contr_array;
  /* codepoint value from which 'cp_first_contr_array' can be used */
  unsigned int cp_first_contr_offset;
  /* # of codepoints in 'cp_first_contr_array' */
  unsigned int cp_first_contr_count;

  char checksum[32 + 1];
};

typedef struct coll_tailoring COLL_TAILORING;
struct coll_tailoring
{
  char coll_name[COLL_NAME_SIZE];   /* collation name */

  int coll_id;

  UCA_OPTIONS uca_opt;

  /* number of codepoints to take into account for collation -1 means unlimited (we support up to MAX_UNICODE_CHARS) */
  int sett_max_cp;

  /* collation tailoring rules */
  int count_rules;      /* # of tailorings */
  int max_rules;        /* # of max (allocated tailorings) */
  TAILOR_RULE *rules;       /* tailoring rules */

  CUBRID_TAILOR_RULE *cub_rules;    /* absolute tailoring rules */
  int cub_count_rules;      /* # of tailorings */
  int cub_max_rules;        /* # of max (allocated tailorings) */
  LDML_CONTEXT ldml_context;
};


/* Alphabet usage mode :
 *  - built-in routines codeset specific (lower_cp, upper_cp not used)
 *  - using data in lower_cp, upper_cp arrays
 * */
typedef enum
{
  ALPHABET_USE_BUILTIN = 0,
  ALPHABET_USE_DATA
} ALPHABET_USE_MODE;

/* Alphabet generation type :
 * in case several locales use the same UNICODE or ASCII modes, only one
 * reference copy is loaded */
typedef enum
{
  ALPHABET_UNICODE = 0,
  ALPHABET_ASCII,
  ALPHABET_TAILORED
} ALPHABET_TYPE;

/* alphabet structures (lower, upper) */
typedef struct alphabet_data ALPHABET_DATA;
struct alphabet_data
{
  ALPHABET_TYPE a_type;
  int codeset;          /* codeset of alphabet : not serialized */
  int l_count;          /* number of elements */

  int lower_multiplier;     /* how many codepoints contains each lower entry */
  unsigned int *lower_cp;   /* lower CP */

  int upper_multiplier;     /* how many codepoints contains each upper entry */
  unsigned int *upper_cp;   /* upper CP */

  bool do_not_save;     /* used by genlocale if shared alphabet */
};


/* text conversions */
typedef enum
{
  TEXT_CONV_NO_CONVERSION = 0,
  TEXT_CONV_ISO_88591_BUILTIN,
  TEXT_CONV_ISO_88599_BUILTIN,
  TEXT_CONV_GENERIC_1BYTE,  /* user defined UTF-8 to single byte codepage */
  TEXT_CONV_GENERIC_2BYTE   /* user defined UTF-8 to double byte codepage */
} TEXT_CONV_TYPE;

#define TXT_CONV_SYSTEM_STR_SIZE    256
typedef struct text_conversion TEXT_CONVERSION;
struct text_conversion
{
  TEXT_CONV_TYPE conv_type;

  /* both identifiers are used to ensure locale binary files portability */
  char *win_codepages;      /* Windows codepage identifier */
  char *nl_lang_str;        /* Linux language string */

  unsigned char byte_flag[256]; /* used in DBCS encoding schemes : 0 : single byte character 1 : leading byte for
                 * double byte char 2 : invalid byte */
  /* UTF-8 to text */
  unsigned int utf8_first_cp;
  unsigned int utf8_last_cp;
  CONV_CP_TO_BYTES *utf8_to_text;

  /* text to UTF-8 */
  unsigned int text_first_cp;
  unsigned int text_last_cp;
  CONV_CP_TO_BYTES *text_to_utf8;

  int (*utf8_to_text_func) (const char *, const int, char **, int *);
  int (*text_to_utf8_func) (const char *, const int, char **, int *);
  void (*init_conv_func) (void);
};

#define UNICODE_NORMALIZATION_DECORATOR "std"

typedef struct unicode_normalization UNICODE_NORMALIZATION;
struct unicode_normalization
{
  UNICODE_MAPPING *unicode_mappings;
  int unicode_mappings_count;   /* total number of mappings, fully, partially or not decomposed. */
  int *unicode_mapping_index;
  int *list_full_decomp;

  bool do_not_save;
};

#if defined(SA_MODE)

typedef enum
{
  TR_UPPER = 0,
  TR_LOWER
} TRANSFORM_TYPE;

/* Describes how a text tranforms into another text
 * Used for lower / upper casing rule description */
typedef struct transform_rule TRANSFORM_RULE;
struct transform_rule
{
  TRANSFORM_TYPE type;

  char *src;
  int src_size;

  char *dest;
  int dest_size;
};

typedef struct alphabet_tailoring ALPHABET_TAILORING;
struct alphabet_tailoring
{
  /* number of codepoints the optimization process will to take into account for casing : -1 means unlimited (we
   * support up to MAX_UNICODE_CHARS) */
  int sett_max_letters;

  int alphabet_mode;        /* 0 : default UnicodeData 1 : UnicodeData with specified file 2 : ASCII letter and
                 * casing */
  /* file path for Unicode data (if 'alphabet_mode' == 1) */
  char unicode_data_file[PATH_MAX];

  int count_rules;      /* # of tailorings */
  int max_rules;        /* # of max (allocated tailorings) */
  TRANSFORM_RULE *rules;
  LDML_CONTEXT ldml_context;
};

typedef struct text_conversion_prm TEXT_CONVERSION_PRM;
struct text_conversion_prm
{
  TEXT_CONV_TYPE conv_type;

  char win_codepages[TXT_CONV_SYSTEM_STR_SIZE]; /* Windows codepage identifier */
  char nl_lang_str[TXT_CONV_SYSTEM_STR_SIZE];   /* Linux language string */

  char conv_file[PATH_MAX];
};

#define CAL_SIMPLE_DATE_FORMAT_SIZE  30
#define CAL_COMP_DATE_FORMAT_SIZE  48
#define CAL_SIMPLE_DATE_TZ_FORMAT_SIZE  52
#define CAL_COMP_DATE_TZ_FORMAT_SIZE 70

/* user defined LOCALE DATA */
typedef struct locale_collation LOCALE_COLLATION;
struct locale_collation
{
  COLL_TAILORING tail_coll; /* collation info gathered from LDML */
  COLL_DATA opt_coll;       /* optimized collation data */
  COLL_DATA_REF coll_ref;   /* collation array export identifiers */
  bool do_not_save;     /* set true if collation is shared and already processed */
};

typedef struct locale_data LOCALE_DATA;
struct locale_data
{
  /* name of locale : used for validation; should be set by application, before LDML parsing */
  char locale_name[LOC_LOCALE_STR_SIZE];

  /* calendar info : only Gregorian calendar is supported */
  char dateFormat[CAL_SIMPLE_DATE_FORMAT_SIZE]; /* date format */
  char timeFormat[CAL_SIMPLE_DATE_FORMAT_SIZE]; /* time format */

  char datetimeFormat[CAL_COMP_DATE_FORMAT_SIZE];   /* datetime format */
  char timestampFormat[CAL_COMP_DATE_FORMAT_SIZE];  /* datetime format */

  char timetzFormat[CAL_SIMPLE_DATE_TZ_FORMAT_SIZE];    /* timetz format */
  char datetimetzFormat[CAL_COMP_DATE_TZ_FORMAT_SIZE];  /* datetimetz format */
  char timestamptzFormat[CAL_COMP_DATE_TZ_FORMAT_SIZE]; /* timestamptz format */

  /* name of months , week days, day periods */
  char month_names_abbreviated[CAL_MONTH_COUNT][LOC_DATA_MONTH_ABBR_SIZE];
  char month_names_wide[CAL_MONTH_COUNT][LOC_DATA_MONTH_WIDE_SIZE];
  char day_names_abbreviated[CAL_DAY_COUNT][LOC_DATA_DAY_ABBR_SIZE];
  char day_names_wide[CAL_DAY_COUNT][LOC_DATA_DAY_WIDE_SIZE];
  char am_pm[CAL_AM_PM_COUNT][LOC_DATA_AM_PM_SIZE];

  char month_names_abbr_parse_order[CAL_MONTH_COUNT];
  char month_names_wide_parse_order[CAL_MONTH_COUNT];
  char day_names_abbr_parse_order[CAL_DAY_COUNT];
  char day_names_wide_parse_order[CAL_DAY_COUNT];
  char am_pm_parse_order[CAL_AM_PM_COUNT];

  /* numeric symbols : digit grouping, decimal */
  char number_decimal_sym;
  char number_group_sym;
  DB_CURRENCY default_currency_code;    /* ISO code for default locale currency. */

  LOCALE_COLLATION *collations;
  int coll_cnt;

  ALPHABET_TAILORING alpha_tailoring;
  ALPHABET_DATA alphabet;   /* data for user lower / uppper */
  ALPHABET_DATA identif_alphabet;   /* data for lower / uppper for identifiers */

  /* unicode data file used for alphabets and normalization */
  int unicode_mode;     /* 0 : default UnicodeData 1 : UnicodeData with specified file */
  /* file path for Unicode data (if 'alphabet_mode' == 1) */
  char unicode_data_file[PATH_MAX];

  /* normalization */
  UNICODE_NORMALIZATION unicode_normalization;

  /* console text conversion */
  TEXT_CONVERSION txt_conv;
  TEXT_CONVERSION_PRM txt_conv_prm;

  /* data members used during processing : */
  int curr_period;      /* processing index for calendar : 0-11 : months 0-6 : week days 0-12 : AM, PM period
                 * names */
  int name_type;        /* processing flag for calendar name : 1 - abbr 2 - wide; 0 - uninitialized */

  /* processing : last anchor : used when build a new collation rule */
  /* buffer is nul-terminated */
  char last_anchor_buf[LOC_DATA_COLL_TWO_CHARS];
  RULE_POS_TYPE last_rule_pos_type; /* processing flag : logical position or buffer */
  TAILOR_DIR last_rule_dir; /* processing flag : after, before */
  T_LEVEL last_rule_level;  /* processing flag : weight level : primary, .. identity (used for validation) */

  /* processing : last tailoring reference : used when building collation rules pointer to a buffer : either a
   * tailoring buffer (not nul-terminated) in a rule or an anchor buffer (last_anchor_buf) */
  char *last_r_buf_p;
  int last_r_buf_size;

  /* processing : used for intermediary (partial) content data in LDML buffer is nul-terminated */
  char data_buffer[LOC_DATA_BUFF_SIZE];
  int data_buf_count;

  char checksum[32 + 1];

  LDML_CONTEXT ldml_context;
};
#endif //#if defined(SA_MODE)

#ifdef __cplusplus
extern "C"
{
#endif

  int locale_get_cfg_locales (LOCALE_FILE ** p_locale_files, int *p_num_locales, bool is_lang_init);
  int locale_check_and_set_default_files (LOCALE_FILE * lf, bool is_lang_init);

#if defined(SA_MODE)
  void locale_destroy_data (LOCALE_DATA * ld);
  int locale_compile_locale (LOCALE_FILE * lf, LOCALE_DATA * ld, bool is_verbose);
  int locale_prepare_C_file (void);
  void locale_mark_duplicate_collations (LOCALE_DATA ** ld, int start_index, int end_index, bool is_verbose);
  int locale_save_all_to_C_file (LOCALE_DATA ** ld, int start_index, int end_index, LOCALE_FILE * lf);
  int locale_dump (void *data, LOCALE_FILE * lf, int dl_settings, int start_value, int end_value);
  int locale_dump_lib_collations (void *lib_handle, const LOCALE_FILE * lf, int dl_settings, int start_value,
                  int end_value);
  void locale_free_shared_data (void);
#endif

#ifdef __cplusplus
}
#endif

#endif              /* _LOCALE_SUPPORT_H_ */