40 #define UNICODEDATA_FILE "unicodedata.txt" 43 #define UNICODE_FILE_LINE_SIZE 512 44 #define UNICODE_FILE_FIELDS 14 47 #define UNICODE_FILE_GENERAL_CAT_POS 2 48 #define UNICODE_FILE_CHAR_DECOMPOSITION_MAPPING 5 49 #define UNICODE_FILE_UPPER_CASE_MAP 12 50 #define UNICODE_FILE_LOWER_CASE_MAP 13 84 #define UNICODE_DECOMP_MAP_CP_COUNT 4 118 const int upper_multiplier);
139 char unicode_file[PATH_MAX];
165 snprintf (err_msg,
sizeof (err_msg) - 1,
166 "Invalid alphabet rule :%d" ". Destination buffer contains more than 2 characters", i);
173 upper_mult = MAX (upper_mult, dest_len);
178 lower_mult = MAX (lower_mult, dest_len);
186 printf (
"Creating ASCII alphabet\n");
202 for (cp = 0; (int) cp < a->l_count; cp++)
208 for (cp = (
int)
'a'; cp <= (int)
'z'; cp++)
221 unicode_file[
sizeof (unicode_file) - 1] =
'\0';
236 printf (
"Creating UNICODE alphabet from: %s\n", unicode_file);
261 for (cp = 0; (int) cp < a_tailoring->sett_max_letters; cp++)
271 if (unicode_data[cp].gen_cat_id ==
CAT_Lu)
277 sizeof (
uint32) * unicode_data_lower_mult);
279 else if (unicode_data[cp].gen_cat_id ==
CAT_Ll)
285 sizeof (
uint32) * unicode_data_upper_mult);
297 printf (
"Applying %d alphabet tailoring rules\n", a_tailoring->
count_rules);
305 int src_cp_count = 0;
307 int dest_cp_count = 0;
314 if (src_len != 1 || src_len != src_cp_count)
333 if (dest_len < 1 || dest_len != dest_cp_count)
342 assert (dest_cp_count <= upper_mult);
343 memset (&(a->
upper_cp[cp_src * upper_mult]), 0, upper_mult * sizeof (
uint32));
344 memcpy (&(a->
upper_cp[cp_src * upper_mult]), cp_dest, sizeof (
uint32) * MIN (dest_cp_count, upper_mult));
350 assert (dest_cp_count <= lower_mult);
351 memset (&(a->
lower_cp[cp_src * lower_mult]), 0, lower_mult * sizeof (
uint32));
352 memcpy (&(a->
lower_cp[cp_src * lower_mult]), cp_dest, sizeof (
uint32) * MIN (dest_cp_count, lower_mult));
400 if (unicode_data ==
NULL)
418 while (fgets (str,
sizeof (str), fp))
423 char *s, *end, *end_p;
436 uc = &(unicode_data[cp]);
458 end = strtok_r (str_p,
";", &save);
465 for (cat_idx = 0; cat_idx < (int) (
sizeof (list_gen_cat) /
sizeof (list_gen_cat[0])); cat_idx++)
467 if (strcmp (list_gen_cat[cat_idx].val, str_p) == 0)
481 "Invalid line %d" " of file %s contains more than 2 characters for " 498 "Invalid line %d" " of file %s contains more than 2 characters for " 515 if (str_p[0] ==
';' || str_p[0] ==
'<')
567 if (unicode_data !=
NULL)
593 if (lower_multiplier > 1 && upper_multiplier > 1)
596 "and upper multipliers with values above 1.",
ER_LOC_GEN,
true);
620 memset (a->
lower_cp, 0, max_letters * lower_multiplier * sizeof (
uint32));
621 memset (a->
upper_cp, 0, max_letters * upper_multiplier * sizeof (
uint32));
673 while (str !=
NULL && str < str_end)
679 if (result != 0 || str_cursor <= str)
684 if (i < cp_list_size)
690 while (str_cursor < str_end && strchr (delims, *str_cursor) !=
NULL)
713 int i, orig_mapping_count, curr_mapping, mapping_cursor;
717 int mapping_start, mapping_count;
722 int *unicode_decomp_map_count =
NULL;
736 if (unicode_decomp_map_count ==
NULL)
747 uc = &(unicode_data[cp]);
751 unicode_decomp_map_count[cp] = 0;
761 unicode_decomp_map_count[cp]);
772 temp_list_unicode_decomp_maps =
774 if (temp_list_unicode_decomp_maps ==
NULL)
784 orig_mapping_count = 0;
785 while (cp < MAX_UNICODE_CHARS)
787 if (unicode_decomp_map_count[cp] > 0)
789 um = &(temp_list_unicode_decomp_maps[orig_mapping_count]);
800 orig_mapping_count++;
808 mapping_cursor = orig_mapping_count;
810 while (curr_mapping < mapping_cursor)
816 um = &(temp_list_unicode_decomp_maps[curr_mapping]);
817 new_map = &(temp_list_unicode_decomp_maps[mapping_cursor]);
819 if (um->
size > 0 && um->
map[0] < MAX_UNICODE_CHARS)
821 if (unicode_decomp_map_count[um->
map[0]] > 0)
824 new_map->
cp = um->
cp;
833 for (i = 0; i < new_map->
size; i++)
847 printf (
"\nNew mapping step : %04X -> ", um->
cp);
848 for (i = 0; i < new_map->
size; i++)
850 printf (
"%04X ", new_map->
map[i]);
860 um = &(temp_list_unicode_decomp_maps[
i]);
861 if (um->
size > 0 && unicode_decomp_map_count[um->
map[0]] == 0)
884 cp = temp_list_unicode_decomp_maps[0].
map[0];
889 if (temp_list_unicode_decomp_maps[i].map[0] == (
uint32) cp)
897 cp = (
uint32) temp_list_unicode_decomp_maps[i].map[0];
900 for (j = old_cp + 1; j < cp; j++)
914 int mapping_start = 0;
915 int mapping_count = 0;
924 qsort (temp_list_unicode_decomp_maps + mapping_start, mapping_count,
sizeof (
UNICODE_CP_MAPPING),
931 if (unicode_decomp_map_count !=
NULL)
933 free (unicode_decomp_map_count);
934 unicode_decomp_map_count =
NULL;
937 if (temp_list_unicode_decomp_maps !=
NULL)
941 um = &(temp_list_unicode_decomp_maps[
i]);
948 free (temp_list_unicode_decomp_maps);
949 temp_list_unicode_decomp_maps =
NULL;
970 uc = &(unicode_data[cp]);
976 uc = &(unicode_data[cp]);
1001 uc = &(unicode_data[cp]);
1036 unsigned char *cur_pos;
1037 char cur_size, byte_count;
1072 um_cp = &(decomp_maps[
i]);
1085 for (j = 0; j < um_cp->
size; j++)
1088 cur_size += byte_count;
1089 cur_pos += byte_count;
1095 memcpy (um->
buffer, str_buf, cur_size);
1096 um->
size = cur_size;
1110 #if !defined (SERVER_MODE) 1147 p_end = str_in + size_in;
1149 for (pc = str_in; pc < p_end; pc++)
1151 if ((
unsigned char) (*pc) >= 0x80)
1153 *size_out = size_in;
1178 int composed_index, remaining_bytes;
1179 const char *str_next =
NULL;
1181 int map_start, map_end,
i, byte_count;
1182 bool match_found =
false, composition_found;
1184 const char *str_cursor;
1185 const char *str_end;
1193 str_cursor = str_in;
1194 remaining_bytes = size_in;
1195 composition_found =
false;
1197 str_end = str_in + size_in;
1199 while (str_cursor < str_end)
1203 cp =
intl_utf8_to_cp ((
unsigned char *) str_cursor, remaining_bytes, (
unsigned char **) &str_next);
1205 first_cp_size =
CAST_STRLEN (str_next - str_cursor);
1206 remaining_bytes -= first_cp_size;
1208 match_found =
false;
1212 goto match_not_found;
1219 for (i = map_start; i < map_end; i++)
1222 if (um->
size > remaining_bytes + first_cp_size)
1227 if (memcmp (um->
buffer, str_cursor, um->
size) == 0)
1230 composed_index +=
intl_cp_to_utf8 (um->
cp, (
unsigned char *) (&(composed_str[composed_index])));
1231 str_cursor += um->
size;
1233 composition_found =
true;
1244 memcpy (&(composed_str[composed_index]), str_cursor, byte_count);
1245 composed_index += byte_count;
1246 str_cursor += byte_count;
1251 *size_out = composed_index;
1252 if (composition_found)
1254 *is_composed =
true;
1277 int bytes_read, decomp_index, decomposed_size = 0;
1279 const char *src_cursor;
1280 const char *src_end;
1286 goto no_decompose_cnt;
1292 can_decompose =
false;
1293 src_end = str_in + size_in;
1294 for (src_cursor = str_in; src_cursor < src_end; src_cursor++)
1296 if ((
unsigned char) (*src_cursor) >= 0x80)
1298 can_decompose =
true;
1304 goto no_decompose_cnt;
1308 src_cursor = str_in;
1310 can_decompose =
false;
1311 src_end = str_in + size_in;
1312 while (src_cursor < src_end)
1318 if (decomp_index > -1)
1321 can_decompose =
true;
1325 decomposed_size += bytes_read;
1334 goto no_decompose_cnt;
1337 *decomp_size = decomposed_size;
1342 *decomp_size = size_in;
1362 int bytes_read, decomp_index;
1364 const char *src_cursor;
1365 const char *src_end;
1375 src_cursor = str_in;
1378 src_end = str_in + size_in;
1379 while (src_cursor < src_end)
1384 if (decomp_index > -1)
1391 memcpy (dest_cursor, src_cursor, bytes_read);
1392 dest_cursor += bytes_read;
1413 int min_size, result;
1419 result = memcmp (um1->
map, um2->
map, min_size * sizeof (
uint32));
1423 if (um1->
size > min_size)
1427 if (um2->
size > min_size)
1431 if (um1->
cp < um2->
cp)
1458 if (um1->
map[0] > um2->
map[0])
static int comp_func_grouping_unicode_cp_mapping(const void *arg1, const void *arg2)
#define UNICODE_FILE_CHAR_DECOMPOSITION_MAPPING
#define UNICODE_FILE_LOWER_CASE_MAP
bool unicode_string_need_compose(const char *str_in, const int size_in, int *size_out, const UNICODE_NORMALIZATION *norm)
unsigned char buffer[NORMALIZATION_MAX_BUF_SIZE]
#define UNICODE_FILE_UPPER_CASE_MAP
GENERAL_CATEGORY list_gen_cat[]
ALPHABET_DATA identif_alphabet
static int create_alphabet(ALPHABET_DATA *a, const int max_letters, const int lower_multiplier, const int upper_multiplier)
int str_to_uint32(unsigned int *ret_p, char **end_p, const char *str_p, int base)
char unicode_mapping_cp_count
static int unicode_make_normalization_data(UNICODE_CP_MAPPING *decomp_maps, LOCALE_DATA *ld)
void unicode_compose_string(const char *str_in, const int size_in, char *str_out, int *size_out, bool *is_composed, const UNICODE_NORMALIZATION *norm)
int intl_cp_to_utf8(const unsigned int codepoint, unsigned char *utf8_seq)
uint32 lower_cp[INTL_CASING_EXPANSION_MULTIPLIER]
static UNICODE_CHAR * unicode_data
static char last_unicode_file[PATH_MAX]
#define MAX_UNICODE_CHARS
static int load_unicode_data(const LOCALE_DATA *ld)
uint32 unicode_mapping[UNICODE_DECOMP_MAP_CP_COUNT]
UNICODE_MAPPING * unicode_mappings
static int comp_func_unicode_cp_mapping(const void *arg1, const void *arg2)
void unicode_decompose_string(const char *str_in, const int size_in, char *str_out, int *size_out, const UNICODE_NORMALIZATION *norm)
char * envvar_localedatadir_file(char *path, size_t size, const char *filename)
char unicode_data_file[PATH_MAX]
#define UNICODE_FILE_LINE_SIZE
char unicode_full_decomp_cp_count
int * unicode_mapping_index
void unicode_free_data(void)
static int count_decomp_steps(int cp)
unsigned int intl_utf8_to_cp(const unsigned char *utf8, const int size, unsigned char **next_char)
static int unicode_data_upper_mult
char unicode_data_file[PATH_MAX]
int unicode_process_alphabet(LOCALE_DATA *ld, bool is_verbose)
FILE * fopen_ex(const char *filename, const char *type)
#define INTL_UTF8_MAX_CHAR_SIZE
int unicode_process_normalization(LOCALE_DATA *ld, bool is_verbose)
int string_to_int_array(char *s, uint32 *cp_list, const int cp_list_size, const char *delims)
static void error(const char *msg)
int intl_utf8_to_cp_list(const unsigned char *utf8, const int size, unsigned int *cp_array, const int max_array_size, int *array_count)
UNICODE_NORMALIZATION unicode_normalization
#define snprintf_dots_truncate(dest, max_len,...)
uint32 upper_cp[INTL_CASING_EXPANSION_MULTIPLIER]
static void str_out(const char *fmt,...)
#define CP_HAS_MAPPINGS(val)
#define SET_MAPPING_INDEX(val, is_used, offset)
#define UNICODE_FILE_GENERAL_CAT_POS
bool prm_get_bool_value(PARAM_ID prm_id)
int unicode_mappings_count
bool unicode_string_need_decompose(const char *str_in, const int size_in, int *decomp_size, const UNICODE_NORMALIZATION *norm)
#define LOG_LOCALE_ERROR(msg, er_status, do_print)
#define GET_MAPPING_OFFSET(val)
GENERAL_CATEG_ID gen_cat_id
#define UNICODE_DECOMP_MAP_CP_COUNT
#define INTL_CASING_EXPANSION_MULTIPLIER
#define UNICODE_FILE_FIELDS
ALPHABET_TAILORING alpha_tailoring
static int count_full_decomp_cp(int cp)
static int unicode_data_lower_mult