CUBRID Engine  latest
locale_helper.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Search Solution Corporation
3  * Copyright 2016 CUBRID Corporation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /*
20  * locale_helper.cpp
21  */
22 
23 #include "locale_helper.hpp"
24 
25 #include <locale>
26 #include <codecvt>
27 #include <string>
28 
29 namespace cublocale
30 {
31  std::string get_lang_name (const LANG_COLLATION *lang_coll)
32  {
33  const char *lang_name = lang_coll->default_lang->lang_name;
34  std::string lang_str (lang_name);
35  return lang_str;
36  }
37 
38  std::string get_codeset_name (const LANG_COLLATION *lang_coll)
39  {
40  const char *codeset_name = lang_get_codeset_name (lang_coll->codeset);
41  std::string codeset_str (codeset_name);
42  return codeset_str;
43  }
44 
/*
 * get_locale () - build a std::locale named "<lang>.<charset>"
 *
 * Arguments:
 *   charset: (In) charset part of the locale name (text after the '.')
 *   lang: (In) language part of the locale name (text before the '.')
 *
 * Returns: the requested locale when it can be constructed; otherwise the environment's default
 *          locale, and if that cannot be constructed either, the classic "C" locale
 *
 * Note:
 *   An unsupported locale name is treated as a programming error in debug builds (assert), while
 *   release builds silently fall back. This function does not let a locale construction failure
 *   escape to the caller.
 */
std::locale get_locale (const std::string &charset, const std::string &lang)
{
  try
    {
      return std::locale (lang + "." + charset);
    }
  catch (const std::exception &)
    {
      // locale name is not supported; fall back below
      assert (false);
    }

  try
    {
      // the environment's default locale
      return std::locale ("");
    }
  catch (const std::exception &)
    {
      // std::locale ("") throws as well when the environment names a locale that is not available
      return std::locale::classic ();
    }
}
59 
60  /*
61  * convert_to_wstring () -
62  *
63  * Arguments:
64  * out: (Out) Output wide string
65  * in: (In) Input string
66  * codeset: (In) code of the input string
67  *
68  * Returns: bool
69  *
70  * Note:
71  * This function converts from a multi-byte encoded string into a wide string
72  * to perform locale-aware functionality such as searching or replacing by the regular expression with <regex>
73  * It convert given string into utf8 string and then make wide string
74  */
75  bool convert_to_wstring (std::wstring &out, const std::string &in, const INTL_CODESET codeset)
76  {
77  bool is_success = false;
78 
79  if (in.empty ())
80  {
81  // don't need to convert for empty string
82  out.clear ();
83  return true;
84  }
85 
86  std::string utf8_str;
87  if (codeset != INTL_CODESET_UTF8)
88  {
89  std::string utf8_converted;
90  utf8_converted.resize (in.size() * INTL_CODESET_MULT (INTL_CODESET_UTF8));
91  std::string::pointer utf8_str_ptr = (char *) utf8_converted.data ();
92 
93  int conv_status = 0;
94  int conv_size = 0;
95  switch (codeset)
96  {
98  conv_status = intl_fast_iso88591_to_utf8 ((const unsigned char *) in.data (), in.size (),
99  (unsigned char **) &utf8_str_ptr, &conv_size);
100  break;
102  conv_status = intl_euckr_to_utf8 ((const unsigned char *) in.data (), in.size (), (unsigned char **) &utf8_str_ptr,
103  &conv_size);
104  break;
106  intl_binary_to_utf8 ((const unsigned char *) in.data (), in.size (), (unsigned char **) &utf8_str_ptr,
107  &conv_size);
108  break;
109  default:
110  // unrecognized codeset
111  conv_status = 1;
112  assert (false);
113  break;
114  }
115 
116  /* conversion failed */
117  if (conv_status != 0)
118  {
119  return false;
120  }
121 
122  utf8_converted.resize (conv_size);
123  utf8_str.assign (utf8_converted);
124  }
125  else
126  {
127  utf8_str.assign (in);
128  }
129 
130  try
131  {
132 #if defined(WINDOWS)
133  std::wstring converted;
134  int nLen = MultiByteToWideChar (CP_UTF8, 0, utf8_str.data (), utf8_str.size (), NULL, NULL);
135  converted.resize (nLen);
136  MultiByteToWideChar (CP_UTF8, 0, utf8_str.data (), utf8_str.size (), &converted[0], nLen);
137 #else
138  std::wstring converted = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> {}.from_bytes (utf8_str);
139 #endif
140  out.assign (std::move (converted));
141  is_success = true;
142  }
143  catch (const std::range_error &re)
144  {
145  // do nothing
146  }
147 
148  return is_success;
149  }
150 
151  /*
152  * convert_to_string () -
153  *
154  * Arguments:
155  * out: (Out) Output wide string
156  * in: (In) Input string
157  * codeset: (In) code of the input string
158  *
159  * Returns: bool
160  *
161  * Note:
162  * This function converts from a wide string into a multi-byte encoded string
163  */
164  bool convert_to_string (std::string &out, const std::wstring &in, const INTL_CODESET codeset)
165  {
166  bool is_success = false;
167 
168  if (in.empty ())
169  {
170  // don't need to convert for empty string
171  out.clear ();
172  return true;
173  }
174 
175  try
176  {
177 #if defined(WINDOWS)
178  int nLen = WideCharToMultiByte (CP_UTF8, 0, in.data (), in.size (), NULL, 0, NULL, NULL);
179  std::string converted;
180  converted.resize (nLen);
181  WideCharToMultiByte (CP_UTF8, 0, in.data (), in.size (), &converted[0], nLen, NULL, NULL);
182 #else
183  std::string converted = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> {}.to_bytes (in);
184 #endif
185  if (codeset == INTL_CODESET_UTF8)
186  {
187  out.assign (std::move (converted));
188  is_success = true;
189  }
190  else
191  {
192  std::string to_str;
193  to_str.resize (converted.size());
194  std::string::pointer to_str_ptr = (char *) to_str.data();
195 
196  int conv_status = 0;
197  int conv_size = 0;
198  switch (codeset)
199  {
201  conv_status = intl_utf8_to_iso88591 ((const unsigned char *) converted.data (), converted.size (),
202  (unsigned char **) &to_str_ptr,
203  &conv_size);
204  break;
206  conv_status = intl_utf8_to_euckr ((const unsigned char *) converted.data (), converted.size (),
207  (unsigned char **) &to_str_ptr,
208  &conv_size);
209  break;
211  /* when coercing multibyte to binary charset, we just reinterpret each byte as one character */
212  to_str.assign (in.begin(), in.end());
213  break;
214  default:
215  // unrecognized codeset
216  conv_status = 1;
217  assert (false);
218  break;
219  }
220 
221  /* conversion failed */
222  if (conv_status != 0)
223  {
224  return false;
225  }
226 
227  to_str.resize (conv_size);
228  out.assign (std::move (to_str));
229  is_success = true;
230  }
231  }
232  catch (const std::range_error &re)
233  {
234  // do nothing
235  }
236 
237  return is_success;
238  }
239 }
std::string get_lang_name(const LANG_COLLATION *lang_coll)
std::string get_codeset_name(const LANG_COLLATION *lang_coll)
int intl_euckr_to_utf8(const unsigned char *in_buf, const int in_size, unsigned char **out_buf, int *out_size)
INTL_CODESET codeset
#define assert(x)
std::locale get_locale(const std::string &charset, const std::string &lang)
void intl_binary_to_utf8(const unsigned char *in_buf, const int in_size, unsigned char **out_buf, int *out_size)
#define NULL
Definition: freelistheap.h:34
const char * lang_name
bool convert_to_string(std::string &out, const std::wstring &in, const INTL_CODESET codeset)
const char * lang_get_codeset_name(int codeset_id)
LANG_LOCALE_DATA * default_lang
enum intl_codeset INTL_CODESET
Definition: intl_support.h:190
int intl_utf8_to_euckr(const unsigned char *in_buf, const int in_size, unsigned char **out_buf, int *out_size)
#define INTL_CODESET_MULT(codeset)
Definition: intl_support.h:77
int intl_fast_iso88591_to_utf8(const unsigned char *in_buf, const int in_size, unsigned char **out_buf, int *out_size)
int intl_utf8_to_iso88591(const unsigned char *in_buf, const int in_size, unsigned char **out_buf, int *out_size)
bool convert_to_wstring(std::wstring &out, const std::string &in, const INTL_CODESET codeset)