Skip to content

File locale_helper.cpp

File List > base > locale_helper.cpp

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * locale_helper.cpp
 */

#include "locale_helper.hpp"

#include <locale>
#include <codecvt>
#include <string>
// XXX: SHOULD BE THE LAST INCLUDE HEADER
#include "memory_wrapper.hpp"

namespace cublocale
{
  std::string get_lang_name (const LANG_COLLATION *lang_coll)
  {
    const char *lang_name = lang_coll->default_lang->lang_name;
    std::string lang_str (lang_name);
    return lang_str;
  }

  std::string get_codeset_name (const LANG_COLLATION *lang_coll)
  {
    const char *codeset_name = lang_get_codeset_name (lang_coll->codeset);
    std::string codeset_str (codeset_name);
    return codeset_str;
  }

  std::locale get_locale (const std::string &charset, const std::string &lang)
  {
    try
      {
    std::locale loc (lang + "." + charset);
    return loc;
      }
    catch (std::exception &e)
      {
    // return the environment's default locale, locale name is not supported
    assert (false);
    return std::locale ("");
      }
  }

  /*
   * convert_utf8_to_string () -
   *
   * Arguments:
   *        out:  (Out) Output string
   *        in: (In) Input string (UTF-8)
   *        codeset: (In) codeset to convert
   *
   * Returns: bool
   *
   * Note:
   *   This function converts from a unicode string (UTF-8) into a string with specified codeset
   */
  bool convert_utf8_to_string (std::string &out_string, const std::string &utf8_string, const INTL_CODESET codeset)
  {
    if (utf8_string.empty())
      {
    out_string.clear ();
    return true;
      }

    if (codeset == INTL_CODESET_UTF8)
      {
    out_string.assign (std::move (utf8_string));
      }
    else
      {
    std::string to_str;
    to_str.resize (utf8_string.size());
    std::string::pointer to_str_ptr = (char *) to_str.data();

    int conv_status = 0;
    int conv_size = 0;
    switch (codeset)
      {
      case INTL_CODESET_ISO88591:
        conv_status = intl_utf8_to_iso88591 ((const unsigned char *) utf8_string.data (), utf8_string.size (),
                         (unsigned char **) &to_str_ptr,
                         &conv_size);
        break;
      case INTL_CODESET_KSC5601_EUC:
        conv_status = intl_utf8_to_euckr ((const unsigned char *) utf8_string.data (), utf8_string.size (),
                          (unsigned char **) &to_str_ptr,
                          &conv_size);
        break;
      case INTL_CODESET_RAW_BYTES:
        /* when coercing multibyte to binary charset, we just reinterpret each byte as one character */
        to_str.assign (utf8_string.begin(), utf8_string.end());
        break;
      default:
        // unrecognized codeset
        conv_status = 1;
        assert (false);
        break;
      }

    /* conversion failed */
    if (conv_status != 0)
      {
        return false;
      }

    to_str.resize (conv_size);
    out_string.assign (std::move (to_str));
      }
    return true;
  }

  /*
   * convert_string_to_utf8 () -
   *
   * Arguments:
   *        out:  (Out) Output UTF-8 string
   *        in: (In) Input string
   *        codeset: (In) code of the input string
   *
   * Returns: bool
   *
   * Note:
   *   This function converts from a string of specified codeset into a unicoe string (UTF-8))
   */
  bool convert_string_to_utf8 (std::string &utf8_string, const std::string &input_string, const INTL_CODESET codeset)
  {
    if (input_string.empty ())
      {
    utf8_string.clear ();
    return true;
      }

    if (codeset != INTL_CODESET_UTF8)
      {
    std::string utf8_converted;
    utf8_converted.resize (input_string.size() * INTL_CODESET_MULT (INTL_CODESET_UTF8));
    std::string::pointer utf8_str_ptr = (char *) utf8_converted.data ();

    int conv_status = 0;
    int conv_size = 0;
    switch (codeset)
      {
      case INTL_CODESET_ISO88591:
        conv_status = intl_fast_iso88591_to_utf8 ((const unsigned char *) input_string.data (), input_string.size (),
              (unsigned char **) &utf8_str_ptr, &conv_size);
        break;
      case INTL_CODESET_KSC5601_EUC:
        conv_status = intl_euckr_to_utf8 ((const unsigned char *) input_string.data (), input_string.size (),
                          (unsigned char **) &utf8_str_ptr,
                          &conv_size);
        break;
      case INTL_CODESET_RAW_BYTES:
        intl_binary_to_utf8 ((const unsigned char *) input_string.data (), input_string.size (),
                 (unsigned char **) &utf8_str_ptr,
                 &conv_size);
        break;
      default:
        // unrecognized codeset
        conv_status = 1;
        assert (false);
        break;
      }

    /* conversion failed */
    if (conv_status != 0)
      {
        return false;
      }

    utf8_converted.resize (conv_size);
    utf8_string.assign (utf8_converted);
      }
    else
      {
    utf8_string.assign (input_string);
      }
    return true;
  }

  /*
   * convert_utf8_to_wstring () -
   *
   * Arguments:
   *        out:  (Out) Output wide string
   *        in: (In) Input UTF-8 string
   *
   * Returns: bool
   *
   * Note:
   *   This function converts from a multi-byte encoded string into a wide string
   *   to perform locale-aware functionality such as searching or replacing by the regular expression with <regex>
   *   It convert given string into utf8 string and then make wide string
   */
  bool convert_utf8_to_wstring (std::wstring &out, const std::string &in)
  {
    bool is_success = false;

    if (in.empty ())
      {
    // don't need to convert for empty string
    out.clear ();
    return true;
      }

    try
      {
#if defined(WINDOWS)
    std::wstring converted;
    int nLen = MultiByteToWideChar (CP_UTF8, 0, in.data (), in.size (), NULL, NULL);
    converted.resize (nLen);
    MultiByteToWideChar (CP_UTF8, 0, in.data (), in.size (), &converted[0], nLen);
#else
    std::wstring converted = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> {} .from_bytes (in);
#endif
    out.assign (std::move (converted));
    is_success = true;
      }
    catch (const std::range_error &re)
      {
    // do nothing
      }

    return is_success;
  }

  /*
   * convert_wstring_to_utf8 () -
   *
   * Arguments:
   *        out:  (Out) Output wide string
   *        in: (In) Input string
   *
   * Returns: bool
   *
   * Note:
   *   This function converts from a wide string into a multi-byte encoded string
   */
  bool convert_wstring_to_utf8 (std::string &out, const std::wstring &in)
  {
    bool is_success = false;

    if (in.empty ())
      {
    // don't need to convert for empty string
    out.clear ();
    return true;
      }

    try
      {
    out.clear ();
#if defined(WINDOWS)
    int nLen = WideCharToMultiByte (CP_UTF8, 0, in.data (), in.size (), NULL, 0, NULL, NULL);
    out.resize (nLen);
    WideCharToMultiByte (CP_UTF8, 0, in.data (), in.size (), &out[0], nLen, NULL, NULL);
#else
    out = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> {} .to_bytes (in);
#endif
    is_success = true;
      }
    catch (const std::range_error &re)
      {
    // do nothing
      }

    return is_success;
  }
}