2 * Copyright 2008 Search Solution Corporation
3 * Copyright 2016 CUBRID Corporation
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 * cnvlex.l - lexical scanner
32 /* Set up custom prefix */
33 #define YY_PREFIX dbcnv
36 #include "util_func.h"
/*
 * Forward declarations.  cnv_fmt_lex_start() maps a scan mode to a start
 * condition and is called from YY_USER_INIT; cnv_fmt_exit() is the handler
 * that YY_USER_INIT registers with atexit().
 */
38 static int cnv_fmt_lex_start (FMT_LEX_MODE mode);
39 void cnv_fmt_exit (void);
41 /* Global variables */
/*
 * cnv_fmt__init  - tested by YY_USER_INIT before it registers the atexit()
 *                  handler.
 * cnv_fmt__input - read position in the string being scanned; YY_INPUT
 *                  consumes one character at a time from it.
 * cnv_fmt__next  - advanced by yyleng in DB_FMT_LEX_RETURN and moved back by
 *                  yyleng when a token is pushed back; this is the value
 *                  returned by cnv_fmt_next_token().
 * cnv_fmt__unlex - set to 1 by cnv_fmt_unlex(); checked by the preliminary
 *                  actions at the top of the rules section.
 * cnv_fmt__mode  - scan mode stored by cnv_fmt_analyze(); YY_USER_INIT passes
 *                  it to cnv_fmt_lex_start().
 */
42 static int cnv_fmt__init = 0;
43 static const char *cnv_fmt__input = NULL;
44 static const char *cnv_fmt__next = NULL;
45 static int cnv_fmt__unlex = 0;
46 static FMT_LEX_MODE cnv_fmt__mode = FL_LOCAL_NUMBER;
/* Replace the skeleton's default YY_READ_BUF_SIZE with 512. */
48 #undef YY_READ_BUF_SIZE
49 #define YY_READ_BUF_SIZE 512
53 FMT_TOKEN_TYPE cnv_fmt_lex( FMT_TOKEN *token)
/*
 * The line above is the scanner's entry-point signature: it takes the
 * caller's FMT_TOKEN and returns a FMT_TOKEN_TYPE.
 * NOTE(review): presumably the body of a YY_DECL override; the #define
 * itself is not visible here - confirm.
 *
 * YY_INPUT: feed the scanner from the in-memory string instead of a file.
 * One character is copied from cnv_fmt__input into buf; a non-NUL character
 * advances the pointer and reports 1 character read, the terminating NUL
 * reports YY_NULL (end of input) without advancing.  max is not used.
 */
56 #define YY_INPUT( buf, result, max) \
57 result = (*(buf) = *cnv_fmt__input)? (cnv_fmt__input++, 1) : YY_NULL;
/*
 * YY_USER_ACTION: flex runs this before each matched rule's action.  It
 * points both token->text and token->raw_text at yytext and stores yyleng in
 * token->length; individual actions may then adjust text/length.
 */
60 #define YY_USER_ACTION \
61 token->text = token->raw_text = (const char *)yytext; \
62 token->length = yyleng;
/*
 * YY_USER_INIT: the visible lines register cnv_fmt_exit() with atexit() when
 * cnv_fmt__init is zero, and enter the start condition that
 * cnv_fmt_lex_start() returns for the current cnv_fmt__mode.
 * (Part of this macro is not visible here.)
 */
65 #define YY_USER_INIT \
66 if( !cnv_fmt__init) { \
67 atexit( cnv_fmt_exit); \
70 BEGIN( cnv_fmt_lex_start( cnv_fmt__mode));
/*
 * DB_FMT_LEX_RETURN(t): used by every rule action to finish a token.  The
 * visible part advances cnv_fmt__next by the length of the matched text; the
 * remainder of the macro is not visible here.
 */
73 #define DB_FMT_LEX_RETURN( t) \
75 cnv_fmt__next += yyleng; \
87 % s SC_BIT_STRING_FORMAT
/*
 * Start conditions.  cnv_fmt_lex_start() maps each FMT_LEX_MODE value to the
 * SC_* condition of the matching name, and the rules below are guarded by
 * these conditions.
 * NOTE(review): the flex directive is spelled "%s" with no blank; the
 * "% s" spelling on these lines looks like the result of running a C
 * formatter over this file - confirm before regenerating the scanner.
 */
88 % s SC_VALIDATE_DATE_FORMAT
89 % s SC_VALIDATE_FLOAT_FORMAT
90 % s SC_VALIDATE_INTEGER_FORMAT
91 % s SC_VALIDATE_MONETARY_FORMAT
92 % s SC_VALIDATE_TIME_FORMAT
93 % s SC_VALIDATE_TIMESTAMP_FORMAT
94 % s SC_VALIDATE_BIT_STRING_FORMAT
/*
 * Named pattern definitions (for example, INTEGER_PATTERN is referenced by
 * the SC_VALIDATE_INTEGER_FORMAT rule).
 * NOTE(review): flex expects one "NAME pattern" definition per line and
 * treats blanks inside a pattern as significant.  Several definitions below
 * share a line or contain inserted blanks, which again looks like formatter
 * damage - confirm against the generated scanner.
 * The "%%" that ends the last line below opens the rules section.
 */
95 DATE_SPEC %[%aAbBdDEemwxyY]
96 TIME_SPEC %[%HIklMprRSTXZ] TIMESTAMP_SPEC %[cC] |
103 TIME_PATTERN[^%]BINARY_PATTERN[bB] HEX_PATTERN[hHxX] BIT_STRING_SPEC % ("%" |
111 INTEGRAL_DIGITS} ? ("."
114 DIGITS} ?) ? INTEGRAL_DIGITS
137 Z_SEQ Z + N_SEQ 9 + S_SEQ "*" + Z_THOUS Z,
141 N_SEQ} ? S_THOUS "*",
145 INTEGER_PATTERN[^X \\] | "\\X" TIME_SEPARATOR ":" DATE_SEPARATOR "/" %%
146 /* Preliminary "actions" */
  /*
   * flex executes code placed before the first rule on every entry to the
   * scanner, so this check runs at the start of each cnv_fmt_lex() call.
   * When cnv_fmt_unlex() has raised cnv_fmt__unlex and the current buffer
   * has not reached EOF_DONE, the previous token is pushed back.
   * NOTE(review): yy_current_buffer, yy_eof_status and EOF_DONE are scanner
   * internals - presumably tied to the flex skeleton version this file was
   * written for; confirm they exist in the flex version in use.
   */
147 if (cnv_fmt__unlex && yy_current_buffer->yy_eof_status != EOF_DONE)
150 * Push back current token so that it can be read again.
151 * See cnv_fmt_unlex().
154 for (i = yyleng - 1; i >= 0; --i)
158 cnv_fmt__next -= yyleng;
  /*
   * The loop above walks the matched text from its last character to its
   * first (the loop body is not visible here); the subtraction undoes the
   * "cnv_fmt__next += yyleng" performed by DB_FMT_LEX_RETURN.
   */
163 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > "-"
  /*
   * Number-value rules (this one through the "0"+ rule): tokens recognised
   * while scanning a numeric value in the INITIAL, US-English or Korean
   * number start conditions.  Each action returns its token type through
   * DB_FMT_LEX_RETURN.
   * NOTE(review): blanks inside character classes and around operators in
   * these patterns (e.g. "[0 - 9]") are significant to flex; they look
   * formatter-introduced - confirm against the generated scanner.
   */
165 DB_FMT_LEX_RETURN (FT_MINUS);
168 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > "+"
170 DB_FMT_LEX_RETURN (FT_PLUS);
173 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > "."
175 DB_FMT_LEX_RETURN (FT_DECIMAL);
178 <INITIAL, SC_US_ENG_NUMBER > "$"
180 DB_FMT_LEX_RETURN (FT_CURRENCY);
183 <SC_KO_KR_NUMBER > "\\" | "\243\334"
  /*
   * Korean currency sign: a single backslash or the two-byte sequence
   * \243\334.  NOTE(review): presumably the single-byte and double-byte
   * won signs of the Korean code page - confirm the encoding.
   */
185 DB_FMT_LEX_RETURN (FT_CURRENCY);
188 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > ","[0 - 9]
  /*
   * Separator followed by a character from the class: the action skips the
   * first character of the match (its byte length is taken from mblen()) so
   * that token->text/length describe what follows the separator.
   * NOTE(review): mblen() returns -1 for an invalid multibyte sequence in
   * the current locale; the result is used unchecked here.
   */
193 int csize = mblen (token->text, token->length);
194 token->text += csize;
195 token->length -= csize;
196 DB_FMT_LEX_RETURN (FT_THOUSANDS);
199 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER >[1 - 9] ([0 - 9] *[1 - 9]) ?
201 /* Number with no leading or trailing zeroes. */
202 DB_FMT_LEX_RETURN (FT_NUMBER);
205 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > "*" +
  /* A run of asterisks is one FT_STARS token. */
207 DB_FMT_LEX_RETURN (FT_STARS);
210 <INITIAL, SC_US_ENG_NUMBER, SC_KO_KR_NUMBER > "0" +
  /* A run of zeroes is one FT_ZEROES token. */
212 DB_FMT_LEX_RETURN (FT_ZEROES);
215 <SC_US_ENG_TIME, SC_KO_KR_TIME > "0"[0 - 9]
  /*
   * Time-value rules: digit groups, separators and AM/PM markers recognised
   * while scanning a date/time value.  The digit rules distinguish a
   * zero-padded pair (FT_TIME_DIGITS_0), a blank-padded pair
   * (FT_TIME_DIGITS_BLANK), a pair starting with a non-zero digit
   * (FT_TIME_DIGITS) and any other digit run (FT_TIME_DIGITS_ANY).
   */
217 DB_FMT_LEX_RETURN (FT_TIME_DIGITS_0);
220 <SC_US_ENG_TIME, SC_KO_KR_TIME > " "[0 - 9]
222 DB_FMT_LEX_RETURN (FT_TIME_DIGITS_BLANK);
225 <SC_US_ENG_TIME, SC_KO_KR_TIME >[1 - 9][0 - 9]
227 DB_FMT_LEX_RETURN (FT_TIME_DIGITS);
230 <SC_US_ENG_TIME, SC_KO_KR_TIME > "0" | ([1 - 9][0 - 9] *)
232 DB_FMT_LEX_RETURN (FT_TIME_DIGITS_ANY);
235 <SC_US_ENG_TIME, SC_KO_KR_TIME >
  /*
   * NOTE(review): the pattern for this rule is not visible here; presumably
   * it is the DATE_SEPARATOR definition - confirm.
   */
240 DB_FMT_LEX_RETURN (FT_DATE_SEPARATOR);
243 <SC_US_ENG_TIME, SC_KO_KR_TIME >
  /*
   * NOTE(review): the pattern for this rule is not visible here; presumably
   * it is the TIME_SEPARATOR definition - confirm.
   */
248 DB_FMT_LEX_RETURN (FT_TIME_SEPARATOR);
251 <SC_KO_KR_TIME > "\xb3\xe2" | "\xbf\xf9" | "\xc0\xcf"
  /*
   * Three two-byte Korean date separators.  NOTE(review): presumably the
   * year, month and day suffix characters in EUC-KR - confirm the encoding.
   */
253 DB_FMT_LEX_RETURN (FT_LOCAL_DATE_SEPARATOR);
256 <SC_US_ENG_TIME > "AM" | "am"
  /* Both the AM and the PM rule return FT_AM_PM. */
259 DB_FMT_LEX_RETURN (FT_AM_PM);
262 <SC_US_ENG_TIME > "PM" | "pm"
265 DB_FMT_LEX_RETURN (FT_AM_PM);
269 <SC_US_ENG_TIME > "Sunday"
  /*
   * US-English weekday names.  Full names return FT_WEEKDAY_LONG and the
   * three-letter abbreviations return FT_WEEKDAY.  The full names are listed
   * first; flex prefers the longest match, so "Sunday" is never split into
   * "Sun" plus a remainder.
   * (Lines between each pattern and its return are not visible here.)
   */
272 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
275 <SC_US_ENG_TIME > "Monday"
278 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
281 <SC_US_ENG_TIME > "Tuesday"
284 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
287 <SC_US_ENG_TIME > "Wednesday"
290 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
293 <SC_US_ENG_TIME > "Thursday"
296 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
299 <SC_US_ENG_TIME > "Friday"
302 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
305 <SC_US_ENG_TIME > "Saturday"
308 DB_FMT_LEX_RETURN (FT_WEEKDAY_LONG);
311 <SC_US_ENG_TIME > "Sun"
314 DB_FMT_LEX_RETURN (FT_WEEKDAY);
317 <SC_US_ENG_TIME > "Mon"
320 DB_FMT_LEX_RETURN (FT_WEEKDAY);
323 <SC_US_ENG_TIME > "Tue"
326 DB_FMT_LEX_RETURN (FT_WEEKDAY);
329 <SC_US_ENG_TIME > "Wed"
332 DB_FMT_LEX_RETURN (FT_WEEKDAY);
335 <SC_US_ENG_TIME > "Thu"
338 DB_FMT_LEX_RETURN (FT_WEEKDAY);
341 <SC_US_ENG_TIME > "Fri"
344 DB_FMT_LEX_RETURN (FT_WEEKDAY);
347 <SC_US_ENG_TIME > "Sat"
350 DB_FMT_LEX_RETURN (FT_WEEKDAY);
355 <SC_US_ENG_TIME > "January"
  /*
   * US-English month names.  Full names return FT_MONTH_LONG and the
   * three-letter abbreviations return FT_MONTH.
   * (Lines between each pattern and its return are not visible here.)
   */
358 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
361 <SC_US_ENG_TIME > "February"
364 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
367 <SC_US_ENG_TIME > "March"
370 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
373 <SC_US_ENG_TIME > "April"
376 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
379 <SC_US_ENG_TIME > "May"
382 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
385 <SC_US_ENG_TIME > "June"
388 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
391 <SC_US_ENG_TIME > "July"
394 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
397 <SC_US_ENG_TIME > "August"
400 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
403 <SC_US_ENG_TIME > "September"
406 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
409 <SC_US_ENG_TIME > "October"
412 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
415 <SC_US_ENG_TIME > "November"
418 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
421 <SC_US_ENG_TIME > "December"
424 DB_FMT_LEX_RETURN (FT_MONTH_LONG);
427 <SC_US_ENG_TIME > "Jan"
430 DB_FMT_LEX_RETURN (FT_MONTH);
433 <SC_US_ENG_TIME > "Feb"
436 DB_FMT_LEX_RETURN (FT_MONTH);
439 <SC_US_ENG_TIME > "Mar"
442 DB_FMT_LEX_RETURN (FT_MONTH);
445 <SC_US_ENG_TIME > "Apr"
448 DB_FMT_LEX_RETURN (FT_MONTH);
451 <SC_US_ENG_TIME > "May"
  /*
   * NOTE(review): "May" is also the full-name rule above, in the same start
   * condition.  For matches of equal length flex picks the rule listed
   * first, so this abbreviation rule can never fire and "May" is always
   * reported as FT_MONTH_LONG.
   */
454 DB_FMT_LEX_RETURN (FT_MONTH);
457 <SC_US_ENG_TIME > "Jun"
460 DB_FMT_LEX_RETURN (FT_MONTH);
463 <SC_US_ENG_TIME > "Jul"
466 DB_FMT_LEX_RETURN (FT_MONTH);
469 <SC_US_ENG_TIME > "Aug"
472 DB_FMT_LEX_RETURN (FT_MONTH);
475 <SC_US_ENG_TIME > "Sep"
478 DB_FMT_LEX_RETURN (FT_MONTH);
481 <SC_US_ENG_TIME > "Oct"
484 DB_FMT_LEX_RETURN (FT_MONTH);
487 <SC_US_ENG_TIME > "Nov"
490 DB_FMT_LEX_RETURN (FT_MONTH);
493 <SC_US_ENG_TIME > "Dec"
496 DB_FMT_LEX_RETURN (FT_MONTH);
501 <SC_INTEGER_FORMAT > "X" +
  /*
   * Integer-format rules: tokenise an integer format string.  A run of X
   * characters is a digit placeholder (FT_NUMBER); any other run, and an
   * escaped X, is literal pattern text (FT_PATTERN).
   */
503 DB_FMT_LEX_RETURN (FT_NUMBER);
506 <SC_INTEGER_FORMAT >[^X \\] +
508 DB_FMT_LEX_RETURN (FT_PATTERN);
511 <SC_INTEGER_FORMAT > "\\X"
513 /* Escaped X is treated like a pattern char. */
  /*
   * Skip the first character of the match (the backslash) so the token text
   * is just the X; its byte length comes from mblen(), whose result is used
   * unchecked.
   */
514 int csize = mblen (token->text, token->length);
515 token->text += csize;
516 token->length -= csize;
517 DB_FMT_LEX_RETURN (FT_PATTERN);
522 <SC_TIME_FORMAT >[^%] +
  /*
   * Time-format rules: tokenise a date/time format string.  Text without a
   * percent sign is literal pattern text (FT_PATTERN).  Each directive rule
   * below maps a percent sign plus one letter to a token type:
   *   D x E -> FT_DATE        y Y     -> FT_YEAR      b B m -> FT_MONTH
   *   d e   -> FT_MONTHDAY    a A w   -> FT_WEEKDAY   H I k l -> FT_HOUR
   *   M     -> FT_MINUTE      S       -> FT_SECOND    F     -> FT_MILLISECOND
   *   p     -> FT_AM_PM       Z       -> FT_ZONE      r R T X -> FT_TIME
   *   c C   -> FT_TIMESTAMP
   * Every directive action first skips the leading character of the match
   * (the percent sign), using mblen() for its byte length, so token->text
   * and token->length describe only the directive letter.  The mblen()
   * result is used unchecked.
   */
524 DB_FMT_LEX_RETURN (FT_PATTERN);
527 <SC_TIME_FORMAT > "%%"
529 /* Escaped % is treated like a pattern char. */
530 int csize = mblen (token->text, token->length);
531 token->text += csize;
532 token->length -= csize;
533 DB_FMT_LEX_RETURN (FT_PATTERN);
536 <SC_TIME_FORMAT > %[DxE]
538 int csize = mblen (token->text, token->length);
539 token->text += csize;
540 token->length -= csize;
541 DB_FMT_LEX_RETURN (FT_DATE);
544 <SC_TIME_FORMAT > %[yY]
546 int csize = mblen (token->text, token->length);
547 token->text += csize;
548 token->length -= csize;
549 DB_FMT_LEX_RETURN (FT_YEAR);
552 <SC_TIME_FORMAT > %[bBm]
554 int csize = mblen (token->text, token->length);
555 token->text += csize;
556 token->length -= csize;
557 DB_FMT_LEX_RETURN (FT_MONTH);
560 <SC_TIME_FORMAT > %[de]
562 int csize = mblen (token->text, token->length);
563 token->text += csize;
564 token->length -= csize;
565 DB_FMT_LEX_RETURN (FT_MONTHDAY);
568 <SC_TIME_FORMAT > %[aAw]
570 int csize = mblen (token->text, token->length);
571 token->text += csize;
572 token->length -= csize;
573 DB_FMT_LEX_RETURN (FT_WEEKDAY);
576 <SC_TIME_FORMAT > %[HIkl]
578 int csize = mblen (token->text, token->length);
579 token->text += csize;
580 token->length -= csize;
581 DB_FMT_LEX_RETURN (FT_HOUR);
584 <SC_TIME_FORMAT > %[M]
586 int csize = mblen (token->text, token->length);
587 token->text += csize;
588 token->length -= csize;
589 DB_FMT_LEX_RETURN (FT_MINUTE);
592 <SC_TIME_FORMAT > %[S]
594 int csize = mblen (token->text, token->length);
595 token->text += csize;
596 token->length -= csize;
597 DB_FMT_LEX_RETURN (FT_SECOND);
600 <SC_TIME_FORMAT > %[F]
602 int csize = mblen (token->text, token->length);
603 token->text += csize;
604 token->length -= csize;
605 DB_FMT_LEX_RETURN (FT_MILLISECOND);
608 <SC_TIME_FORMAT > %[p]
610 int csize = mblen (token->text, token->length);
611 token->text += csize;
612 token->length -= csize;
613 DB_FMT_LEX_RETURN (FT_AM_PM);
616 <SC_TIME_FORMAT > %[Z]
618 int csize = mblen (token->text, token->length);
619 token->text += csize;
620 token->length -= csize;
621 DB_FMT_LEX_RETURN (FT_ZONE);
624 <SC_TIME_FORMAT > %[rRTX]
626 int csize = mblen (token->text, token->length);
627 token->text += csize;
628 token->length -= csize;
629 DB_FMT_LEX_RETURN (FT_TIME);
632 <SC_TIME_FORMAT > %[cC]
634 int csize = mblen (token->text, token->length);
635 token->text += csize;
636 token->length -= csize;
637 DB_FMT_LEX_RETURN (FT_TIMESTAMP);
643 <SC_VALIDATE_DATE_FORMAT > (
  /*
   * Format-validation rules: each SC_VALIDATE_* start condition has one or
   * more rules that return the FT_*_FORMAT token type of the same name
   * (date, float, integer, monetary, time, timestamp, bit string).
   * NOTE(review): most of the patterns are only partly visible here, so
   * what exactly each rule accepts cannot be stated from this view;
   * presumably each pattern describes a complete, well-formed format string
   * of that kind - confirm.
   */
654 DB_FMT_LEX_RETURN (FT_DATE_FORMAT);
657 <SC_VALIDATE_FLOAT_FORMAT > ("+") ?
662 DB_FMT_LEX_RETURN (FT_FLOAT_FORMAT);
665 <SC_VALIDATE_INTEGER_FORMAT > ("+") ?
670 DB_FMT_LEX_RETURN (FT_INTEGER_FORMAT);
673 <SC_VALIDATE_INTEGER_FORMAT > (
675 INTEGER_PATTERN} *X +
678 INTEGER_PATTERN} *) +
681 DB_FMT_LEX_RETURN (FT_INTEGER_FORMAT);
684 <SC_VALIDATE_MONETARY_FORMAT > ("$") ?
692 DB_FMT_LEX_RETURN (FT_MONETARY_FORMAT);
695 <SC_VALIDATE_TIME_FORMAT > (
706 DB_FMT_LEX_RETURN (FT_TIME_FORMAT);
709 <SC_VALIDATE_TIMESTAMP_FORMAT > (
720 DB_FMT_LEX_RETURN (FT_TIMESTAMP_FORMAT);
723 <SC_VALIDATE_BIT_STRING_FORMAT >
728 DB_FMT_LEX_RETURN (FT_BIT_STRING_FORMAT);
731 <SC_BIT_STRING_FORMAT > %("%" |
  /*
   * Bit-string rules.  In SC_BIT_STRING_FORMAT a percent directive selects
   * binary (FT_BINARY_DIGITS) or hexadecimal (FT_HEX_DIGITS) notation; the
   * directive patterns are only partly visible here.  In SC_BIT_STRING the
   * value itself is scanned: a run of 0/1 characters is FT_BINARY_DIGITS.
   * NOTE(review): the binary rule is listed before the hexadecimal one, so
   * for matches of equal length flex reports digits made only of 0 and 1 as
   * binary; both patterns also allow an empty match - confirm that this is
   * intended.
   */
736 DB_FMT_LEX_RETURN (FT_BINARY_DIGITS);
739 <SC_BIT_STRING_FORMAT > %
744 DB_FMT_LEX_RETURN (FT_HEX_DIGITS);
747 <SC_BIT_STRING >[01] *
749 DB_FMT_LEX_RETURN (FT_BINARY_DIGITS);
752 <SC_BIT_STRING >[0 - 9 a - fA - F] *
754 DB_FMT_LEX_RETURN (FT_HEX_DIGITS);
757 <SC_KO_KR_TIME > "\xbf\xc0\xc0\xfc"
  /*
   * Korean time-value rules.  The first two rules match four-byte sequences
   * and return FT_AM_PM; the remaining seven match a two-byte sequence
   * enclosed in parentheses and return FT_WEEKDAY.
   * NOTE(review): presumably these are the EUC-KR encodings of the Korean
   * AM/PM words and of the seven one-character weekday names, Sunday first -
   * confirm the encoding.  Lines between each pattern and its return are
   * not visible here.
   */
760 DB_FMT_LEX_RETURN (FT_AM_PM);
763 <SC_KO_KR_TIME > "\xbf\xc0\xc8\xc4"
766 DB_FMT_LEX_RETURN (FT_AM_PM);
769 <SC_KO_KR_TIME > "(" "\xc0\xcf" ")"
772 DB_FMT_LEX_RETURN (FT_WEEKDAY);
775 <SC_KO_KR_TIME > "(" "\xbf\xf9" ")"
778 DB_FMT_LEX_RETURN (FT_WEEKDAY);
781 <SC_KO_KR_TIME > "(" "\xc8\xad" ")"
784 DB_FMT_LEX_RETURN (FT_WEEKDAY);
787 <SC_KO_KR_TIME > "(" "\xbc\xf6" ")"
790 DB_FMT_LEX_RETURN (FT_WEEKDAY);
793 <SC_KO_KR_TIME > "(" "\xb8\xf1" ")"
796 DB_FMT_LEX_RETURN (FT_WEEKDAY);
799 <SC_KO_KR_TIME > "(" "\xb1\xdd" ")"
802 DB_FMT_LEX_RETURN (FT_WEEKDAY);
805 <SC_KO_KR_TIME > "(" "\xc5\xe4" ")"
808 DB_FMT_LEX_RETURN (FT_WEEKDAY);
814 DB_FMT_LEX_RETURN (FT_UNKNOWN);
  /*
   * The return above reports FT_UNKNOWN; its pattern is not visible here
   * (presumably a catch-all for text no other rule matches - confirm).
   *
   * The three statements that follow belong to a separate action whose
   * pattern is also not visible (presumably end of input - confirm).  They
   * set cnv_fmt__next to the current input position, mark the token as
   * FT_NONE and point its text and raw_text at the input position.
   */
820 cnv_fmt__next = cnv_fmt__input;
821 token->type = FT_NONE;
822 token->text = token->raw_text = cnv_fmt__input;
830 * cnv_fmt_analyze() - Initialize lexical scan of instring, using
831 * the given scan mode.
837 cnv_fmt_analyze (const char *instring, FMT_LEX_MODE mode)
/*
 * instring: string to scan; both the read position (cnv_fmt__input) and the
 *           next-token pointer (cnv_fmt__next) are set to it.  The pointer
 *           is stored, not copied.
 * mode:     scan mode stored in cnv_fmt__mode, which YY_USER_INIT passes to
 *           cnv_fmt_lex_start().
 * Lines between the two assignments are not visible here.
 */
840 cnv_fmt__input = cnv_fmt__next = instring;
843 cnv_fmt__mode = mode;
848 * cnv_fmt_unlex() - Push back the current token so that it can be read again.
854 cnv_fmt__unlex = 1; /* see rules section */
/*
 * Only the flag is raised here.  The push-back itself is done by the
 * preliminary actions at the top of the rules section, which test
 * cnv_fmt__unlex on the next cnv_fmt_lex() call.
 */
859 * cnv_fmt_next_token() - Return pointer to next lexical token in value string.
863 cnv_fmt_next_token ()
/*
 * Returns the current value of cnv_fmt__next, which DB_FMT_LEX_RETURN
 * advances past each returned token.
 * NOTE(review): the empty parentheses declare an unprototyped function in
 * pre-C23 C; "(void)" would match the style used for cnv_fmt_exit().
 */
865 return cnv_fmt__next;
870 * cnv_fmt_lex_start() - Return the start condition for the given scan mode.
875 cnv_fmt_lex_start (FMT_LEX_MODE mode)
/*
 * A single chained conditional expression.  The two "local" modes are
 * resolved first: FL_LOCAL_NUMBER and FL_LOCAL_TIME are converted to a
 * concrete mode from the current zone (intl_zone() for LC_MONETARY and
 * LC_TIME respectively) and this function is called again with the result.
 * Every other FL_* mode maps to the SC_* start condition of the same name.
 * The final fallback operand of the chain is not visible here.
 */
878 mode == FL_LOCAL_NUMBER ?
879 cnv_fmt_lex_start (cnv_fmt_number_mode (intl_zone (LC_MONETARY))) :
880 mode == FL_LOCAL_TIME ?
881 cnv_fmt_lex_start (cnv_fmt_time_mode (intl_zone (LC_TIME))) :
882 mode == FL_US_ENG_NUMBER ? SC_US_ENG_NUMBER :
883 mode == FL_KO_KR_NUMBER ? SC_KO_KR_NUMBER :
884 mode == FL_US_ENG_TIME ? SC_US_ENG_TIME :
885 mode == FL_KO_KR_TIME ? SC_KO_KR_TIME :
886 mode == FL_INTEGER_FORMAT ? SC_INTEGER_FORMAT :
887 mode == FL_TIME_FORMAT ? SC_TIME_FORMAT :
888 mode == FL_BIT_STRING_FORMAT ? SC_BIT_STRING_FORMAT :
889 mode == FL_BIT_STRING ? SC_BIT_STRING :
890 mode == FL_VALIDATE_DATE_FORMAT ? SC_VALIDATE_DATE_FORMAT :
891 mode == FL_VALIDATE_FLOAT_FORMAT ? SC_VALIDATE_FLOAT_FORMAT :
892 mode == FL_VALIDATE_INTEGER_FORMAT ? SC_VALIDATE_INTEGER_FORMAT :
893 mode == FL_VALIDATE_MONETARY_FORMAT ? SC_VALIDATE_MONETARY_FORMAT :
894 mode == FL_VALIDATE_TIME_FORMAT ? SC_VALIDATE_TIME_FORMAT :
895 mode == FL_VALIDATE_TIMESTAMP_FORMAT ? SC_VALIDATE_TIMESTAMP_FORMAT :
896 mode == FL_VALIDATE_BIT_STRING_FORMAT ? SC_VALIDATE_BIT_STRING_FORMAT :
903 * cnv_fmt_number_mode() - Return the number value scan mode for the given zone.
908 cnv_fmt_number_mode (INTL_ZONE zone)
/*
 * Chooses between FL_US_ENG_NUMBER and FL_KO_KR_NUMBER; the zone tests that
 * select each assignment are not visible here.  An unrecognised zone trips
 * the assertion and then falls back to FL_US_ENG_NUMBER, which is what
 * callers get when assertions are compiled out (NDEBUG).
 */
916 mode = FL_US_ENG_NUMBER;
921 mode = FL_KO_KR_NUMBER;
926 assert (!"Zone not implemented!");
927 mode = FL_US_ENG_NUMBER;
938 * cnv_fmt_time_mode() - Return the time value scan mode for the given zone.
943 cnv_fmt_time_mode (INTL_ZONE zone)
/*
 * Chooses between FL_US_ENG_TIME and FL_KO_KR_TIME; the zone tests that
 * select each assignment are not visible here.  An unrecognised zone trips
 * the assertion and then falls back to FL_US_ENG_TIME, which is what
 * callers get when assertions are compiled out (NDEBUG).
 */
951 mode = FL_US_ENG_TIME;
956 mode = FL_KO_KR_TIME;
962 assert (!"Zone not implemented!");
963 mode = FL_US_ENG_TIME;
973 * cnv_fmt_exit() - Free buffers when exit.
979 if (YY_CURRENT_BUFFER != NULL)
/*
 * Registered with atexit() by YY_USER_INIT.  Deletes the scanner's current
 * buffer, if any, and clears the reference so a second call is a no-op.
 * NOTE(review): the assignment below requires YY_CURRENT_BUFFER to expand
 * to an lvalue; confirm this holds for the flex version in use.
 */
981 yy_delete_buffer (YY_CURRENT_BUFFER);
982 YY_CURRENT_BUFFER = NULL;