diff --git a/configure.ac b/configure.ac index f1f1ece88..96b082956 100644 --- a/configure.ac +++ b/configure.ac @@ -170,6 +170,8 @@ tests/test-ccl/Makefile tests/test-extended/Makefile tests/test-c++-yywrap/Makefile tests/test-concatenated-options/Makefile +tests/test-tests_strutils/Makefile +tests/test-charset/Makefile dnl --new-test-here-- This line is processed by tests/create-test. ) diff --git a/src/FlexLexer.h b/src/FlexLexer.h index bad4ce03f..a9aa4f29f 100644 --- a/src/FlexLexer.h +++ b/src/FlexLexer.h @@ -53,6 +53,11 @@ # define FLEX_STD std:: # endif +#ifndef YY_CHAR_DEFINED +#define YY_CHAR_DEFINED +typedef unsigned char YY_CHAR; +#endif + extern "C++" { struct yy_buffer_state; @@ -62,7 +67,7 @@ class FlexLexer { public: virtual ~FlexLexer() { } - const char* YYText() const { return yytext; } + const YY_CHAR* YYText() const { return yytext; } int YYLeng() const { return yyleng; } virtual void @@ -91,8 +96,16 @@ class FlexLexer { int debug() const { return yy_flex_debug; } void set_debug( int flag ) { yy_flex_debug = flag; } +#ifdef YY_CHARSET + void set_charset(char *charset); + char* get_charset(); +#endif + protected: - char* yytext; + YY_CHAR* yytext; +#ifdef YY_CHARSET + char *yycharset; +#endif int yyleng; int yylineno; // only maintained if you use %option yylineno int yy_flex_debug; // only has effect with -d or "%option debug" @@ -134,7 +147,7 @@ class yyFlexLexer : public FlexLexer { virtual void LexerOutput( const char* buf, int size ); virtual void LexerError( const char* msg ); - void yyunput( int c, char* buf_ptr ); + void yyunput( int c, YY_CHAR* buf_ptr ); int yyinput(); void yy_load_buffer_state(); @@ -153,17 +166,26 @@ class yyFlexLexer : public FlexLexer { yy_state_type yy_try_NUL_trans( yy_state_type current_state ); int yy_get_next_buffer(); +#ifdef YY_CHARSET + size_t yycharset_convert(char* source, size_t source_bytes, YY_CHAR* target, + size_t target_length, size_t* converted_bytes); + virtual size_t yycharset_handler(char 
*charset, + char *source, size_t source_bytes, + YY_CHAR *target, size_t target_length, + size_t *converted_bytes); +#endif + FLEX_STD istream* yyin; // input source for default LexerInput FLEX_STD ostream* yyout; // output sink for default LexerOutput // yy_hold_char holds the character lost when yytext is formed. - char yy_hold_char; + YY_CHAR yy_hold_char; // Number of characters read into yy_ch_buf. int yy_n_chars; // Points to current character in buffer. - char* yy_c_buf_p; + YY_CHAR* yy_c_buf_p; int yy_init; // whether we need to initialize int yy_start; // start state number @@ -182,12 +204,12 @@ class yyFlexLexer : public FlexLexer { // on use of certain flex features (like REJECT or yymore()). yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; + YY_CHAR* yy_last_accepting_cpos; yy_state_type* yy_state_buf; yy_state_type* yy_state_ptr; - char* yy_full_match; + YY_CHAR* yy_full_match; int* yy_full_state; int yy_full_lp; diff --git a/src/flex.skl b/src/flex.skl index 5188a5c23..99b65a0ab 100644 --- a/src/flex.skl +++ b/src/flex.skl @@ -77,6 +77,10 @@ m4_ifelse(M4_YY_PREFIX,yy,, #define yyalloc M4_YY_PREFIX[[alloc]] #define yyrealloc M4_YY_PREFIX[[realloc]] #define yyfree M4_YY_PREFIX[[free]] +m4_ifdef( [[M4_YY_CHARSET]], [[ +#define yycharset M4_YY_PREFIX[[charset]] +#define yycharset_handler M4_YY_PREFIX[[charset_handler]] +]]) ) %endif %endif @@ -152,6 +156,12 @@ m4preproc_define(`M4_GEN_PREFIX', [[ M4_GEN_PREFIX(`get_column') M4_GEN_PREFIX(`set_column') + m4_ifdef( [[M4_YY_CHARSET]], [[ + M4_GEN_PREFIX(`get_charset') + M4_GEN_PREFIX(`set_charset') + M4_GEN_PREFIX(`get_charset_handler') + M4_GEN_PREFIX(`set_charset_handler') + ]]) ]]) M4_GEN_PREFIX(`wrap') %endif @@ -274,7 +284,7 @@ m4_ifdef( [[M4_YY_NO_ANSI_FUNC_PROTOS]], * we want to instead treat it as an 8-bit unsigned char, hence the * double cast. 
*/ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) +#define YY_SC_TO_UI(c) ((unsigned int) (YY_CHAR)c) %ok-for-header @@ -326,6 +336,10 @@ m4_define( [[M4_YY_DOC_PARAM]], [[@param yyscanner The scanner object.]]) #define yytext YY_G(yytext_r) #define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) #define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +m4_ifdef( [[M4_YY_CHARSET]], [[ +#define yycharset YY_G(yycharset_r) +#define yycharset_handler YY_G(yycharset_handler_r) +]]) #define yy_flex_debug YY_G(yy_flex_debug_r) m4_define( [[M4_YY_INCR_LINENO]], @@ -400,6 +414,19 @@ m4_ifdef( [[M4_YY_NO_ANSI_FUNC_DEFS]], $3 $4; [[\]] $5 $6; [[\]] M4_YY_DECL_LAST_ARG]]) + m4_define( [[YYFARGS4]], [[($2,$4,$6,$8 M4_YY_DEF_LAST_ARG) [[\]] + $1 $2; [[\]] + $3 $4; [[\]] + $5 $6; [[\]] + $7 $8; [[\]] + M4_YY_DECL_LAST_ARG]]) + m4_define( [[YYFARGS5]], [[($2,$4,$6,$8,$10 M4_YY_DEF_LAST_ARG) [[\]] + $1 $2; [[\]] + $3 $4; [[\]] + $5 $6; [[\]] + $7 $8; [[\]] + $9 $10; [[\]] + M4_YY_DECL_LAST_ARG]]) ]], [[ %# Generate C99 function defs. 
@@ -407,6 +434,10 @@ m4_ifdef( [[M4_YY_NO_ANSI_FUNC_DEFS]], m4_define( [[YYFARGS1]], [[($1 $2 M4_YY_DEF_LAST_ARG)]]) m4_define( [[YYFARGS2]], [[($1 $2, $3 $4 M4_YY_DEF_LAST_ARG)]]) m4_define( [[YYFARGS3]], [[($1 $2, $3 $4, $5 $6 M4_YY_DEF_LAST_ARG)]]) + m4_define( [[YYFARGS4]], [[($1 $2, $3 $4, $5 $6, $7 $8 + M4_YY_DEF_LAST_ARG)]]) + m4_define( [[YYFARGS5]], [[($1 $2, $3 $4, $5 $6, $7 $8, $9 $10 + M4_YY_DEF_LAST_ARG)]]) ]]) m4_ifdef( [[M4_YY_NOT_IN_HEADER]], @@ -483,6 +514,9 @@ extern yy_size_t yyleng; %if-c-only %if-not-reentrant extern FILE *yyin, *yyout; +m4_ifdef( [[M4_YY_CHARSET]], [[ +extern char *yycharset; +]]) %endif %endif @@ -513,7 +547,7 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], }while(0) #define YY_LINENO_REWIND_TO(dst) \ do {\ - const char *p;\ + const YY_CHAR *p;\ for ( p = yy_cp-1; p >= (dst); --p)\ if ( *p == '\n' )\ --yylineno;\ @@ -547,6 +581,8 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], #define unput(c) yyunput( c, YY_G(yytext_ptr) M4_YY_CALL_LAST_ARG ) ]]) +%% [1.0] yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here + #ifndef YY_STRUCT_YY_BUFFER_STATE #define YY_STRUCT_YY_BUFFER_STATE struct yy_buffer_state @@ -560,8 +596,8 @@ struct yy_buffer_state %endif - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ + YY_CHAR *yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ /* Size of input buffer in bytes, not including room for EOB * characters. @@ -602,6 +638,12 @@ struct yy_buffer_state int yy_fill_buffer; int yy_buffer_status; + +m4_ifdef( [[M4_YY_CHARSET]],[[ + char *yy_input_buffer; + size_t yy_input_buffer_length; +]]) + m4_ifdef( [[M4_YY_NOT_IN_HEADER]], [[ #define YY_BUFFER_NEW 0 @@ -659,12 +701,12 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], %if-not-reentrant %not-for-header /* yy_hold_char holds the character lost when yytext is formed. 
*/ -static char yy_hold_char; +static YY_CHAR yy_hold_char; static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */ yy_size_t yyleng; /* Points to current character in buffer. */ -static char *yy_c_buf_p = (char *) 0; +static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; static int yy_init = 0; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ @@ -695,7 +737,7 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], #define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER M4_YY_CALL_LAST_ARG) ]]) -YY_BUFFER_STATE yy_scan_buffer M4_YY_PARAMS( char *base, yy_size_t size M4_YY_PROTO_LAST_ARG ); +YY_BUFFER_STATE yy_scan_buffer M4_YY_PARAMS( YY_CHAR *base, yy_size_t size M4_YY_PROTO_LAST_ARG ); YY_BUFFER_STATE yy_scan_string M4_YY_PARAMS( yyconst char *yy_str M4_YY_PROTO_LAST_ARG ); YY_BUFFER_STATE yy_scan_bytes M4_YY_PARAMS( yyconst char *bytes, yy_size_t len M4_YY_PROTO_LAST_ARG ); @@ -741,8 +783,6 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], #define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) ]]) -%% [1.0] yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here - m4_ifdef( [[M4_YY_NOT_IN_HEADER]], [[ %% [1.5] DFA @@ -827,10 +867,10 @@ struct yyguts_t size_t yy_buffer_stack_top; /**< index of top of stack. */ size_t yy_buffer_stack_max; /**< capacity of stack. */ YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. 
*/ - char yy_hold_char; + YY_CHAR yy_hold_char; yy_size_t yy_n_chars; yy_size_t yyleng_r; - char *yy_c_buf_p; + YY_CHAR *yy_c_buf_p; int yy_init; int yy_start; int yy_did_buffer_switch_on_eof; @@ -838,7 +878,7 @@ struct yyguts_t int yy_start_stack_depth; int *yy_start_stack; yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; + YY_CHAR* yy_last_accepting_cpos; int yylineno_r; int yy_flex_debug_r; @@ -847,7 +887,7 @@ m4_ifdef( [[M4_YY_USES_REJECT]], [[ yy_state_type *yy_state_buf; yy_state_type *yy_state_ptr; - char *yy_full_match; + YY_CHAR *yy_full_match; int yy_lp; /* These are only needed for trailing context rules, @@ -859,13 +899,13 @@ m4_ifdef( [[M4_YY_USES_REJECT]], m4_ifdef( [[M4_YY_TEXT_IS_ARRAY]], [[ - char yytext_r[YYLMAX]; - char *yytext_ptr; + YY_CHAR yytext_r[YYLMAX]; + YY_CHAR *yytext_ptr; int yy_more_offset; int yy_prev_more_offset; ]], [[ - char *yytext_r; + YY_CHAR *yytext_r; int yy_more_flag; int yy_more_len; ]]) @@ -880,6 +920,16 @@ m4_ifdef( [[]], YYLTYPE * yylloc_r; ]]) +m4_ifdef( [[M4_YY_CHARSET]], [[ + char *yycharset_r; /** current charset name */ +]]) + +m4_ifdef( [[M4_YY_CXX]],,[[ +m4_ifdef( [[M4_YY_CHARSET]],[[ + yycharset_handler_t yycharset_handler_r; /** charset handle function */ +]]) +]]) + }; /* end struct yyguts_t */ ]]) @@ -971,7 +1021,7 @@ yy_size_t yyget_leng M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); m4_ifdef( [[M4_YY_NO_GET_TEXT]],, [[ -char *yyget_text M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); +YY_CHAR *yyget_text M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); ]]) m4_ifdef( [[M4_YY_NO_GET_LINENO]],, @@ -1000,6 +1050,28 @@ void yyset_column M4_YY_PARAMS( int column_no M4_YY_PROTO_LAST_ARG ); ]]) ]]) +m4_ifdef( [[M4_YY_REENTRANT]],[[ +m4_ifdef( [[M4_YY_CHARSET]],[[ +m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[ +char *yyget_charset M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); +%if-c-only +yycharset_handler_t yyget_charset_handler M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); +%endif +]]) +]]) +]]) + +m4_ifdef( [[M4_YY_REENTRANT]],[[ +m4_ifdef( 
[[M4_YY_CHARSET]], [[ +m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[ +void yyset_charset M4_YY_PARAMS( char *charset M4_YY_PROTO_LAST_ARG ); +%if-c-only +void yyset_charset_handler M4_YY_PARAMS( yycharset_handler_t charset_handler M4_YY_PROTO_LAST_ARG ); +%endif +]]) +]]) +]]) + %if-bison-bridge m4_ifdef( [[M4_YY_NO_GET_LVAL]],, [[ @@ -1043,7 +1115,7 @@ extern int yywrap M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); %endif #ifndef yytext_ptr -static void yy_flex_strncpy M4_YY_PARAMS( char *, yyconst char *, int M4_YY_PROTO_LAST_ARG); +static void yy_flex_strncpy M4_YY_PARAMS( YY_CHAR *, yyconst YY_CHAR *, int M4_YY_PROTO_LAST_ARG); #endif #ifdef YY_NEED_STRLEN @@ -1123,7 +1195,7 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], #define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) %endif %if-c++-only C++ definition -#define ECHO LexerOutput( yytext, yyleng ) +#define ECHO LexerOutput( reinterpret_cast<const char*>(yytext), yyleng ) %endif #endif ]]) @@ -1135,13 +1207,14 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], */ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ +do {\ %% [5.0] fread()/read() definition of YY_INPUT goes here unless we're doing C++ \ \ %if-c++-only C++ definition \ if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); -%endif - + YY_FATAL_ERROR( "input in flex scanner failed" ); \ +%endif \ +} while(0) #endif ]]) @@ -1282,7 +1355,7 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], YY_DECL { yy_state_type yy_current_state; - char *yy_cp, *yy_bp; + YY_CHAR *yy_cp, *yy_bp; int yy_act; M4_YY_DECL_GUTS_VAR(); @@ -1418,7 +1491,7 @@ do_action: /* This label is used only to access EOF actions. */ * end-of-buffer state). Contrast this with the test * in input(). */ - if ( YY_G(yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars)] ) + if ( ((YY_CHAR*)(YY_G(yy_c_buf_p))) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars)] ) { /* This was really a NUL. 
*/ yy_state_type yy_next_state; @@ -1496,8 +1569,9 @@ do_action: /* This label is used only to access EOF actions. */ goto yy_match; case EOB_ACT_LAST_MATCH: - YY_G(yy_c_buf_p) = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars)]; + YY_G(yy_c_buf_p) = ((YY_CHAR*)( + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars)] + )); yy_current_state = yy_get_previous_state( M4_YY_CALL_ONLY_ARG ); @@ -1616,6 +1690,57 @@ void yyFlexLexer::LexerOutput( const char* buf, int size ) %ok-for-header %endif +m4_ifdef( [[M4_YY_NOT_IN_HEADER]],[[ +m4_ifdef( [[M4_YY_CHARSET]],[[ +/* yycharset_convert - convert incoming data from arbitrary + * charset into internal representation + */ +%if-c-only +static size_t yycharset_convert YYFARGS5( + char*, source, size_t, source_bytes, + YY_CHAR*, target, size_t, target_length, + size_t*, converted_bytes) { +%endif +%if-c++-only +size_t yyFlexLexer::yycharset_convert( + char* source, size_t source_bytes, + YY_CHAR* target, size_t target_length, + size_t* converted_bytes) { +%endif + M4_YY_DECL_GUTS_VAR(); + if(strcmp(yycharset, "M4_YY_CHARSET_SOURCE")==0) { + if(target_length < source_bytes) + YY_FATAL_ERROR("Too small buffer"); + memcpy(target, source, source_bytes); /* raw byte copy so embedded NUL bytes survive */ + *converted_bytes = source_bytes; + return source_bytes; + } +%if-c-only + else if(yycharset_handler) +%endif + return yycharset_handler(yycharset, source, source_bytes, + target, target_length, converted_bytes M4_YY_CALL_LAST_ARG); + +/* Code below just outputs an error message saying that selected encoding + * is not supported. 
In C scanner it is an end part of yycharset_convert, + * while in C++ scanner it is a default implementation of yycharset_handler */ +%if-c++-only +} + +size_t yyFlexLexer::yycharset_handler(char *charset, + char *source, size_t source_bytes, + YY_CHAR *target, size_t target_length, + size_t *converted_bytes) { +%endif + char msg[256]; + snprintf(msg, sizeof(msg), + "Unsupported character encoding: %s", yycharset); + YY_FATAL_ERROR(msg); + return 0; +} +]]) +]]) + m4_ifdef( [[M4_YY_NOT_IN_HEADER]], [[ /* yy_get_next_buffer - try to read in a new buffer @@ -1633,12 +1758,12 @@ int yyFlexLexer::yy_get_next_buffer() %endif { M4_YY_DECL_GUTS_VAR(); - char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - char *source = YY_G(yytext_ptr); + YY_CHAR *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + YY_CHAR *source = YY_G(yytext_ptr); int number_to_move, i; int ret_val; - if ( YY_G(yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars) + 1] ) + if ( YY_G(yy_c_buf_p) > ((YY_CHAR*)( &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_G(yy_n_chars) + 1])) ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); @@ -1692,7 +1817,7 @@ m4_ifdef( [[M4_YY_USES_REJECT]], YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = - (int) (YY_G(yy_c_buf_p) - b->yy_ch_buf); + (int) (YY_G(yy_c_buf_p) - ((YY_CHAR*)(b->yy_ch_buf)) ); if ( b->yy_is_our_buffer ) { @@ -1703,10 +1828,10 @@ m4_ifdef( [[M4_YY_USES_REJECT]], else b->yy_buf_size *= 2; - b->yy_ch_buf = (char *) + b->yy_ch_buf = (YY_CHAR *) /* Include room in for 2 EOB chars. */ yyrealloc( (void *) b->yy_ch_buf, - b->yy_buf_size + 2 M4_YY_CALL_LAST_ARG ); + (b->yy_buf_size + 2)*sizeof(YY_CHAR) M4_YY_CALL_LAST_ARG ); } else /* Can't grow it, we don't own it. */ @@ -1727,6 +1852,43 @@ m4_ifdef( [[M4_YY_USES_REJECT]], num_to_read = YY_READ_BUF_SIZE; /* Read in more data. 
*/ +m4_ifdef([[M4_YY_CHARSET]],[[ + if(yycharset) { + const size_t max_size = YY_READ_BUF_SIZE * sizeof(YY_CHAR); + char buffer[max_size]; + memcpy(buffer, YY_CURRENT_BUFFER_LVALUE->yy_input_buffer, + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length); + + size_t read_bytes, converted_characters; + YY_INPUT( + (&buffer[YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length]), + (read_bytes), + (max_size-YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length) + ); + size_t converted_bytes = 0; + converted_characters = yycharset_convert( + buffer, + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length+read_bytes, + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move], + num_to_read, + &converted_bytes M4_YY_CALL_LAST_ARG); + if(converted_characters == 0 && read_bytes != 0) + YY_FATAL_ERROR("Could not convert input characters"); + YY_G(yy_n_chars) = converted_characters; + + /* store left bytes in yy_input_buffer */ + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length += + read_bytes-converted_bytes; + + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer = (char*)yyrealloc( + (void*) YY_CURRENT_BUFFER_LVALUE->yy_input_buffer, + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length + M4_YY_CALL_LAST_ARG); + memcpy(YY_CURRENT_BUFFER_LVALUE->yy_input_buffer, + &buffer[converted_bytes], + YY_CURRENT_BUFFER_LVALUE->yy_input_buffer_length); + } else +]]) YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), YY_G(yy_n_chars), num_to_read ); @@ -1755,8 +1917,8 @@ m4_ifdef( [[M4_YY_USES_REJECT]], if ((yy_size_t) (YY_G(yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { /* Extend the array by 50%, plus the number we really need. 
*/ yy_size_t new_size = YY_G(yy_n_chars) + number_to_move + (YY_G(yy_n_chars) >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( - (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, new_size M4_YY_CALL_LAST_ARG ); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (YY_CHAR *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, new_size*sizeof(YY_CHAR) M4_YY_CALL_LAST_ARG ); if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); } @@ -1782,7 +1944,7 @@ m4_ifdef( [[M4_YY_USES_REJECT]], %endif { yy_state_type yy_current_state; - char *yy_cp; + YY_CHAR *yy_cp; M4_YY_DECL_GUTS_VAR(); %% [15.0] code to get the start state into yy_current_state goes here @@ -1820,13 +1982,13 @@ m4_ifdef( [[M4_YY_USES_REJECT]], %if-c-only m4_ifdef( [[M4_YY_NO_UNPUT]],, [[ - static void yyunput YYFARGS2( int,c, char *,yy_bp) + static void yyunput YYFARGS2( int,c, YY_CHAR *,yy_bp) %endif %if-c++-only - void yyFlexLexer::yyunput( int c, char* yy_bp) + void yyFlexLexer::yyunput( int c, YY_CHAR* yy_bp) %endif { - char *yy_cp; + YY_CHAR *yy_cp; M4_YY_DECL_GUTS_VAR(); yy_cp = YY_G(yy_c_buf_p); @@ -1838,9 +2000,9 @@ m4_ifdef( [[M4_YY_NO_UNPUT]],, { /* need to shift things up to make room */ /* +2 for EOB chars. */ yy_size_t number_to_move = YY_G(yy_n_chars) + 2; - char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ + YY_CHAR *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; - char *source = + YY_CHAR *source = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) @@ -2062,7 +2224,7 @@ static void yy_load_buffer_state YYFARGS0(void) /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ - b->yy_ch_buf = (char *) yyalloc( b->yy_buf_size + 2 M4_YY_CALL_LAST_ARG ); + b->yy_ch_buf = (YY_CHAR *) yyalloc( (b->yy_buf_size + 2)*sizeof(YY_CHAR) M4_YY_CALL_LAST_ARG ); if ( ! 
b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -2147,6 +2309,11 @@ m4_ifdef( [[M4_YY_ALWAYS_INTERACTIVE]], b->yy_is_interactive = 0; %endif errno = oerrno; + +m4_ifdef([[M4_YY_CHARSET]],[[ + b->yy_input_buffer = NULL; + b->yy_input_buffer_length = 0; +]]) } /** Discard all buffered characters. On the next scan, YY_INPUT will be called. @@ -2319,7 +2486,7 @@ m4_ifdef( [[M4_YY_NO_SCAN_BUFFER]],, * M4_YY_DOC_PARAM * @return the newly allocated buffer state object. */ -YY_BUFFER_STATE yy_scan_buffer YYFARGS2( char *,base, yy_size_t ,size) +YY_BUFFER_STATE yy_scan_buffer YYFARGS2( YY_CHAR *,base, yy_size_t ,size) { YY_BUFFER_STATE b; m4_dnl M4_YY_DECL_GUTS_VAR(); @@ -2386,14 +2553,14 @@ m4_ifdef( [[M4_YY_NO_SCAN_BYTES]],, YY_BUFFER_STATE yy_scan_bytes YYFARGS2( yyconst char *,yybytes, yy_size_t ,_yybytes_len) { YY_BUFFER_STATE b; - char *buf; + YY_CHAR *buf; yy_size_t n; yy_size_t i; m4_dnl M4_YY_DECL_GUTS_VAR(); /* Get memory for full buffer, including space for trailing EOB's. */ n = _yybytes_len + 2; - buf = (char *) yyalloc( n M4_YY_CALL_LAST_ARG ); + buf = (YY_CHAR *) yyalloc( n*sizeof(YY_CHAR) M4_YY_CALL_LAST_ARG ); if ( ! 
buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); @@ -2579,7 +2746,55 @@ int yyget_column YYFARGS0(void) } ]]) ]]) +%endif + +m4_ifdef([[M4_YY_CHARSET]],[[ + m4_ifdef([[M4_YY_REENTRANT]],[[ + + m4_ifdef([[M4_YY_NO_GET_CHARSET]],,[[ + m4_define([[M4_YY_GET_CHARSET]],[[ + char* yyget_charset YYFARGS0(void) + ]]) + ]]) + + m4_ifdef([[M4_YY_NO_GET_CHARSET_HANDLER]],,[[ + m4_define([[M4_YY_GET_CHARSET_HANDLER]],[[ + yycharset_handler_t yyget_charset_handler YYFARGS0(void) + ]]) + ]]) + ]]) + + m4_ifdef([[M4_YY_CXX]], [[ + m4_define( [[M4_YY_GET_CHARSET]], [[ + char *FlexLexer::get_charset() + ]]) + ]]) +]]) +m4_ifdef( [[M4_YY_GET_CHARSET]], [[ +/** Get the current charset name + * M4_YY_DOC_PARAM + * @return charset name + */ +M4_YY_GET_CHARSET +{ + M4_YY_DECL_GUTS_VAR(); + return yycharset; +} +]]) + +m4_ifdef( [[M4_YY_GET_CHARSET_HANDLER]],[[ +/** Get the currently set charset handler + * M4_YY_DOC_PARAM + */ +M4_YY_GET_CHARSET_HANDLER +{ + M4_YY_DECL_GUTS_VAR(); + return yycharset_handler; +} +]]) + +%if-c-only m4_ifdef( [[M4_YY_NO_GET_IN]],, [[ /** Get the input stream. 
@@ -2621,7 +2836,7 @@ yy_size_t yyget_leng YYFARGS0(void) */ m4_ifdef( [[M4_YY_NO_GET_TEXT]],, [[ -char *yyget_text YYFARGS0(void) +YY_CHAR *yyget_text YYFARGS0(void) { M4_YY_DECL_GUTS_VAR(); return yytext; @@ -2685,8 +2900,56 @@ void yyset_column YYFARGS1( int , column_no) } ]]) ]]) +%endif + +m4_ifdef([[M4_YY_CHARSET]], [[ + m4_ifdef([[M4_YY_REENTRANT]],[[ + + m4_ifdef([[M4_YY_NO_SET_CHARSET]],,[[ + m4_define([[M4_YY_SET_CHARSET]], [[ + void yyset_charset YYFARGS1(char*, charset) + ]]) + ]]) + + m4_ifdef([[M4_YY_NO_SET_CHARSET_HANDLER]],,[[ + m4_define([[M4_YY_SET_CHARSET_HANDLER]],[[ + void yyset_charset_handler YYFARGS1(yycharset_handler_t, charset_handler) + ]]) + ]]) + ]]) + + m4_ifdef( [[M4_YY_CXX]],[[ + m4_define( [[M4_YY_SET_CHARSET]], [[ + void FlexLexer::set_charset(char *charset) + ]]) + ]]) +]]) + +m4_ifdef( [[M4_YY_SET_CHARSET]],[[ +/** Set the current charset name + * @param charset charset name + * M4_YY_DOC_PARAM + */ +M4_YY_SET_CHARSET +{ + M4_YY_DECL_GUTS_VAR(); + yycharset = strdup(charset); +} +]]) +m4_ifdef( [[M4_YY_SET_CHARSET_HANDLER]],[[ +/** Set the current charset handler + * @param charset_handler handler function + * M4_YY_DOC_PARAM + */ +void yyset_charset_handler YYFARGS1( yycharset_handler_t, charset_handler) +{ + M4_YY_DECL_GUTS_VAR(); + yycharset_handler = charset_handler; +} +]]) +%if-c-only m4_ifdef( [[M4_YY_NO_SET_IN]],, [[ /** Set the input stream. 
This does not discard the current @@ -2875,7 +3138,7 @@ m4_ifdef( [[M4_YY_USE_LINENO]], YY_G(yy_buffer_stack) = 0; YY_G(yy_buffer_stack_top) = 0; YY_G(yy_buffer_stack_max) = 0; - YY_G(yy_c_buf_p) = (char *) 0; + YY_G(yy_c_buf_p) = (YY_CHAR *) 0; YY_G(yy_init) = 0; YY_G(yy_start) = 0; @@ -2901,6 +3164,11 @@ m4_ifdef( [[M4_YY_TEXT_IS_ARRAY]], YY_G(yy_prev_more_offset) = 0; ]]) +m4_ifdef( [[M4_YY_CHARSET]],[[ + yycharset = NULL; + yycharset_handler = NULL; +]]) + /* Defined in main.c */ #ifdef YY_STDINIT yyin = stdin; @@ -2972,7 +3240,7 @@ m4_ifdef( [[M4_YY_NOT_IN_HEADER]], m4_ifdef( [[M4_YY_NOT_IN_HEADER]], [[ #ifndef yytext_ptr -static void yy_flex_strncpy YYFARGS3( char*,s1, yyconst char *,s2, int,n) +static void yy_flex_strncpy YYFARGS3( YY_CHAR*,s1, yyconst YY_CHAR *,s2, int,n) { int i; for ( i = 0; i < n; ++i ) diff --git a/src/flexdef.h b/src/flexdef.h index 15b344d35..ad2aa7219 100644 --- a/src/flexdef.h +++ b/src/flexdef.h @@ -396,6 +396,7 @@ char *alloca (); * of what we think based on references to it in the user's actions. 
* reject_really_used - same for REJECT * trace_hex - use hexadecimal numbers in trace/debug outputs instead of octals + * charset_enabled - true if charset interface has been enabled */ extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, @@ -411,6 +412,7 @@ extern int yymore_used, reject, real_reject, continued_action, in_rule; extern int yymore_really_used, reject_really_used; extern int trace_hex; +extern bool charset_enabled; /* Variables used in the flex input routines: * datapos - characters on current output line @@ -425,6 +427,7 @@ extern int trace_hex; * infilename - name of input file * outfilename - name of output file * headerfilename - name of the .h file to generate + * headercharfilename - name of the .h file to fill with character type definition * did_outfilename - whether outfilename was explicitly set * prefix - the prefix used for externally visible names ("yy" by default) * yyclass - yyFlexLexer subclass to use for YY_DECL @@ -434,6 +437,8 @@ extern int trace_hex; * num_input_files - size of input_files array * program_name - name with which program was invoked * + * charset_source - character set that has been declared as used in source file + * * action_array - array to hold the rule actions * action_size - size of action_array * defs1_offset - index where the user's section 1 definitions start @@ -448,7 +453,7 @@ extern int datapos, dataline, linenum; extern FILE *skelfile, *yyin, *backing_up_file; extern const char *skel[]; extern int skel_ind; -extern char *infilename, *outfilename, *headerfilename; +extern char *infilename, *outfilename, *headerfilename, *headercharfilename; extern int did_outfilename; extern char *prefix, *yyclass, *extra_type; extern int do_stdinit, use_stdout; @@ -456,6 +461,8 @@ extern char **input_files; extern int num_input_files; extern char *program_name; +extern char *charset_source; + extern char *action_array; extern int action_size; extern int defs1_offset, prolog_offset, action_offset, 
action_index; diff --git a/src/gen.c b/src/gen.c index 68125d7fb..c3776a8c2 100644 --- a/src/gen.c +++ b/src/gen.c @@ -938,7 +938,7 @@ void gen_NUL_trans () /* We're going to need yy_cp lying around for the call * below to gen_backing_up(). */ - indent_puts ("char *yy_cp = YY_G(yy_c_buf_p);"); + indent_puts ("YY_CHAR *yy_cp = YY_G(yy_c_buf_p);"); outc ('\n'); @@ -1687,7 +1687,7 @@ void make_tables () indent_puts ("static yy_state_type yy_last_accepting_state;"); indent_puts - ("static char *yy_last_accepting_cpos;\n"); + ("static YY_CHAR *yy_last_accepting_cpos;\n"); } } @@ -1759,7 +1759,7 @@ void make_tables () /* Declare state buffer variables. */ if (!C_plus_plus && !reentrant) { outn ("static yy_state_type *yy_state_buf=0, *yy_state_ptr=0;"); - outn ("static char *yy_full_match;"); + outn ("static YY_CHAR *yy_full_match;"); outn ("static int yy_lp;"); } @@ -1854,14 +1854,14 @@ void make_tables () outn ("#define YYLMAX 8192"); outn ("#endif\n"); if (!reentrant){ - outn ("char yytext[YYLMAX];"); - outn ("char *yytext_ptr;"); + outn ("YY_CHAR yytext[YYLMAX];"); + outn ("YY_CHAR *yytext_ptr;"); } } else { if(! 
reentrant) - outn ("char *yytext;"); + outn ("YY_CHAR *yytext;"); } } diff --git a/src/main.c b/src/main.c index 8b9f3ed33..6765e846f 100644 --- a/src/main.c +++ b/src/main.c @@ -64,7 +64,7 @@ int skel_ind = 0; char *action_array; int action_size, defs1_offset, prolog_offset, action_offset, action_index; -char *infilename = NULL, *outfilename = NULL, *headerfilename = NULL; +char *infilename = NULL, *outfilename = NULL, *headerfilename = NULL, *headercharfilename = NULL; int did_outfilename; char *prefix, *yyclass, *extra_type = NULL; int do_stdinit, use_stdout; @@ -100,13 +100,15 @@ int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; int num_backing_up, bol_needed; FILE *backing_up_file; +FILE *char_header_file = NULL; int end_of_buffer_state; char **input_files; int num_input_files; jmp_buf flex_main_jmp_buf; bool *rule_has_nl, *ccl_has_nl; int nlch = '\n'; -bool ansi_func_defs, ansi_func_protos; +bool ansi_func_defs, ansi_func_protos, charset_enabled = false; +char *charset_source = NULL; bool tablesext, tablesverify, gentables; char *tablesfilename=0,*tablesname=0; @@ -302,6 +304,8 @@ void check_options () if (C_plus_plus && bison_bridge_lval) flexerror (_("bison bridge not supported for the C++ scanner.")); + if(C_plus_plus) + buf_m4_define( &m4defs_buf, "M4_YY_CXX", NULL); if (useecs) { /* Set up doubly-linked equivalence classes. 
*/ @@ -368,6 +372,42 @@ void check_options () filter_create_ext(output_chain, m4, "-P", 0); filter_create_int(output_chain, filter_fix_linedirs, NULL); + if(headerfilename && !headercharfilename) { + char *suffix = ".h"; + char *basesuffix = "_char"; + size_t suffix_len = strlen(suffix); + size_t basesuffix_len = strlen(basesuffix); + + size_t headerfilename_len = strlen(headerfilename); + + char *p = &headerfilename[headerfilename_len]; + if(headerfilename_len >= suffix_len && strcmp(p-suffix_len, suffix) == 0) + headerfilename_len -= suffix_len; /* cut out the suffix only when the name really ends with it */ + + headercharfilename = malloc(headerfilename_len+basesuffix_len+suffix_len+1); + if(!headercharfilename) + flexfatal (_("memory allocation failed in check_options()")); + p = headercharfilename; + + memcpy(p, headerfilename, headerfilename_len); + p += headerfilename_len; + memcpy(p, basesuffix, basesuffix_len); + p += basesuffix_len; + memcpy(p, suffix, suffix_len+1); /* the extra byte is the terminating NUL */ + } + + /* Setup char header file */ + if(headercharfilename) { + char_header_file = fopen(headercharfilename, "w"); + if(char_header_file == NULL) + lerrsf (_("could not create %s"), headercharfilename); + fprintf(char_header_file, "#pragma once\n\n"); + fflush(char_header_file); + buf_strappend(&userdef_buf, "#include \""); + buf_strappend(&userdef_buf, headercharfilename); + buf_strappend(&userdef_buf, "\"\n"); + } + /* For debugging, only run the requested number of filters. */ if (preproc_level > 0) { filter_truncate(output_chain, preproc_level); @@ -440,6 +480,12 @@ void check_options () if (do_yylineno) buf_m4_define (&m4defs_buf, "M4_YY_USE_LINENO", NULL); + if(charset_enabled) + buf_m4_define(&m4defs_buf, "M4_YY_CHARSET", NULL); + + if(charset_source) + buf_m4_define(&m4defs_buf, "M4_YY_CHARSET_SOURCE", charset_source); + /* Create the alignment type. */ buf_strdefine (&userdef_buf, "YY_INT_ALIGNED", long_align ? 
"long int" : "short int"); @@ -500,6 +546,10 @@ void flexend (exit_status) if (++called_before) FLEX_EXIT (exit_status); + if(char_header_file && fclose (char_header_file)) + lerrsf (_("error closing char header file %s"), + headercharfilename); + if (skelfile != NULL) { if (ferror (skelfile)) lerrsf (_("input error reading skeleton file %s"), @@ -1461,6 +1511,13 @@ void readin () static char yy_stdinit[] = "FILE *yyin = stdin, *yyout = stdout;"; static char yy_nostdinit[] = "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;"; + static char character_type_uchar[] = "typedef unsigned char YY_CHAR;"; + static char character_type_char[] = "typedef char YY_CHAR;"; + static char character_defined[] = "#define YY_CHAR_DEFINED"; + + static char charset_handler_t[] = "typedef size_t(*yycharset_handler_t)(char*,char*,size_t,YY_CHAR*,size_t,size_t*);\n"; + static char charset_handler_t_reentrant[] = "typedef size_t(*yycharset_handler_t)(char*,char*,size_t,YY_CHAR*,size_t,size_t*,yyscan_t);\n"; + line_directive_out ((FILE *) 0, 1); @@ -1590,12 +1647,27 @@ void readin () outn ("\n#define FLEX_DEBUG"); OUT_BEGIN_CODE (); - if (csize == 256) - outn ("typedef unsigned char YY_CHAR;"); - else - outn ("typedef char YY_CHAR;"); + outn("#ifndef YY_CHAR_DEFINED"); + if (csize == 256) { + outn (character_type_uchar); + if(char_header_file) + fprintf(char_header_file, "%s\n", character_type_uchar); + } else { + outn (character_type_char); + if(char_header_file) + fprintf(char_header_file, "%s\n", character_type_char); + } + outn(character_defined); + outn("#endif"); + if(char_header_file) { + fprintf(char_header_file, "%s\n", character_defined); + fflush(char_header_file); + } OUT_END_CODE (); + if(charset_enabled) + outn ("#define YY_CHARSET"); + if (C_plus_plus) { outn ("#define yytext_ptr yytext"); @@ -1678,14 +1750,20 @@ void readin () */ if (yytext_is_array) { if (!reentrant) - outn ("extern char yytext[];\n"); + outn ("extern YY_CHAR yytext[];\n"); } else { + /* This prevents warning 
of "already defined macro" in multiple + * non-reentrant scanners */ + outn("#ifdef yytext_ptr"); + outn("#undef yytext_ptr"); + outn("#endif"); + if (reentrant) { outn ("#define yytext_ptr yytext_r"); } else { - outn ("extern char *yytext;"); + outn ("extern YY_CHAR *yytext;"); outn ("#define yytext_ptr yytext"); } } @@ -1695,6 +1773,22 @@ void readin () ("%option yyclass only meaningful for C++ scanners")); } + outn(""); + + if(charset_enabled) { + if(!reentrant) + outn(charset_handler_t); + else + outn(charset_handler_t_reentrant); + + OUT_BEGIN_CODE (); + if(!C_plus_plus && !reentrant) { + outn("char *yycharset = NULL;"); + outn("yycharset_handler_t yycharset_handler = NULL;"); + } + OUT_END_CODE (); + } + if (useecs) numecs = cre8ecs (nextecm, ecgroup, csize); else diff --git a/src/parse.y b/src/parse.y index d0cc70688..8dad46fcf 100644 --- a/src/parse.y +++ b/src/parse.y @@ -1,8 +1,8 @@ /* parse.y - parser for flex input */ %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP -%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE -%token OPT_TABLES +%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_HEADER_CHAR +%token OPT_EXTRA_TYPE OPT_TABLES OPT_CHARSET_SOURCE %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT @@ -204,8 +204,12 @@ option : OPT_OUTFILE '=' NAME { yyclass = copy_string( nmstr ); } | OPT_HEADER '=' NAME { headerfilename = copy_string( nmstr ); } + | OPT_HEADER_CHAR '=' NAME + { headercharfilename = copy_string( nmstr ); } | OPT_TABLES '=' NAME { tablesext = true; tablesfilename = copy_string( nmstr ); } + | OPT_CHARSET_SOURCE '=' NAME + { charset_source = copy_string(nmstr); } ; sect2 : sect2 scon initforrule flexrule '\n' diff --git a/src/scan.l b/src/scan.l index 5a15400b8..04a5f281f 100644 --- a/src/scan.l +++ b/src/scan.l @@ -349,6 +349,7 @@ M4QEND "]]" "c++" C_plus_plus = option_sense; 
caseful|case-sensitive sf_set_case_ins(!option_sense); caseless|case-insensitive sf_set_case_ins(option_sense); + charset charset_enabled = true; debug ddebug = option_sense; default spprdflt = ! option_sense; ecs useecs = option_sense; @@ -425,7 +426,9 @@ M4QEND "]]" prefix return OPT_PREFIX; yyclass return OPT_YYCLASS; header(-file)? return OPT_HEADER; + header-char(-file)? return OPT_HEADER_CHAR; tables-file return OPT_TABLES; + charset-source return OPT_CHARSET_SOURCE; tables-verify { tablesverify = option_sense; if(!tablesext && option_sense) diff --git a/tests/Makefile.am b/tests/Makefile.am index 25d8b0c73..d37661510 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -73,7 +73,8 @@ DIST_SUBDIRS = \ test-alloc-extra \ test-noansi-nr \ test-noansi-r \ - test-table-opts + test-table-opts \ + test-charset SUBDIRS = \ test-concatenated-options \ @@ -122,7 +123,9 @@ SUBDIRS = \ test-noansi-nr \ test-noansi-r \ test-top \ - test-table-opts + test-table-opts \ + test-tests_strutils \ + test-charset # clean up before running the test suite so we dont test old builds of test code diff --git a/tests/strutils.h b/tests/strutils.h new file mode 100644 index 000000000..88d918cb7 --- /dev/null +++ b/tests/strutils.h @@ -0,0 +1,122 @@ +/* This file is part of flex. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE + */ + +/* + * These are utility functions that reimplement some standard C string + * functions, operating on the YY_CHAR type instead of the raw char type. + * yy_utils_strdup returns NULL when the allocation fails. + * + * These functions are only used in some test suites, not in flex itself. + */ + +#pragma once +#include +#include +#include +#include "strutils.h" + +size_t yy_utils_strlen(YY_CHAR* str) +{ + YY_CHAR *ptr = str; + while(*ptr != 0) ptr++; + return ptr-str; +} + +YY_CHAR* yy_utils_strcpy(YY_CHAR *destination, const YY_CHAR *source) +{ + YY_CHAR *ptr = destination; + while((*destination++ = *source++)); + return ptr; +} + +int yy_utils_strcmp(YY_CHAR *str1, YY_CHAR *str2) +{ + while(*str1 == *str2++) + if(*str1++ == 0) + return 0; + return *str1 - *(str2-1); +} + +YY_CHAR* yy_utils_strdup(YY_CHAR* s1) +{ + YY_CHAR* s2 = (YY_CHAR*)malloc((yy_utils_strlen(s1)+1)*sizeof(YY_CHAR)); + if(s2) yy_utils_strcpy(s2, s1); /* malloc may fail; never copy through NULL */ + return s2; +} + +bool yy_utils_isany(YY_CHAR ch, const YY_CHAR *set, size_t set_size) { + size_t i; + for(i = 0; i < set_size; ++i) + if(set[i] == ch) + return true; + return false; +} + +bool yy_utils_isspace(YY_CHAR ch) { + static const YY_CHAR set[] = {' ', '\r', '\n', '\t'}; + return yy_utils_isany(ch, set, sizeof(set)/sizeof(set[0])); +} + +bool yy_utils_isupper(YY_CHAR ch) { + static const YY_CHAR set[] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'}; + return yy_utils_isany(ch, set, sizeof(set)/sizeof(set[0])); +} + +bool yy_utils_islower(YY_CHAR ch) { + static const YY_CHAR set[] = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'}; + return yy_utils_isany(ch, set, sizeof(set)/sizeof(set[0])); +} + +bool yy_utils_isdigit(YY_CHAR ch) { + static
const YY_CHAR set[] = {'0','1','2','3','4','5','6','7','8','9'}; + return yy_utils_isany(ch, set, sizeof(set)/sizeof(set[0])); +} + +long int yy_utils_strtol(const YY_CHAR *str, YY_CHAR **endptr, int base) +{ + while(yy_utils_isspace(*str)) str++; + bool neg = false; + if(*str == '-') neg = true, str++; + else if(*str == '+') str++; + YY_CHAR ch; + long int n = 0; + while((ch = *str++)) { + long int v = -1; + if(yy_utils_isdigit(ch)) + v = ch-'0'; + else if(yy_utils_isupper(ch)) + v = ch-'A'+10; + else if(yy_utils_islower(ch)) + v = ch-'a'+10; + else + break; + if(v >= base) + break; + n *= base; + n += v; + } + if(endptr) + *endptr = (YY_CHAR*)str-1; + if(neg) + n = -n; + return n; +} \ No newline at end of file diff --git a/tests/test-bison-nr/Makefile.am b/tests/test-bison-nr/Makefile.am index 9bc8af37a..3bde17f7a 100644 --- a/tests/test-bison-nr/Makefile.am +++ b/tests/test-bison-nr/Makefile.am @@ -28,7 +28,7 @@ EXTRA_DIST = scanner.l parser.y test.input main.c CLEANFILES = scanner.c scanner.h parser.c parser.h $(testname)$(EXEEXT) $(OBJS) OUTPUT OBJS = scanner.o parser.o main.o -AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_builddir) -I$(builddir) +AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_srcdir)/tests -I$(top_builddir) -I$(builddir) #LDFLAGS = $(top_srcdir)/libfl.a YFLAGS = --defines --output=parser.c --name-prefix="test" diff --git a/tests/test-bison-nr/main.c b/tests/test-bison-nr/main.c index 49dde7c69..7b07b0413 100644 --- a/tests/test-bison-nr/main.c +++ b/tests/test-bison-nr/main.c @@ -21,6 +21,7 @@ * PURPOSE. 
*/ +#include "scanner_char.h" #include "parser.h" #include "scanner.h" diff --git a/tests/test-bison-nr/parser.y b/tests/test-bison-nr/parser.y index 2cbf9aad0..cf8c41649 100644 --- a/tests/test-bison-nr/parser.y +++ b/tests/test-bison-nr/parser.y @@ -30,6 +30,7 @@ #include #include #include "config.h" +#include "scanner_char.h" #define YYERROR_VERBOSE 1 /* #define YYPARSE_PARAM scanner */ @@ -40,7 +41,7 @@ extern int testget_lineno(void); /* A dummy function. A check against seg-faults in yylval->str. */ -int process_text(char* s) { +int process_text(YY_CHAR* s) { int total =0; while(*s) { total += (int) *s; @@ -56,7 +57,7 @@ int process_text(char* s) { %union { int lineno; - char * str; + YY_CHAR * str; } %token IDENT %token LINENO diff --git a/tests/test-bison-nr/scanner.l b/tests/test-bison-nr/scanner.l index 2510de6d6..9d3c6eafa 100644 --- a/tests/test-bison-nr/scanner.l +++ b/tests/test-bison-nr/scanner.l @@ -25,38 +25,32 @@ /* The scanner expects to link to bison yylval . */ #include #include +#include "strutils.h" #include "parser.h" #include "config.h" -static char* STRDUP(char* s1); #define YY_EXTRA_TYPE int %} %option 8bit outfile="scanner.c" prefix="test" %option bison-locations yylineno %option nomain nounput noyy_top_state noyywrap nodefault warn -%option prefix="test" header="scanner.h" yylineno +%option prefix="test" header="scanner.h" header-char="scanner_char.h" yylineno %% ^[[:digit:]]+ { yylval->lineno = yylineno; - yylloc->first_line = (int)strtol(yytext,NULL,10); + yylloc->first_line = (int)yy_utils_strtol(yytext,NULL,10); return LINENO; } ":" { return COLON; } " " { return SPACE; } "=" { return EQUAL; } -[[:alnum:]_]+ { yylval->str = STRDUP(yytext); return IDENT;} +[[:alnum:]_]+ { yylval->str = yy_utils_strdup(yytext); return IDENT;} \r|\n { } . 
{ yyterminate();} %% -static char* STRDUP(char* s1) -{ - char* s2 = (char*)malloc(strlen(s1)+1); - sprintf(s2,"%s",s1); - return s2; -} diff --git a/tests/test-bison-yylloc/Makefile.am b/tests/test-bison-yylloc/Makefile.am index dc41b0fae..12269a62b 100644 --- a/tests/test-bison-yylloc/Makefile.am +++ b/tests/test-bison-yylloc/Makefile.am @@ -28,7 +28,7 @@ EXTRA_DIST = scanner.l parser.y test.input main.c CLEANFILES = scanner.c scanner.h parser.c parser.h $(testname)$(EXEEXT) $(OBJS) OUTPUT OBJS = scanner.o parser.o main.o -AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_builddir) -I$(builddir) +AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_srcdir)/tests -I$(top_builddir) -I$(builddir) #LDFLAGS = $(top_srcdir)/libfl.a YFLAGS = --defines --output=parser.c --name-prefix="test" diff --git a/tests/test-bison-yylloc/main.c b/tests/test-bison-yylloc/main.c index 24568a936..c463c7aac 100644 --- a/tests/test-bison-yylloc/main.c +++ b/tests/test-bison-yylloc/main.c @@ -21,6 +21,7 @@ * PURPOSE. */ +#include "scanner_char.h" #include "parser.h" #include "scanner.h" diff --git a/tests/test-bison-yylloc/parser.y b/tests/test-bison-yylloc/parser.y index e8f4e56ba..f3cdd0dfa 100644 --- a/tests/test-bison-yylloc/parser.y +++ b/tests/test-bison-yylloc/parser.y @@ -32,6 +32,7 @@ #include #include #include "config.h" +#include "scanner_char.h" #define YYERROR_VERBOSE 1 #define YYLEX_PARAM scanner @@ -40,7 +41,7 @@ extern int testget_lineno(void*); /* A dummy function. A check against seg-faults in yylval->str. 
*/ -int process_text(char* s) { +int process_text(YY_CHAR* s) { int total =0; while(*s) { total += (int) *s; @@ -56,7 +57,7 @@ int process_text(char* s) { %union { int lineno; - char * str; + YY_CHAR * str; } %token IDENT %token LINENO diff --git a/tests/test-bison-yylloc/scanner.l b/tests/test-bison-yylloc/scanner.l index aaf6fd72f..74dc9b725 100644 --- a/tests/test-bison-yylloc/scanner.l +++ b/tests/test-bison-yylloc/scanner.l @@ -27,14 +27,14 @@ #include #include "parser.h" #include "config.h" -static char* STRDUP(char* s1); +#include "strutils.h" #define YY_EXTRA_TYPE int %} %option 8bit outfile="scanner.c" prefix="test" %option reentrant bison-bridge bison-locations yylineno %option nomain nounput noyy_top_state noyywrap nodefault warn -%option prefix="test" header="scanner.h" +%option prefix="test" header="scanner.h" header-char="scanner_char.h" %% @@ -44,22 +44,14 @@ static char* STRDUP(char* s1); ^[[:digit:]]+ { yylval->lineno = yyextra++; - yylloc->first_line = (int)strtol(yytext,NULL,10); + yylloc->first_line = (int)yy_utils_strtol(yytext,NULL,10); return LINENO; } ":" { return COLON; } " " { return SPACE; } "=" { return EQUAL; } -[[:alnum:]_]+ { yylval->str = STRDUP(yytext); return IDENT;} +[[:alnum:]_]+ { yylval->str = yy_utils_strdup(yytext); return IDENT;} \r|\n { } . 
{ yyterminate();} %% - - -static char* STRDUP(char* s1) -{ - char* s2 = (char*)malloc(strlen(s1)+1); - sprintf(s2,"%s",s1); - return s2; -} diff --git a/tests/test-bison-yylval/Makefile.am b/tests/test-bison-yylval/Makefile.am index d8e27f7bf..f0c498dab 100644 --- a/tests/test-bison-yylval/Makefile.am +++ b/tests/test-bison-yylval/Makefile.am @@ -28,7 +28,7 @@ EXTRA_DIST = scanner.l parser.y test.input main.c CLEANFILES = scanner.c scanner.h parser.c parser.h $(testname)$(EXEEXT) $(OBJS) OUTPUT OBJS = parser.o scanner.o main.o -AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_builddir) -I$(builddir) +AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_srcdir)/tests -I$(top_builddir) -I$(builddir) #LDFLAGS = $(top_srcdir)/libfl.a YFLAGS = --defines --output=parser.c --name-prefix="test" @@ -39,7 +39,7 @@ scanner.c: $(srcdir)/scanner.l scanner.h: scanner.c scanner.o: parser.h -parser.c: $(srcdir)/parser.y +parser.c: $(srcdir)/parser.y scanner.c $(BISON) $(YFLAGS) $< parser.h: parser.c diff --git a/tests/test-bison-yylval/main.c b/tests/test-bison-yylval/main.c index 30c43141a..30cb66a0f 100644 --- a/tests/test-bison-yylval/main.c +++ b/tests/test-bison-yylval/main.c @@ -21,6 +21,7 @@ * PURPOSE. */ +#include "scanner_char.h" #include "parser.h" #include "scanner.h" diff --git a/tests/test-bison-yylval/parser.y b/tests/test-bison-yylval/parser.y index 0ffdb8959..eb6637868 100644 --- a/tests/test-bison-yylval/parser.y +++ b/tests/test-bison-yylval/parser.y @@ -31,13 +31,14 @@ #include #include #include "config.h" +#include "scanner_char.h" #define YYERROR_VERBOSE 1 #define YYLEX_PARAM scanner /* A dummy function. A check against seg-faults in yylval->str. 
*/ -int process_text(char* s) { +int process_text(YY_CHAR* s) { int total =0; while(*s) { total += (int) *s; @@ -53,7 +54,7 @@ int process_text(char* s) { %union { long unused; - char * str; + YY_CHAR * str; } %token TAGNAME TEXT diff --git a/tests/test-bison-yylval/scanner.l b/tests/test-bison-yylval/scanner.l index 7e902f764..9e15cfa80 100644 --- a/tests/test-bison-yylval/scanner.l +++ b/tests/test-bison-yylval/scanner.l @@ -27,7 +27,7 @@ #include #include "parser.h" #include "config.h" -static char* STRDUP(char* s1); +#include "strutils.h" enum yesno_t { no=0, yes=1 }; #define YY_EXTRA_TYPE enum yesno_t @@ -36,7 +36,7 @@ enum yesno_t { no=0, yes=1 }; %option 8bit outfile="scanner.c" prefix="test" %option reentrant bison-bridge %option noyywrap nomain nounput noyy_top_state noyywrap nodefault warn -%option prefix="test" header="scanner.h" +%option prefix="test" header="scanner.h" header-char="scanner_char.h" %option stack @@ -52,14 +52,14 @@ enum yesno_t { no=0, yes=1 }; "str = STRDUP(yytext); return TEXT;} +[^<]{1,512} { yyget_lval(yyscanner)->str = yy_utils_strdup(yytext); return TEXT;} } { ">" { yy_pop_state( yyscanner ); return GT; } [[:alpha:]][[:alnum:]]* { if( NEED_TAG_NAME == yes){ NEED_TAG_NAME=no; - yylval->str = STRDUP(yytext); + yylval->str = yy_utils_strdup(yytext); return TAGNAME; } } @@ -71,11 +71,3 @@ enum yesno_t { no=0, yes=1 }; ">" { yy_pop_state(yyscanner);} } %% - - -static char* STRDUP(char* s1) -{ - char* s2 = (char*)malloc(strlen(s1)+1); - sprintf(s2,"%s",s1); - return s2; -} diff --git a/tests/test-charset/Makefile.am b/tests/test-charset/Makefile.am new file mode 100644 index 000000000..79745eb5c --- /dev/null +++ b/tests/test-charset/Makefile.am @@ -0,0 +1,68 @@ +# This file is part of flex. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: + +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. + +# Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE. + + +FLEX = $(top_builddir)/src/flex + +cases := ISO-8859-1 EBCDIC-500 CP850 + +variants := r nr +test_variants := $(foreach variant,$(variants), test-charset-$(variant)-test) + +LFILES := $(foreach variant,$(variants), scanner-$(variant).l) +CFILES := $(foreach variant,$(variants), scanner-$(variant).c) +EXEFILES := $(foreach variant,$(variants), test-charset-$(variant)$(EXEEXT)) + +test_case_files := $(foreach case,$(cases), test-$(case).input test-$(case).output) + +EXTRA_DIST = scanner.l.in $(test_case_files) +CLEANFILES = $(LFILES) $(CFILES) $(EXEFILES) scanner-cxx.l scanner.cpp test-charset-cpp$(EXEEXT) + + +AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir)/src -I$(top_builddir) + +scanner-nr.l: $(srcdir)/scanner.l.in + m4 -P $< > $@ + +scanner-r.l: $(srcdir)/scanner.l.in + m4 -P -DVARIANT_REENTRANT=1 $< > $@ + +scanner-cxx.l: $(srcdir)/scanner.l.in + m4 -P -DVARIANT_CPLUSPLUS=1 $< > $@ + +scanner-%.c: scanner-%.l + $(FLEX) -o $@ $< + +scanner.cpp: scanner-cxx.l + $(FLEX) -o $@ $< + +test-charset-%$(EXEEXT): scanner-%.c + $(CC) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) -o $@ $(LDFLAGS) $< $(LOADLIBES) + +test-charset-cpp$(EXEEXT): scanner.cpp + $(CXX) $(AM_CPPFLAGS) $(CPPFLAGS) $(CXXFLAGS) -o $@ $(LDFLAGS) $< $(LOADLIBES) + +test-charset-%-test: test-charset-%$(EXEEXT) test-charset-cpp$(EXEEXT) + 
for c in $(cases) ; do \ (./$(<) $$c < $(srcdir)/test-$$c.input | diff -au - $(srcdir)/test-$$c.output) || { echo "Test $(<) failed in $$c case"; exit 1; } ; \ done + +test: $(test_variants) \ No newline at end of file diff --git a/tests/test-charset/scanner.l.in b/tests/test-charset/scanner.l.in new file mode 100644 index 000000000..bf9fb2896 --- /dev/null +++ b/tests/test-charset/scanner.l.in @@ -0,0 +1,214 @@ +/* + * This file is part of flex. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + +/* + * This test demonstrates the ability to handle various encodings of incoming + * data with the same compiled scanner. It verifies that files in three + * different encodings are handled properly. + * + * This scanner file is written in ISO-8859-1 (LATIN-1), which is therefore the + * first charset the generated scanner can read. The second and third, EBCDIC-500 + * and CP850, are converted by the charset_handler function provided in this .l file. + * + * This file generates both reentrant and non-reentrant scanners by being + * preprocessed by m4 with or without VARIANT_REENTRANT defined. 
+ */ + +%{ +#include +#include +#include +#include "config.h" + +m4_ifdef(`VARIANT_REENTRANT', `#define VARIANT_REENTRANT 1', `') +m4_ifdef(`VARIANT_CPLUSPLUS', `#define VARIANT_CPLUSPLUS 1', `') + +#if VARIANT_CPLUSPLUS +#define out(str) (*yyout << str) +#else +#define out(str) (fprintf(yyout, str)) +#endif + +%} + +%option 8bit prefix="test" charset-source="ISO-8859-1" +%option nounput nomain noinput noyywrap charset +%option warn + +m4_ifdef(`VARIANT_REENTRANT', `%option reentrant', `') +m4_ifdef(`VARIANT_CPLUSPLUS', `%option c++') + +%% + +[A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ] { out("U"); } +[a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿß] { out("L"); } +[0-9] { out("N"); } + +%% + +#if VARIANT_CPLUSPLUS +class TestFlexLexer: public yyFlexLexer { +public: +#endif +/* + * Handler supplied by the scanner author to deal with input encodings. It + * receives a run of incoming bytes and converts it into characters in the + * scanner's internal representation - for 7/8-bit scanners, the internal + * representation is always the same as "charset-source". + * + * charset - charset name (in ASCII) + * source - incoming bytes + * source_bytes - count of incoming bytes + * target - where to place output characters + * target_length - maximum number of characters (in YY_CHARs) that can be placed + * in the "target" buffer + * converted_bytes - pointer to a variable that should be set to the number of + * bytes that have been properly converted from the "source" buffer + * + * RETURNS: number of characters that have been written into the "target" buffer. + * Must not be greater than the value of the "target_length" parameter. 
+ */ +#if VARIANT_CPLUSPLUS +size_t yycharset_handler(char *charset, char* source, size_t source_bytes, + YY_CHAR* target, size_t target_length, size_t* converted_bytes) +#else +size_t charset_handler(char *charset, char* source, size_t source_bytes, + YY_CHAR* target, size_t target_length, size_t* converted_bytes +#if VARIANT_REENTRANT + , yyscan_t yyscanner +#endif + ) +#endif +{ + /* conversion from CP850 to ISO-8859-1. Unrepresentable values are set to -1 */ + static int conversion_table_cp850[256] = { +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + +0xC7, 0xFC, 0xE9, 0xE2, 0xE4, 0xE0, 0xE5, 0xE7, 0xEA, 0xEB, 0xE8, 0xEF, 0xEE, 0xEC, 0xC4, 0xC5, +0xC9, 0xE6, 0xC6, 0xF4, 0xF6, 0xF2, 0xFB, 0xF9, 0xFF, 0xD6, 0xDC, 0xF8, 0xA3, 0xD8, 0xD7, -1, +0xE1, 0xED, 0xF3, 0xFA, 0xF1, 0xD1, 0xAA, 0xBA, 0xBF, 0xAE, 0xAC, 0xBD, 0xBC, 0xA1, 0xAB, 0xBB, + -1, -1, -1, -1, -1, 0xC1, 0xC2, 0xC0, 0xA9, -1, -1, -1, -1, 0xA2, 0xA5, -1, + -1, -1, -1, -1, -1, -1, 0xE3, 0xC3, -1, -1, -1, -1, -1, -1, -1, 0xA4, +0xF0, 0xD0, 0xCA, 0xCB, 0xC8, -1, 0xCD, 0xCE, 0xCF, -1, -1, -1, -1, 0xA6, 0xCC, -1, +0xD3, 0xDF, 0xD4, 0xD2, 0xF5, 0xD5, 0xB5, 0xFE, 0xDE, 0xDA, 0xDB, 0xD9, 0xFD, 0xDD, 0xAF, 0xB4, +0xAD, 0xB1, -1, 0xBE, 0xB6, 0xA7, 0xF7, 0xB8, 0xB0, 0xA8, 0xB7, 0xB9, 0xB3, 0xB2, -1, 
0xA0 + }; + + /* conversion from EBCDIC-500 to ISO-8859-1. Unrepresentable values are set to -1 */ + static int conversion_table_ebcdic500[256] = { +0x00, 0x01, 0x02, 0x03, -1, 0x09, -1, 0x7F, -1, -1, -1, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, +0x10, 0x11, 0x12, 0x13, -1, 0x85, 0x08, -1, 0x18, 0x19, -1, -1, 0x1C, 0x1D, 0x1E, 0x1F, + -1, -1, -1, -1, -1, 0x0A, 0x17, 0x1B, -1, -1, -1, -1, -1, 0x05, 0x06, 0x07, + -1, -1, 0x16, -1, -1, -1, -1, 0x04, -1, -1, -1, -1, 0x14, 0x15, -1, 0x1A, +0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21, +0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E, +0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, +0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + +0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1, +0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4, +0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE, +0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x7C, 0xAF, 0xA8, 0xB4, 0xD7, +0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5, +0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF, +0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5, +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, -1 + }; + + static int *cnv = NULL; + if(strcmp(charset, "CP850") == 0) + cnv = conversion_table_cp850; + else if(strcmp(charset, "EBCDIC-500") == 0) + cnv = conversion_table_ebcdic500; + else + YY_FATAL_ERROR("Unknown encoding"); + + if(target_length < source_bytes) + YY_FATAL_ERROR("Too small buffer"); + int i; + for(i = 0; i < source_bytes; ++i) { + char 
in = source[i]; + int ch = cnv[(unsigned char)in]; + if(ch == -1) { + char msg[256]; + snprintf(msg, sizeof(msg), "Unsupported byte 0x%x", + (unsigned int)(unsigned char)in); + YY_FATAL_ERROR(msg); + } + target[i] = ch; + } + *converted_bytes = source_bytes; + return source_bytes; +} + +#if VARIANT_CPLUSPLUS +}; +#endif + +int main (int argc, char *argv[]) +{ + if(argc < 2) { + fprintf(stderr, "USAGE: %s [CHARSET]", argv[0]); + return 1; + } + char *charset = argv[1]; + +#if VARIANT_CPLUSPLUS + TestFlexLexer lexer; + lexer.set_charset(charset); + assert(strcmp(lexer.get_charset(), charset)==0); + lexer.yylex(); +#elif VARIANT_REENTRANT + yyscan_t lexer; + + yylex_init(&lexer); + + yyset_in(stdin, lexer); + yyset_out(stdout, lexer); + + yyset_charset(charset, lexer); + assert(strcmp(yyget_charset(lexer), charset)==0); + + yyset_charset_handler(charset_handler, lexer); + assert(yyget_charset_handler(lexer) == charset_handler); + + yylex( lexer ); + + yylex_destroy( lexer); +#else + yyin = stdin; + yyout = stdout; + yycharset = charset; + yycharset_handler = charset_handler; + + yylex(); +#endif + return 0; +} diff --git a/tests/test-charset/test-CP850.input b/tests/test-charset/test-CP850.input new file mode 100644 index 000000000..daadd8e92 --- /dev/null +++ b/tests/test-charset/test-CP850.input @@ -0,0 +1 @@ +0123456789AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz·…µ ¶ƒÇÆŽ„†’‘€‡ÔŠ‚ÒˆÓ‰Þ֡׌؋ÑÐ¥¤ã•à¢â“åä™”›ë—é£ê–šíìèç˜á \ No newline at end of file diff --git a/tests/test-charset/test-CP850.output b/tests/test-charset/test-CP850.output new file mode 100644 index 000000000..e5c7e197f --- /dev/null +++ b/tests/test-charset/test-CP850.output @@ -0,0 +1 @@ +NNNNNNNNNNULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULLL \ No newline at end of file diff --git a/tests/test-charset/test-EBCDIC-500.input b/tests/test-charset/test-EBCDIC-500.input new file mode 100644 index 000000000..5637da5d3 --- /dev/null 
+++ b/tests/test-charset/test-EBCDIC-500.input @@ -0,0 +1 @@ +ðñòóôõö÷øùÁ‚ÃĄŅƆLJȈɉёҒӓԔՕ֖חؘٙâ¢ã£ä¤å¥æ¦ç§è¨é©dDeEbBfFcCgGžœhHtTqQrRsSxXuUvVwW¬ŒiIíÍîÎëËïÏìÌ€pýÝþÞûÛüÜ­®ŽßY \ No newline at end of file diff --git a/tests/test-charset/test-EBCDIC-500.output b/tests/test-charset/test-EBCDIC-500.output new file mode 100644 index 000000000..e5c7e197f --- /dev/null +++ b/tests/test-charset/test-EBCDIC-500.output @@ -0,0 +1 @@ +NNNNNNNNNNULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULLL \ No newline at end of file diff --git a/tests/test-charset/test-ISO-8859-1.input b/tests/test-charset/test-ISO-8859-1.input new file mode 100644 index 000000000..38258c732 --- /dev/null +++ b/tests/test-charset/test-ISO-8859-1.input @@ -0,0 +1 @@ +0123456789AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÀàÁáÂâÃãÄäÅ寿ÇçÈèÉéÊêËëÌìÍíÎîÏïÐðÑñÒòÓóÔôÕõÖöØøÙùÚúÛûÜüÝýÞþÿß \ No newline at end of file diff --git a/tests/test-charset/test-ISO-8859-1.output b/tests/test-charset/test-ISO-8859-1.output new file mode 100644 index 000000000..e5c7e197f --- /dev/null +++ b/tests/test-charset/test-ISO-8859-1.output @@ -0,0 +1 @@ +NNNNNNNNNNULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULULLL \ No newline at end of file diff --git a/tests/test-header-r/main.c b/tests/test-header-r/main.c index 353a63ee2..7cf7c34de 100644 --- a/tests/test-header-r/main.c +++ b/tests/test-header-r/main.c @@ -43,7 +43,7 @@ main ( int argc, char** argv ) fp = testget_out(scanner); while(testlex(scanner)) { - char * text; + YY_CHAR * text; int line; line = testget_lineno(scanner); text = testget_text(scanner); diff --git a/tests/test-include-by-buffer/scanner.l b/tests/test-include-by-buffer/scanner.l index 322454340..7db3a6262 100644 --- a/tests/test-include-by-buffer/scanner.l +++ b/tests/test-include-by-buffer/scanner.l @@ -54,7 +54,7 @@ int include_stack_ptr = 0; /* recurse */ yytext[yyleng-1]='\0'; 
include_stack[include_stack_ptr++] = YY_CURRENT_BUFFER; - if((yyin=fopen(yytext,"r"))==NULL) { + if((yyin=fopen((char*)yytext,"r"))==NULL) { fprintf(stderr,"*** Error: Could not open include file \"%s\".\n",yytext); yyterminate(); } diff --git a/tests/test-include-by-push/scanner.l b/tests/test-include-by-push/scanner.l index 85561522d..ad7be5886 100644 --- a/tests/test-include-by-push/scanner.l +++ b/tests/test-include-by-push/scanner.l @@ -48,7 +48,7 @@ [[:alnum:]_.-]+> { /* recurse */ yytext[yyleng-1]='\0'; - if((yyin=fopen(yytext,"r"))==NULL) { + if((yyin=fopen((char*)yytext,"r"))==NULL) { fprintf(stderr,"*** Error: Could not open include file \"%s\".\n",yytext); yyterminate(); } diff --git a/tests/test-include-by-reentrant/scanner.l b/tests/test-include-by-reentrant/scanner.l index 8d2368138..bb1315a63 100644 --- a/tests/test-include-by-reentrant/scanner.l +++ b/tests/test-include-by-reentrant/scanner.l @@ -51,7 +51,7 @@ yyscan_t scanner; FILE * fp; yytext[yyleng-1]='\0'; - if((fp=fopen(yytext,"r"))==NULL) { + if((fp=fopen((char*)yytext,"r"))==NULL) { fprintf(stderr,"*** Error: Could not open include file \"%s\".\n", yytext); yyterminate(); diff --git a/tests/test-linedir-r/main.c b/tests/test-linedir-r/main.c index 6ba9808ac..a80ee8e1b 100644 --- a/tests/test-linedir-r/main.c +++ b/tests/test-linedir-r/main.c @@ -39,7 +39,7 @@ main ( int argc, char** argv ) fp = testget_out(scanner); while(testlex(scanner)) { - char * text; + YY_CHAR * text; int line; line = testget_lineno(scanner); text = testget_text(scanner); diff --git a/tests/test-prefix-nr/scanner.l b/tests/test-prefix-nr/scanner.l index 4497aa1ba..230f6b3c6 100644 --- a/tests/test-prefix-nr/scanner.l +++ b/tests/test-prefix-nr/scanner.l @@ -46,7 +46,7 @@ FOO_flush_buffer((YY_BUFFER_STATE)0); FOO_init_buffer((YY_BUFFER_STATE)0,(FILE*)0); FOO_load_buffer_state(); - FOO_scan_buffer((char*)0,(yy_size_t)0); + FOO_scan_buffer((YY_CHAR*)0,(yy_size_t)0); FOO_scan_bytes((yyconst char*)0, 0); 
FOO_scan_string((yyconst char*)0); FOO_switch_to_buffer((YY_BUFFER_STATE)0); @@ -55,7 +55,7 @@ yyleng = 0; yylex(); yyrestart((FILE*)0); - yytext = (char*)0; + yytext = (YY_CHAR*)0; } } %% diff --git a/tests/test-prefix-r/scanner.l b/tests/test-prefix-r/scanner.l index 4c2598287..66c79979c 100644 --- a/tests/test-prefix-r/scanner.l +++ b/tests/test-prefix-r/scanner.l @@ -47,7 +47,7 @@ FOO_flush_buffer( (YY_BUFFER_STATE)0, yyscanner); FOO_init_buffer( (YY_BUFFER_STATE)0, (FILE*)0, yyscanner); FOO_load_buffer_state( yyscanner); - FOO_scan_buffer( (char*)0, (yy_size_t)0, yyscanner); + FOO_scan_buffer( (YY_CHAR*)0, (yy_size_t)0, yyscanner); FOO_scan_bytes( (yyconst char*)0, 0, yyscanner); FOO_scan_string( (yyconst char*)0, yyscanner); FOO_switch_to_buffer( (YY_BUFFER_STATE)0, yyscanner); diff --git a/tests/test-string-nr/scanner.l b/tests/test-string-nr/scanner.l index 6ab5b548c..99958d19c 100644 --- a/tests/test-string-nr/scanner.l +++ b/tests/test-string-nr/scanner.l @@ -61,7 +61,7 @@ int main(void); int main () { - char * buf; + YY_CHAR* buf; int len; YY_BUFFER_STATE state; @@ -82,8 +82,8 @@ main () We make a copy, since the buffer will be modified by flex.*/ printf("Testing: yy_scan_buffer(%s): ",INPUT_STRING_1); fflush(stdout); len = strlen(INPUT_STRING_1) + 2; - buf = (char*)malloc( len ); - strcpy( buf, INPUT_STRING_1); + buf = (YY_CHAR*)malloc( len*sizeof(YY_CHAR) ); + strcpy( (char*)buf, INPUT_STRING_1); buf[ len -2 ] = 0; /* Flex requires two NUL bytes at end of buffer. 
*/ buf[ len -1 ] =0; diff --git a/tests/test-string-r/scanner.l b/tests/test-string-r/scanner.l index 6b594d775..a543de859 100644 --- a/tests/test-string-r/scanner.l +++ b/tests/test-string-r/scanner.l @@ -61,7 +61,7 @@ int main(void); int main () { - char * buf; + YY_CHAR * buf; int len; YY_BUFFER_STATE state; yyscan_t scanner=NULL; @@ -87,8 +87,8 @@ main () We make a copy, since the buffer will be modified by flex.*/ printf("Testing: yy_scan_buffer(%s): ",INPUT_STRING_1); fflush(stdout); len = strlen(INPUT_STRING_1) + 2; - buf = (char*)malloc( len ); - strcpy( buf, INPUT_STRING_1); + buf = (YY_CHAR*)malloc( len*sizeof(YY_CHAR) ); + strcpy( (char*)buf, INPUT_STRING_1); buf[ len -2 ] = 0; /* Flex requires two NUL bytes at end of buffer. */ buf[ len -1 ] =0; diff --git a/tests/test-tests_strutils/Makefile.am b/tests/test-tests_strutils/Makefile.am new file mode 100644 index 000000000..56312a4f1 --- /dev/null +++ b/tests/test-tests_strutils/Makefile.am @@ -0,0 +1,35 @@ +# This file is part of flex. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: + +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. + +# Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE. 
+
+testname = test-tests_strutils
+
+OBJS = test_strutils.o
+
+AM_CPPFLAGS = -I$(srcdir) -I$(top_srcdir)/tests
+
+$(testname)$(EXEEXT): $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(OBJS) $(LOADLIBES)
+
+test: $(testname)$(EXEEXT)
+	./$(testname)$(EXEEXT)
+
+.c.o:
+	$(CC) -c -o $@ $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $<
diff --git a/tests/test-tests_strutils/test_strutils.c b/tests/test-tests_strutils/test_strutils.c
new file mode 100644
index 000000000..805667d6f
--- /dev/null
+++ b/tests/test-tests_strutils/test_strutils.c
@@ -0,0 +1,81 @@
+/* This file is part of flex.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE
+ */
+
+/*
+ * This test verifies that utility functions in `tests/strutils.h` are working
+ * properly.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define YY_CHAR uint32_t
+#include "strutils.h"
+
+#define STR(x) (#x)
+
+#define CHECK(v) do{ \
+        if(!(v)) { \
+            fprintf(stderr, "Check failed: %s\n", STR(v)); \
+            return 1; \
+        } \
+    }while(0)
+
+int main(int argc, char **argv) {
+    fprintf(stderr, "Starting strutils tests\n");
+
+    (void)argc;
+    (void)argv;
+
+    YY_CHAR str[] = {'W', 'h', 'a', 't', ' ', 'a', ' ', 's', 't', 'r', 'i', 'n', 'g', '!', 0};
+
+    /* yy_utils_strlen */
+    CHECK(yy_utils_strlen(str) == 14);
+
+    /* yy_utils_strcpy, yy_utils_strcmp */
+    YY_CHAR dst1[15];
+    yy_utils_strcpy(dst1, str);
+    CHECK(yy_utils_strcmp(dst1, str) == 0);
+
+    /* yy_utils_strdup */
+    YY_CHAR *dst2 = yy_utils_strdup(str);
+    CHECK(yy_utils_strcmp(dst2, str) == 0);
+    free(dst2);
+
+    /* yy_utils_strtol */
+    YY_CHAR i1s[] = {' ', '\t', '\r', '\n', '1', '2', '3', 'a', 's', 'd', 0};
+    YY_CHAR *endptr;
+    CHECK(yy_utils_strtol(i1s, &endptr, 10) == 123);
+    CHECK(endptr == i1s+7);
+    CHECK(yy_utils_strtol(i1s, &endptr, 16) == 0x123a);
+    CHECK(endptr == i1s+8);
+
+    YY_CHAR i2s[] = {' ', '\t', '\r', '\n', '-', '1', '2', '3', 'a', 's', 'd', 0};
+    CHECK(yy_utils_strtol(i2s, &endptr, 10) == -123);
+    CHECK(endptr == i2s+8);
+    CHECK(yy_utils_strtol(i2s, &endptr, 16) == -0x123a);
+    CHECK(endptr == i2s+9);
+
+    fprintf(stderr, "Tests succeeded\n");
+    return 0;
+}
diff --git a/tests/test-top/main.c b/tests/test-top/main.c index 353a63ee2..7cf7c34de 100644 --- a/tests/test-top/main.c +++ b/tests/test-top/main.c @@ -43,7 +43,7 @@ main ( int argc, char** argv ) fp = testget_out(scanner); while(testlex(scanner)) { - char * text; + YY_CHAR * text; int line; line = testget_lineno(scanner); text = testget_text(scanner);