Parsing wide char string

A follow-up change to parse wide char strings. It currently only parses and stores them like normal strings; more changes are needed to reflect the base type, size, etc. Signed-off-by: Christopher Li <sparse@chrisli.org>
author: Christopher Li <sparse@chrisli.org> 2010-06-17 17:08:09 -0700
committer: Christopher Li <sparse@chrisli.org> 2010-06-17 17:21:10 -0700
commit: 49adf11b99cfce04ddcae7be0a272cc2df31436d (patch)
tree: b2cf5af2e39f0162f555a943819875528834cfc1
parent: Allow parsing L'\0' (diff)
download: sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.tar.gz
sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.tar.bz2
sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.zip
5 files changed, 28 insertions, 15 deletions
diff --git a/expression.c b/expression.c
index 67e05e7..7e06e60 100644
--- a/expression.c
+++ b/expression.c
@@ -224,17 +224,18 @@ static struct token *string_expression(struct token *token, struct expression *e
 {
 	struct string *string = token->string;
 	struct token *next = token->next;
+	int stringtype = token_type(token);
 
 	convert_function(token);
 
-	if (token_type(next) == TOKEN_STRING) {
+	if (token_type(next) == stringtype) {
 		int totlen = string->length-1;
 		char *data;
 
 		do {
 			totlen += next->string->length-1;
 			next = next->next;
-		} while (token_type(next) == TOKEN_STRING);
+		} while (token_type(next) == stringtype);
 
 		if (totlen > MAX_STRING) {
 			warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
@@ -256,7 +257,7 @@ static struct token *string_expression(struct token *token, struct expression *e
 			next = next->next;
 			memcpy(data, s->data, len);
 			data += len;
-		} while (token_type(next) == TOKEN_STRING);
+		} while (token_type(next) == stringtype);
 		*data = '\0';
 	}
 	expr->string = string;
@@ -397,7 +398,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 
 	switch (token_type(token)) {
 	case TOKEN_CHAR:
-	case TOKEN_LONG_CHAR:
+	case TOKEN_WIDE_CHAR:
 		expr = alloc_expression(token->pos, EXPR_VALUE);   
 		expr->flags = Int_const_expr;
 		expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
@@ -464,9 +465,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 		break;
 	}
 
-	case TOKEN_STRING: {
+	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING: {
 	handle_string:
 		expr = alloc_expression(token->pos, EXPR_STRING);
+		expr->wide = token_type(token) == TOKEN_WIDE_STRING;
 		token = string_expression(token, expr);
 		break;
 	}
diff --git a/expression.h b/expression.h
index 631224f..9778de8 100644
--- a/expression.h
+++ b/expression.h
@@ -76,7 +76,10 @@ struct expression {
 		long double fvalue;
 
 		// EXPR_STRING
-		struct string *string;
+		struct {
+			int wide;
+			struct string *string;
+		};
 
 		// EXPR_UNOP, EXPR_PREOP and EXPR_POSTOP
 		struct /* unop */ {
diff --git a/pre-process.c b/pre-process.c
index 058f24b..656acaa 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -864,10 +864,11 @@ static int token_different(struct token *t1, struct token *t2)
 		different = t1->argnum != t2->argnum;
 		break;
 	case TOKEN_CHAR:
-	case TOKEN_LONG_CHAR:
+	case TOKEN_WIDE_CHAR:
 		different = t1->character != t2->character;
 		break;
-	case TOKEN_STRING: {
+	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING: {
 		struct string *s1, *s2;
 
 		s1 = t1->string;
diff --git a/token.h b/token.h
index c527e78..a7ec77e 100644
--- a/token.h
+++ b/token.h
@@ -67,8 +67,9 @@ enum token_type {
 	TOKEN_ZERO_IDENT,
 	TOKEN_NUMBER,
 	TOKEN_CHAR,
-	TOKEN_LONG_CHAR,
+	TOKEN_WIDE_CHAR,
 	TOKEN_STRING,
+	TOKEN_WIDE_STRING,
 	TOKEN_SPECIAL,
 	TOKEN_STREAMBEGIN,
 	TOKEN_STREAMEND,
diff --git a/tokenize.c b/tokenize.c
index cf05826..4c97517 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -137,6 +137,7 @@ const char *show_token(const struct token *token)
 		return show_ident(token->ident);
 
 	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING:
 		return show_string(token->string);
 
 	case TOKEN_NUMBER:
@@ -146,7 +147,7 @@ const char *show_token(const struct token *token)
 		return show_special(token->special);
 
 	case TOKEN_CHAR: 
-	case TOKEN_LONG_CHAR: {
+	case TOKEN_WIDE_CHAR: {
 		char *ptr = buffer;
 		int c = token->character;
 		*ptr++ = '\'';
@@ -548,7 +549,7 @@ static int get_char_token(int next, stream_t *stream, enum token_type type)
 	return nextchar(stream);
 }
 
-static int get_string_token(int next, stream_t *stream)
+static int get_string_token(int next, stream_t *stream, enum token_type type)
 {
 	static char buffer[MAX_STRING];
 	struct string *string;
@@ -581,7 +582,7 @@ static int get_string_token(int next, stream_t *stream)
 
 	/* Pass it on.. */
 	token = stream->token;
-	token_type(token) = TOKEN_STRING;
+	token_type(token) = type;
 	token->string = string;
 	add_token(stream);
 	
@@ -701,7 +702,7 @@ static int get_one_special(int c, stream_t *stream)
 			return get_one_number(c, next, stream);
 		break;
 	case '"':
-		return get_string_token(next, stream);
+		return get_string_token(next, stream, TOKEN_STRING);
 	case '\'':
 		return get_char_token(next, stream, TOKEN_CHAR);
 	case '/':
@@ -881,8 +882,12 @@ static int get_one_identifier(int c, stream_t *stream)
 
 	ident = create_hashed_ident(buf, len, hash);
 
-	if (ident == &L_ident && next == '\'')
-		return get_char_token(nextchar(stream), stream, TOKEN_LONG_CHAR);
+	if (ident == &L_ident) {
+		if (next == '\'')
+			return get_char_token(nextchar(stream), stream, TOKEN_WIDE_CHAR);
+		if (next == '\"')
+			return get_string_token(nextchar(stream), stream, TOKEN_WIDE_STRING);
+	}
 
 	/* Pass it on.. */
 	token = stream->token;
author	Christopher Li <sparse@chrisli.org>	2010-06-17 17:08:09 -0700
committer	Christopher Li <sparse@chrisli.org>	2010-06-17 17:21:10 -0700
commit	49adf11b99cfce04ddcae7be0a272cc2df31436d (patch)
tree	b2cf5af2e39f0162f555a943819875528834cfc1
parent	Allow parsing L'\0' (diff)
download	sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.tar.gz sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.tar.bz2 sparse-49adf11b99cfce04ddcae7be0a272cc2df31436d.zip