This is an automated email from the git hooks/post-receive script.

git pushed a commit to branch master
in repository eshell.

View the commit online.

commit 76095c82c9091e38527c20da88bc5ce8c8f9f342
Author: Swagtoy <m...@ow.swag.toys>
AuthorDate: Wed Nov 20 02:55:38 2024 -0500

    lexer: Handle identifiers and fill up array of tokens
    
    Also fix some bugs and the like.
---
 escript/include/lexer.h | 23 ++++++----------
 escript/src/lexer.c     | 71 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/escript/include/lexer.h b/escript/include/lexer.h
index 14a7e26..049c97e 100644
--- a/escript/include/lexer.h
+++ b/escript/include/lexer.h
@@ -26,9 +26,7 @@ enum Escript_Lex_Keywords
 	ELEX_DOUBLE,
 	ELEX_NULL,
 	
-	ELEX_VAR,
-	ELEX_COMMAND,
-	ELEX_FUNCTION,
+	ELEX_IDENTIFIER,
 	ELEX_FUNCTION_DECL,
 	
 	// syntactical
@@ -75,29 +73,24 @@ enum Escript_Lex_Keywords
 struct Escript_Token_String
 {
 	char const* str;
-	int str_len;
+	size_t len;
 };
 
-union Escript_Token_Data
-{
-	struct Escript_Token_String const d_string; // it hurts!!
-	int const d_int;
-	double const d_double;
-};
-
-
 struct Escript_Token
 {
 	enum Escript_Lex_Keywords token; // union member
-	char unsigned is_keyword;
-	union Escript_Token_Data data;
+	union Escript_Token_Data {
+		struct Escript_Token_String d_string; // it hurts!!
+		int d_int;
+		double d_double;
+	} data;
 };
 
 struct Escript_Lexer
 {
 	char const* current;
 	size_t current_len;
-	Eina_Inarray* lex;
+	Eina_Inarray* tokens;
 };
 
 int escript_lexer_init(struct Escript_Lexer* lex);
diff --git a/escript/src/lexer.c b/escript/src/lexer.c
index e6aff56..2bd5fdc 100644
--- a/escript/src/lexer.c
+++ b/escript/src/lexer.c
@@ -15,16 +15,20 @@ int
 escript_lexer_init(struct Escript_Lexer* lex)
 {
 	// Initialize array
-	lex->lex = eina_inarray_new(sizeof(struct Escript_Token), 64);
+	lex->tokens = eina_inarray_new(sizeof(struct Escript_Token), 64);
 }
 
+// TODO going backwards code (not really encouraged)
 static char const*
-next_char(struct Escript_Lexer* lex, int step)
+step_char(struct Escript_Lexer* lex, int step)
 {
-	for (int i = 0; i < step + 1; ++i)
+	for (int i = 0; i <= step; ++i)
 	{
-		lex->current_len--;
-		lex->current++;
+		if (lex->current_len != 0)
+		{
+			lex->current_len--;
+			lex->current++;
+		}
 	}
 }
 
@@ -41,7 +45,7 @@ next_nb(struct Escript_Lexer* lex)
 		switch (at(0))
 		{
 			case ' ': case '\t':
-				next_char(lex, 0);
+				step_char(lex, 0);
 				break;
 			default:
 				return 1;
@@ -63,15 +67,17 @@ identifier_like_char(char x, int i)
 static Eina_Strbuf* const
 lex_identifier(struct Escript_Lexer* lex)
 {
-	// TODO maybe less allocations? unless strbuf is optimized for no insertions now
+	// TODO less allocations? in fact, we could probably just live off
+	//  of a fixed length buffer really. Any allocations are gross for
+	//  this task.
 	Eina_Strbuf* const res = eina_strbuf_new();
 	
 	while (identifier_like_char(at(0), 0/*TODO*/) && !eol(lex, 0))
 	{
 		eina_strbuf_append_char(res, at(0));
-		next_char(lex, 0);
+		step_char(lex, 0);
 	}
-	if (eina_strbuf_length_get(res))
+	if (eina_strbuf_length_get(res) == 0)
 		return NULL;
 	return res;
 }
@@ -79,33 +85,44 @@ lex_identifier(struct Escript_Lexer* lex)
 int
 escript_lexer_step(struct Escript_Lexer* lex)
 {
-	struct Escript_Token token = {
-		.is_keyword = 0
-	};
-	
+	struct Escript_Token token = { 0 };
+#define PUSH_TOKEN eina_inarray_push(lex->tokens, &token)
+#define PUSH_SIMPLE_TOKEN(x) token.token = (x); PUSH_TOKEN
 	next_nb(lex);
-	if (eol(lex, 1))
+	if (eol(lex, 0))
 		return 0;
 	switch (at(0))
 	{
+	case ';':
+		PUSH_SIMPLE_TOKEN(ELEX_SEMICOLON);
+		break;
 	case '$': // variable sigil
-		token.token = ELEX_SIGIL;
-		// TODO
-		eina_inarray_push(lex->lex, &token);
+		PUSH_SIMPLE_TOKEN(ELEX_SIGIL);
 		break;
 	default: // Raw keyword or something
-	{
-		Eina_Strbuf* ident = lex_identifier(lex);
-		if (ident)
 		{
-			printf("[%s]", eina_strbuf_string_get(ident));
-			eina_strbuf_free(ident);
+			Eina_Strbuf* ident = lex_identifier(lex);
+			if (ident)
+			{
+				printf("[%s]", eina_strbuf_string_get(ident));
+				token.token = ELEX_IDENTIFIER;
+				size_t len = eina_strbuf_length_get(ident);
+				token.data.d_string.str = eina_strbuf_string_steal(ident);
+				token.data.d_string.len = len;
+				PUSH_TOKEN;
+				goto skip_step;
+			}
+			else {
+				putchar(at(0));
+			}
 		}
 		break;
 	}
-	}
-	next_char(lex, 0);
-
+	step_char(lex, 0);
+skip_step:
+	return 0;
+#undef PUSH_SIMPLE_TOKEN
+#undef PUSH_TOKEN
 }
 
 int
@@ -120,7 +137,7 @@ escript_lexer_lex(struct Escript_Lexer* lex, char const* string, size_t string_l
 }
 
 void
-escript_lexer_cleanup(struct Escript_Lexer* cleanup)
+escript_lexer_cleanup(struct Escript_Lexer* lex)
 {
-	eina_inarray_free(cleanup->lex);
+	eina_inarray_free(lex->tokens);
 }

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.

Reply via email to