Skip to content

Replace stb_c_lexer.h with a custom Lexer written from scratch in Crust #70

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 2, 2025
Merged
Next commit
Outline the custom lexer
  • Loading branch information
rexim committed May 31, 2025
commit 77e496a97f7c4462143f4d739186c890516d928d
9 changes: 2 additions & 7 deletions src/b.rs
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ pub mod flag;
pub mod crust;
pub mod arena;
pub mod codegen;
pub mod lexer;

use core::ffi::*;
use core::mem::zeroed;
@@ -24,13 +25,7 @@ use flag::*;
use crust::libc::*;
use arena::Arena;
use codegen::{Target, name_of_target, TARGET_NAMES, target_by_name};

/// A position in an input file: the file's path plus a line number and an
/// offset within that line.
///
/// NOTE(review): whether `line_number` / `line_offset` are 0- or 1-based is not
/// visible from this declaration — confirm against the code that fills them in.
#[derive(Clone, Copy)]
pub struct Loc {
// Path of the input file, as a C string pointer.
pub input_path: *const c_char,
// Line the location refers to.
pub line_number: c_int,
// Offset within that line.
pub line_offset: c_int,
}
use lexer::*;

macro_rules! diagf {
($loc:expr, $($args:tt)*) => {{
1 change: 1 addition & 0 deletions src/crust.rs
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@ pub mod libc {
pub fn strdup(s: *const c_char) -> *mut c_char;
pub fn printf(fmt: *const c_char, ...) -> c_int;
pub fn fprintf(stream: *mut FILE, fmt: *const c_char, ...) -> c_int;
pub fn isspace(c: c_int) -> c_int;
}

// count is the amount of items, not bytes
121 changes: 121 additions & 0 deletions src/lexer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use core::ffi::*;
use crate::arena::*;
use crate::crust::libc::*;

/// A position in an input file, recorded on the lexer for each token.
///
/// `get_token` captures a `Loc` before consuming a token and stores it in
/// `Lexer::loc`.
///
/// NOTE(review): the base (0 or 1) of `line_number` and `line_offset` is not
/// established by this file yet — `loc()` is still a stub. Confirm once it is
/// implemented.
#[derive(Copy, Clone)]
pub struct Loc {
    /// Path of the input file, as a C string pointer.
    pub input_path: *const c_char,
    /// Line the location refers to.
    pub line_number: c_int,
    /// Offset within that line.
    pub line_offset: c_int,
}

/// Kinds of tokens the lexer can currently produce.
///
/// Only the two curly-brace punctuations exist so far; they are paired with
/// their spellings in `PUNCTS`.
#[derive(Copy, Clone)]
pub enum Token {
    /// Opening curly brace, `{`.
    OCurly,
    /// Closing curly brace, `}`.
    CCurly,
}

/// Snapshot of the lexer's reading position.
///
/// The struct is `Copy`, so a position can be saved and later restored by
/// plain assignment (this is how `skip_prefix` backtracks).
#[derive(Copy, Clone)]
pub struct Parse_Point {
    /// Next character to be read; advanced by one on every consumed character.
    pub current: *mut c_char,
    /// First character of the current line; moved to just past each consumed `'\n'`.
    pub line_start: *mut c_char,
    /// Incremented once per consumed `'\n'`.
    /// NOTE(review): the initial value (0 or 1) is set outside this file — confirm.
    pub line_number: usize,
}

/// Lexer state: the input being scanned, the current reading position, and
/// the data describing the most recently produced token.
#[derive(Copy, Clone)]
pub struct Lexer {
    // --- input -----------------------------------------------------------

    /// Path of the input, as a C string pointer.
    /// NOTE(review): presumably the value reported in `Loc::input_path` — confirm.
    pub input_path: *mut c_char,
    /// NOTE(review): presumably the first character of the input buffer; it is
    /// not read by any function in this file yet — confirm.
    pub input_stream: *mut c_char,
    /// End of input: `is_eof` reports true once `parse_point.current` reaches
    /// (or passes) this pointer.
    pub eof: *mut c_char,
    /// Current reading position.
    pub parse_point: Parse_Point,

    // --- most recent token ----------------------------------------------

    /// NOTE(review): presumably backing storage for string payloads; unused in
    /// this file so far — confirm.
    pub string_storage: Arena,
    /// Kind of the token most recently recognized by `get_token`.
    pub token: Token,
    /// NOTE(review): presumably the string payload of the current token; not
    /// written by any function in this file yet — confirm.
    pub string: *const c_char,
    /// NOTE(review): presumably the numeric payload of the current token; not
    /// written by any function in this file yet — confirm.
    pub number: c_long,
    /// Location captured by `get_token` just before it consumed the current token.
    pub loc: Loc,
}

/// Consumes one character unconditionally.
///
/// Returns the consumed character, or `None` when the lexer is already at the
/// end of input. Line bookkeeping is handled by `skip_char_if`.
pub unsafe fn skip_char(l: *mut Lexer) -> Option<c_char> {
    /// Predicate that accepts every character.
    unsafe fn accept_any(_: c_char) -> bool {
        true
    }

    skip_char_if(l, accept_any)
}

/// Consumes up to `n` characters, stopping early if the end of input is hit.
pub unsafe fn skip_chars(l: *mut Lexer, n: usize) {
    for _ in 0..n {
        if skip_char(l).is_none() {
            // Nothing left to consume.
            return;
        }
    }
}

/// Consumes the next character only if the predicate `p` accepts it.
///
/// Returns `Some(c)` with the consumed character, or `None` — leaving the
/// lexer untouched — when the input is exhausted or `p` rejects the character.
///
/// Consuming a `'\n'` also starts a new line: `line_start` is moved to the
/// character right after it and `line_number` is incremented.
pub unsafe fn skip_char_if(l: *mut Lexer, p: unsafe fn(c_char) -> bool) -> Option<c_char> {
    if is_eof(l) {
        return None;
    }

    // Work on a copy of the position and commit it only once the character is accepted.
    let mut point = (*l).parse_point;
    let ch = *point.current;
    if !p(ch) {
        return None;
    }

    point.current = point.current.add(1);
    if ch == '\n' as c_char {
        point.line_start = point.current;
        point.line_number += 1;
    }
    (*l).parse_point = point;

    Some(ch)
}

/// Reports whether the reading position has reached (or passed) the end of
/// the input, i.e. `parse_point.current >= eof`.
pub unsafe fn is_eof(l: *mut Lexer) -> bool {
    let cursor = (*l).parse_point.current;
    let end = (*l).eof;
    end <= cursor
}

pub unsafe fn skip_whitespaces(l: *mut Lexer) {
while let Some(_) = skip_char_if(l, |x| isspace(x as i32) != 0) {}
}

// Table of punctuation tokens: each entry pairs the C-string spelling with the
// `Token` it produces.
//
// `get_token` walks this table front to back and accepts the first spelling
// that matches, so any future multi-character punctuation must be listed
// before an entry that is a prefix of it.
pub const PUNCTS: *const [(*const c_char, Token)] = &[
(c!("{"), Token::OCurly),
(c!("}"), Token::CCurly),
];

/// Tries to consume the NUL-terminated string `prefix` from the input.
///
/// Returns `true` with the whole prefix consumed on a match. On a mismatch,
/// or if the input ends first, the reading position is restored to where it
/// was on entry and `false` is returned. An empty prefix always matches.
pub unsafe fn skip_prefix(l: *mut Lexer, mut prefix: *const c_char) -> bool {
    // Remember where we started so a partial match can be undone.
    let checkpoint = (*l).parse_point;

    loop {
        let expected = *prefix;
        if expected == 0 {
            // Reached the terminator: every prefix character matched.
            return true;
        }

        match skip_char(l) {
            Some(actual) if actual == expected => {
                prefix = prefix.add(1);
            }
            _ => {
                (*l).parse_point = checkpoint;
                return false;
            }
        }
    }
}

/// Intended to produce the `Loc` for the lexer's current reading position.
///
/// Not implemented yet: calling it panics via `todo!()`. Because `get_token`
/// calls this before trying any punctuation, `get_token` currently panics on
/// any input that still has a non-whitespace character left.
pub unsafe fn loc(_l: *mut Lexer) -> Loc {
todo!()
}

/// Advances the lexer to the next token.
///
/// Skips leading whitespace, then tries each `PUNCTS` entry in table order.
/// On the first match, stores the token kind in `(*l).token` and the location
/// at which the token started in `(*l).loc`.
///
/// Returns `false` when only whitespace (or nothing) remained, `true` when a
/// token was recognized.
///
/// # Panics
///
/// Input that is not a known punctuation reaches the trailing `todo!()`,
/// since no other token kinds are implemented yet.
pub unsafe fn get_token(l: *mut Lexer) -> bool {
    skip_whitespaces(l);

    if is_eof(l) {
        return false;
    }

    // A failed `skip_prefix` restores the parse point, so the start location
    // is the same for every candidate: compute it once, outside the loop.
    // The distinct name also avoids shadowing the `loc` function.
    let token_loc = loc(l);

    for i in 0..PUNCTS.len() {
        let (prefix, token) = (*PUNCTS)[i];
        if skip_prefix(l, prefix) {
            (*l).token = token;
            (*l).loc = token_loc;
            return true;
        }
    }

    todo!()
}