With >9900 lines of code written, I now have a functioning C tokenizer and preprocessor. However, I still have a broken parser and an unimplemented code generator.
How do I debug the parser code, to make it work?
I can debug the code using the debugger and println statements. However, is it worth the time?
I don’t believe improving my debugging speed is worth the time.
Instead, I plan on taking a break from coding and coming back to the project down the road.
I believe it is time to focus on other things:
- Visiting family members.
- Reading research papers. Even though I have not determined my specialization, it seems like I benefit from reading classic computer systems papers.
- Doing projects involving design.
Regarding the last point, this project has reminded me that debugging is complicated, and tools for dealing with high-volume tracing data have the potential to simplify it.
However, this requires high performance and an understanding of how people (or AIs) think and learn. It may be too large a project to complete in a year.
I will see how things go. The future is both predictable and unpredictable.
use core::str;
use std::{
cell::{Cell, RefCell},
collections::{HashMap, HashSet, VecDeque},
ffi::{c_char, CString},
fs,
io::{stdin, Read},
ops::{Index, Range, RangeFull},
os::unix::fs::MetadataExt,
path::{Path, PathBuf},
rc::Rc,
};
use chrono::{DateTime, Local};
use either::Either;
use libc::{isxdigit, strtod};
use tracing::{instrument, Level};
use tracing_subscriber::fmt::format::FmtSpan;
//
// Custom logging (using the tracing library)
//
/// Installs the process-wide `tracing` subscriber used for custom logging.
///
/// Output is JSON with line numbers, span-open/close events, and no ANSI
/// color codes; the span list is suppressed to keep each record small.
///
/// Panics if a global subscriber has already been installed.
#[instrument(ret)]
pub fn setup_tracing() {
    let builder = tracing_subscriber::fmt::Subscriber::builder()
        .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
        .with_ansi(false)
        .with_line_number(true)
        .json()
        .with_span_list(false);
    tracing::subscriber::set_global_default(builder.finish()).unwrap();
}
//
// Custom, rust-only data structures
//
// A byte span `[idx, end_idx)` into a shared copy of a file's contents.
// Plays the role of chibicc's `char *` location pointers; cloning is cheap
// because the file bytes are behind an `Rc`.
#[derive(Clone)]
struct Location {
// Start offset of the span into `contents` (inclusive).
idx: usize,
// End offset of the span (exclusive).
end_idx: usize,
// The whole file's bytes, shared between every span into the file.
contents: Rc<Vec<u8>>,
}
// Debug output shows the span offsets plus at most ~30 bytes of its text,
// widened forward to the next UTF-8 char boundary so slicing cannot panic.
impl std::fmt::Debug for Location {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = self.as_str();
// Truncate to 30 bytes, then advance until we sit on a char boundary.
// `s.len()` is always a boundary, so this loop terminates.
let mut i = std::cmp::min(30, s.len());
while !s.is_char_boundary(i) {
i += 1;
}
f.debug_struct("Location")
.field("idx", &self.idx)
.field("end_idx", &self.end_idx)
.field("contents[idx..end_idx]", &&self.as_str()[..i])
.finish()
}
}
impl Location {
    /// Number of bytes in the span.
    #[instrument(level = "debug")]
    fn len(&self) -> usize {
        self.end_idx - self.idx
    }
    /// The bytes covered by the span.
    #[instrument(level = "debug")]
    fn as_bytes(&self) -> &[u8] {
        &self.contents[self.idx..self.end_idx]
    }
    /// The span's bytes as UTF-8 text; panics if they are not valid UTF-8.
    #[instrument(level = "debug")]
    fn as_str(&self) -> &str {
        let bytes = &self.contents[self.idx..self.end_idx];
        core::str::from_utf8(bytes).unwrap()
    }
}
// Byte access relative to the start of the span: `loc[i]` reads
// `contents[idx + i]`, mirroring chibicc's `loc[i]` pointer arithmetic.
impl Index<usize> for Location {
type Output = u8;
#[instrument(level = "debug")]
fn index(&self, index: usize) -> &Self::Output {
// NOTE(review): `idx + index` is not checked against `end_idx`, so an
// index can read past the logical end of the span (still within the
// file). Some callers appear to rely on this — confirm intended.
return self.contents.index(self.idx + index);
}
}
// Iterating a Location consumes it from the front: each `next()` yields the
// byte at `idx` and advances `idx`, stopping at `end_idx`.
impl Iterator for Location {
    type Item = u8;
    #[instrument(level = "debug", ret)]
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx < self.end_idx {
            let byte = self.contents[self.idx];
            self.idx += 1;
            Some(byte)
        } else {
            None
        }
    }
}
//
// strings.rs
//
//
// tokenize.rs
// Lexical category of a token, matching chibicc's TokenKind.
#[derive(PartialEq, Debug, Clone)]
enum TokenKind {
Ident, // Identifiers
Punct, // Punctuators
Keyword, // Keywords
Str, // String literals
Num, // Numeric literals
PPNum, // Preprocessing numbers
}
// An input source file. `contents` is shared with every Location/Token
// that points into it.
#[derive(Clone)]
struct File {
// Path the file was read from.
name: String,
// Sequential file number assigned at load time.
file_no: i32,
// The raw file bytes, shared via Rc.
contents: Rc<Vec<u8>>,
// Name shown in diagnostics — presumably overridable by `#line`; confirm.
display_name: String,
// Line-number adjustment — presumably from `#line` directives; confirm.
line_delta: i32,
}
// Debug output omits `contents` (the whole file body) to keep traces small.
impl std::fmt::Debug for File {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("File")
.field("name", &self.name)
.field("file_no", &self.file_no)
.field("display_name", &self.display_name)
.field("line_delta", &self.line_delta)
.finish()
}
}
// Kind-specific token payload: numeric literals carry their parsed value
// and type; string literals carry their decoded bytes and array type.
#[derive(Clone, Debug)]
enum TokenFields {
None,
Int(Type, i64),
Float(Type, f64),
Str(Type, Box<[u8]>), // Can be UTF-8, UTF-16, or UTF-32.
}
// A single lexical token. Mirrors chibicc's Token struct, with the `char *`
// location replaced by a Location span into the shared file contents.
#[derive(Clone)]
pub struct Token {
kind: TokenKind,
loc: Location, // Token location (equivalent to a char*)
// The file this token came from.
file: Rc<RefCell<File>>,
// Override filename — presumably set by line markers; confirm with caller.
filename: Option<String>,
// 1-based line number; -1 until assigned (see Token::new).
line_no: i32,
line_delta: i32,
at_bol: bool, // If token is at beginning of line, true
has_space: bool, // If token follows space, true
hideset: Vec<Hideset>, // For macro expansion
origin: Option<Rc<Token>>, // If this is expanded from a macro, the original token
other: TokenFields,
}
// Debug output includes the commonly inspected fields only; `file`,
// `filename`, `hideset`, and `origin` are elided (finish_non_exhaustive)
// to keep traces readable.
impl std::fmt::Debug for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Token")
.field("kind", &self.kind)
.field("loc", &self.loc)
.field("other", &self.other)
.field("line_no", &self.line_no)
.field("line_delta", &self.line_delta)
.field("at_bol", &self.at_bol)
.field("has_space", &self.has_space)
.finish_non_exhaustive()
}
}
// User-facing rendering: `Token(Kind, lexeme)`.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Token({:?}, {})", self.kind, self.loc.as_str())
    }
}
//
// preprocess.c
//
// A macro name in a token's hideset: macros already expanded for this token,
// preventing infinite recursive expansion during preprocessing.
#[derive(Debug, Clone)]
struct Hideset {
name: String,
}
//
// parse.c
//
// Function-specific data attached to an Obj (see Obj::func).
#[derive(Debug)]
struct ObjFunction {
// Function
is_inline: bool,
params: Vec<Rc<RefCell<Obj>>>,
body: Rc<RefCell<Node>>,
locals: Vec<Rc<RefCell<Obj>>>,
va_area: Option<Rc<RefCell<Obj>>>,
alloca_bottom: Rc<RefCell<Obj>>,
stack_size: i32,
// Static inline function bookkeeping (liveness for dead-code elimination).
is_live: bool,
is_root: bool,
refs: Vec<String>,
}
// A variable or function (chibicc's Obj): local variables, global
// variables, typedefs, and function definitions all use this struct.
#[derive(Debug)]
pub struct Obj {
name: String,
ty: Type,
tok: Option<Token>, // Representative token
is_local: bool, // local or global/function
align: i32, // alignment
// Local variable
offset: i32,
// Global variable or function
is_function: bool,
is_definition: bool,
is_static: bool,
// Global variable
is_tentative: bool,
is_tls: bool,
init_data: Vec<u8>,
rel: Option<Relocation>,
// Function-only data; None for variables.
func: Option<ObjFunction>,
}
// Global variable can be initialized either by a constant expression
// or a pointer to another global variable. This struct represents the
// latter.
// Global variable can be initialized either by a constant expression
// or a pointer to another global variable. This struct represents the
// latter.
#[derive(Debug)]
struct Relocation {
// Byte offset within the initialized global where the pointer goes.
offset: usize,
// Symbol the pointer refers to.
label: String,
// Constant added to the symbol's address.
addend: i64,
}
// Operator of a NodeFields::Binary node.
#[derive(Clone, Debug)]
enum NodeBinaryKind {
Add,
Sub,
Mul,
Div,
Mod,
BitAnd, // &
BitOr, // |
BitXor, // ^
Shl, // <<
Shr, // >>
Eq, // ==
Ne, // !=
Lt,
Le,
Assign,
Comma,
LogAnd, // &&
LogOr, // ||
}
// Operator of a NodeFields::Unary node.
#[derive(Clone, Debug)]
enum NodeUnaryKind {
Neg,
Addr,
Deref,
Not, // !
BitNot, // ~
}
// Discriminates the two statement flavors in NodeFields::Stmt.
#[derive(Clone, Debug)]
enum NodeStmtKind {
ExprStmt, // Expression statement
StmtExpr, // Statement expression
}
// Atomic operation kinds (GNU __atomic builtins).
#[derive(Clone, Debug)]
enum NodeAtomicKind {
Cas,
Exch, // Atomic exchange
}
// Control-flow kinds that all share the NodeFields::Cond payload.
#[derive(Clone, Debug)]
enum NodeCondKind {
Cond, // ?:
If,
For,
Do,
Switch,
Case,
Goto,
GotoExpr, // "goto" labels-as-values"
Label, // labeled statement
LabelVal, // Labels-as-values
}
// AST node payload: chibicc's NodeKind plus the kind-specific fields,
// folded into one enum. Child links use Rc<RefCell<..>> in place of
// chibicc's raw Node pointers.
#[derive(Clone)]
enum NodeFields {
NullExpr, // Do nothing
Binary {
kind: NodeBinaryKind,
lhs: Rc<RefCell<Node>>,
rhs: Rc<RefCell<Node>>,
},
// Struct member access (`.` / `->` after lowering).
Member {
member: Member,
expr: Rc<RefCell<Node>>,
},
Unary {
kind: NodeUnaryKind,
expr: Rc<RefCell<Node>>,
},
Return {
expr: Option<Rc<RefCell<Node>>>,
},
// Shared payload for ?:, if, for/while, do, switch/case, goto, labels —
// see NodeCondKind. Unused fields stay None for a given kind.
Cond {
kind: NodeCondKind,
// if or for statement
cond: Option<Rc<RefCell<Node>>>,
then: Option<Rc<RefCell<Node>>>,
els: Option<Rc<RefCell<Node>>>,
init: Option<Either<Rc<RefCell<Node>>, VecDeque<Rc<RefCell<Node>>>>>,
inc: Option<Rc<RefCell<Node>>>,
// "break" and "continue" labels
brk_label: Option<String>,
cont_label: Option<String>,
unique_label: Option<String>,
// Switch/case
case_next: Option<Rc<RefCell<Node>>>,
case_default: Option<Rc<RefCell<Node>>>,
case_begin: Option<i32>,
case_end: Option<i32>,
// Case or goto expression and label
label: Option<String>,
expr: Option<Rc<RefCell<Node>>>,
goto_next: Option<Rc<RefCell<Node>>>,
},
Block {
body: Vec<Rc<RefCell<Node>>>,
}, // { .. }
FnCall {
// Function call
func_ty: Type,
args: Vec<Rc<RefCell<Node>>>,
pass_by_stack: bool,
ret_buffer: Option<Rc<RefCell<Obj>>>,
expr: Option<Rc<RefCell<Node>>>,
},
Stmt {
kind: NodeStmtKind,
expr: Option<Rc<RefCell<Node>>>,
body: Vec<Rc<RefCell<Node>>>,
},
// Variable reference.
Var(Rc<RefCell<Obj>>),
VlaPtr(Rc<RefCell<Obj>>),
// Integer / floating literals.
Int(i64),
Float(f64),
Cast(Rc<RefCell<Node>>),
Cas {
// Atomic compare-and-swap
addr: Rc<RefCell<Node>>,
old: Rc<RefCell<Node>>,
new: Rc<RefCell<Node>>,
},
Exch {
lhs: Rc<RefCell<Node>>,
rhs: Rc<RefCell<Node>>,
},
// Zero-fill a stack variable.
MemZero {
var: Rc<RefCell<Obj>>,
},
Asm(Box<[u8]>),
}
// Hand-written Debug: child-node fields are deliberately commented out so
// traces stay small and deeply nested trees don't explode the log output.
// The commented `.field(...)` lines are kept for easy re-enabling while
// debugging. (The unused-variable warnings from destructured bindings are
// a side effect of keeping the patterns complete.)
impl std::fmt::Debug for NodeFields {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::NullExpr => write!(f, "NullExpr"),
Self::Binary { kind, lhs, rhs } => f
.debug_struct("Binary")
.field("kind", kind)
// .field("lhs", lhs)
// .field("rhs", rhs)
.finish(),
Self::Member { member, expr } => {
f.debug_struct("Member").field("member", member).finish()
}
Self::Unary { kind, expr } => f
.debug_struct("Unary")
.field("kind", kind)
// .field("expr", expr)
.finish(),
Self::Return { expr } => write!(f, "Return"),
Self::Cond {
kind,
cond,
then,
els,
init,
inc,
brk_label,
cont_label,
case_next,
case_default: default_case,
case_begin: begin,
case_end: end,
label,
unique_label,
expr,
goto_next,
} => f
.debug_struct("Cond")
.field("kind", kind)
// .field("cond", cond)
// .field("then", then)
// .field("els", els)
// .field("init", init)
// .field("inc", inc)
// .field("brk_label", brk_label)
// .field("cont_label", cont_label)
// .field("case_next", case_next)
// .field("default_case", default_case)
// .field("begin", begin)
// .field("end", end)
.finish(),
Self::Block { body } => write!(f, "Block"),
Self::FnCall {
func_ty,
args,
pass_by_stack,
ret_buffer,
expr,
} => f
.debug_struct("FnCall")
.field("func_ty", func_ty)
// .field("args", args)
.field("pass_by_stack", pass_by_stack)
// .field("ret_buffer", ret_buffer)
// .field("expr", expr)
.finish(),
Self::Stmt { kind, body, expr } => f
.debug_struct("Stmt")
.field("kind", kind)
// .field("body", body)
// .field("expr", expr)
.finish(),
Self::Var(arg0) => f
.debug_tuple("Var") /* .field(arg0) */
.finish(),
Self::VlaPtr(arg0) => f
.debug_tuple("VlaPtr") /* .field(arg0) */
.finish(),
Self::Int(arg0) => f.debug_tuple("Int").field(arg0).finish(),
Self::Float(arg0) => f.debug_tuple("Float").field(arg0).finish(),
Self::Cast(arg0) => f.debug_tuple("Cast").field(arg0).finish(),
Self::Cas { addr, old, new } => f
.debug_struct("Cas")
// .field("addr", addr)
// .field("old", old)
// .field("new", new)
.finish(),
Self::Exch { lhs, rhs } => f
.debug_struct("Exch")
// .field("lhs", lhs)
// .field("rhs", rhs)
.finish(),
Self::MemZero { var } => write!(f, "MemZero"),
Self::Asm(arg0) => f.debug_tuple("Asm").field(arg0).finish(),
}
}
}
// An AST node: the kind-specific payload, the node's C type (filled in by
// add_type), and a representative token for diagnostics.
#[derive(Clone, Debug)]
struct Node {
kind: NodeFields,
// None until type-checking assigns a type.
ty: Option<Type>,
tok: Token, // Representative token
}
//
// type.rs
//
// The shape of a C type. Composite kinds own their component types
// (boxed) rather than pointing at a shared arena.
#[derive(Clone, Debug)]
enum TypeKind {
Void,
Bool,
Char,
Short,
Int,
Long,
Float,
Double,
LDouble,
Enum,
Ptr {
base: Box<Type>,
},
Func {
return_ty: Box<Type>,
params: Vec<Type>,
is_variadic: bool,
// NOTE(review): purpose of `next` is not evident here — confirm usage.
next: Option<Box<Type>>,
},
Array {
base: Box<Type>,
// Negative length marks an array of unknown size (see is_compatible).
len: i32,
},
Vla {
base: Box<Type>,
vla_len: Rc<RefCell<Node>>,
vla_size: Option<Rc<RefCell<Obj>>>,
}, // Variable-length array
Struct {
members: Vec<Member>,
is_flexible: bool,
is_packed: bool,
},
Union {
members: Vec<Member>,
is_flexible: bool,
},
}
// A list of candidate types — presumably collected during declaration
// parsing; confirm against the parser once it compiles.
#[derive(Clone)]
struct OneOrManyTypes {
ty: Vec<Type>,
}
// Declaration info attached to a Type: the declared name (if any) and the
// token position used for error reporting.
#[derive(Clone)]
struct TypeDeclaration {
name: Option<Box<Token>>,
name_pos: Box<Token>,
}
// A C type: its kind plus size/alignment and qualifier flags.
#[derive(Clone)]
struct Type {
kind: TypeKind,
size: i32, // sizeof() value
align: i32,
is_unsigned: bool,
is_atomic: bool,
origin: Option<Rc<Type>>, // for type compatibility check
decl: Option<TypeDeclaration>,
}
// Debug output shows the essentials only; `is_atomic`, `origin`, and `decl`
// are elided (finish_non_exhaustive) to keep traces compact.
impl std::fmt::Debug for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Type")
.field("kind", &self.kind)
.field("size", &self.size)
.field("align", &self.align)
.field("is_unsigned", &self.is_unsigned)
.finish_non_exhaustive()
}
}
// A struct/union member, including bitfield layout information.
#[derive(Clone, Debug)]
struct Member {
ty: Type,
tok: Option<Token>,
name: Option<Token>,
// Position of this member within its struct/union.
idx: usize,
align: i32,
// Byte offset from the start of the struct.
offset: usize,
// Bitfield layout (meaningful only when is_bitfield is true).
is_bitfield: bool,
bit_offset: usize,
bit_width: usize,
}
impl Member {
    /// Creates a member of type `ty` with every layout/bookkeeping field
    /// zeroed; the parser fills in offsets and bitfield data later.
    fn new(ty: Type) -> Member {
        Member {
            ty,
            tok: None,
            name: None,
            idx: 0,
            offset: 0,
            align: 0,
            is_bitfield: false,
            bit_offset: 0,
            bit_width: 0,
        }
    }
}
// Tokenizer state: the file currently being scanned plus all files seen so
// far (for diagnostics and #include bookkeeping).
#[derive(Debug)]
pub struct Tokenize {
current_file: Option<Rc<RefCell<File>>>, // Input file
input_files: Vec<Rc<RefCell<File>>>, // A list of all input files
}
impl Tokenize {
#[instrument(ret)]
pub fn new() -> Self {
Tokenize {
current_file: None,
input_files: Vec::new(),
}
}
}
// type.c
// Primitive-type singletons (size/alignment per the x86-64 System V ABI).
const TY_VOID: Type = new_type(TypeKind::Void, 1, 1, false);
const TY_BOOL: Type = new_type(TypeKind::Bool, 1, 1, false);
const TY_CHAR: Type = new_type(TypeKind::Char, 1, 1, false);
const TY_SHORT: Type = new_type(TypeKind::Short, 2, 2, false);
const TY_INT: Type = new_type(TypeKind::Int, 4, 4, false);
const TY_LONG: Type = new_type(TypeKind::Long, 8, 8, false);
const TY_UCHAR: Type = new_type(TypeKind::Char, 1, 1, true);
// BUG FIX: unsigned short is 2 bytes with 2-byte alignment (was 1, 1),
// matching TY_SHORT — only signedness differs between the pairs.
const TY_USHORT: Type = new_type(TypeKind::Short, 2, 2, true);
const TY_UINT: Type = new_type(TypeKind::Int, 4, 4, true);
const TY_ULONG: Type = new_type(TypeKind::Long, 8, 8, true);
const TY_FLOAT: Type = new_type(TypeKind::Float, 4, 4, false);
const TY_DOUBLE: Type = new_type(TypeKind::Double, 8, 8, false);
const TY_LDOUBLE: Type = new_type(TypeKind::LDouble, 16, 16, false);
/// Builds a `Type` with the given kind, size, alignment, and signedness;
/// the remaining fields start out empty (not atomic, no origin, no decl).
const fn new_type(kind: TypeKind, size: i32, align: i32, is_unsigned: bool) -> Type {
    Type {
        kind,
        size,
        align,
        is_unsigned,
        is_atomic: false,
        origin: None,
        decl: None,
    }
}
#[instrument(ret)]
fn is_integer(ty: &Type) -> bool {
match ty.kind {
TypeKind::Bool | TypeKind::Char | TypeKind::Short | TypeKind::Int | TypeKind::Long => true,
TypeKind::Enum => true,
TypeKind::Void
| TypeKind::Float
| TypeKind::Double
| TypeKind::LDouble
| TypeKind::Ptr { .. }
| TypeKind::Func { .. }
| TypeKind::Array { .. }
| TypeKind::Vla { .. }
| TypeKind::Struct { .. }
| TypeKind::Union { .. } => false,
}
}
/// Returns true for C floating-point types (float, double, long double).
///
/// Cleanup: the previous version cloned `ty.kind` into an unused local on
/// every call (a real allocation for composite kinds) and kept a stale
/// commented-out comparison; both are removed. `matches!` keeps the check
/// allocation-free.
#[instrument(ret)]
fn is_flonum(ty: &Type) -> bool {
    matches!(
        ty.kind,
        TypeKind::Float | TypeKind::Double | TypeKind::LDouble
    )
}
/// Returns true for any arithmetic type: integer or floating-point.
#[instrument(ret)]
fn is_numeric(ty: &Type) -> bool {
    is_integer(ty) || is_flonum(ty)
}
/// Returns true if two types are compatible in the C sense (C11 6.2.7).
/// Typedef'd copies are chased through `origin` before comparison.
#[instrument(ret)]
fn is_compatible(t1: &Type, t2: &Type) -> bool {
    // The same object is trivially compatible with itself.
    if std::ptr::eq(t1, t2) {
        return true;
    }
    // Chase copy/typedef chains down to the underlying type.
    if let Some(o) = &t1.origin {
        return is_compatible(o, t2);
    }
    if let Some(o) = &t2.origin {
        return is_compatible(t1, o);
    }
    // BUG FIX: differing kinds are never compatible; matching kinds proceed
    // to the per-kind check below. The comparison was inverted (`==` with
    // `return false`), which made e.g. char vs. char incompatible —
    // contradicting this file's own test_type module.
    if std::mem::discriminant(&t1.kind) != std::mem::discriminant(&t2.kind) {
        return false;
    }
    // From here on t1 and t2 have the same TypeKind variant.
    match &t1.kind {
        TypeKind::Void => false,
        TypeKind::Bool => false,
        // Integer types are compatible when their signedness agrees.
        TypeKind::Char | TypeKind::Short | TypeKind::Int | TypeKind::Long => {
            t1.is_unsigned == t2.is_unsigned
        }
        TypeKind::Float | TypeKind::Double | TypeKind::LDouble => true,
        TypeKind::Enum => false,
        TypeKind::Ptr { base: base1 } => {
            // Same discriminant, so t2 must also be a pointer.
            if let TypeKind::Ptr { base: base2 } = &t2.kind {
                is_compatible(base1, base2)
            } else {
                unreachable!("discriminants matched above")
            }
        }
        TypeKind::Func {
            return_ty: return_ty1,
            params: params1,
            is_variadic: is_variadic1,
            next: _,
        } => {
            if let TypeKind::Func {
                return_ty: return_ty2,
                params: params2,
                is_variadic: is_variadic2,
                next: _,
            } = &t2.kind
            {
                // Return type, variadic-ness, arity, and every parameter
                // type must agree.
                if !is_compatible(return_ty1, return_ty2) {
                    return false;
                }
                if is_variadic1 != is_variadic2 {
                    return false;
                }
                if params1.len() != params2.len() {
                    return false;
                }
                for (p1, p2) in params1.iter().zip(params2.iter()) {
                    if !is_compatible(p1, p2) {
                        return false;
                    }
                }
                true
            } else {
                false
            }
        }
        TypeKind::Array {
            base: base1,
            len: len1,
        } => match &t2.kind {
            TypeKind::Array {
                base: base2,
                len: len2,
            } => {
                if !is_compatible(base1, base2) {
                    return false;
                }
                // Case: array_declaration() — only arrays of unknown
                // (negative) length are considered compatible here.
                *len1 < 0 && *len2 < 0 && len1 == len2
            }
            _ => false,
        },
        // TODO: can these types be compatible?
        TypeKind::Vla { .. } => false,
        TypeKind::Struct { .. } => false,
        TypeKind::Union { .. } => false,
    }
}
/// Duplicates a type, recording the original behind `origin` so the
/// compatibility check can see through the copy.
#[instrument(ret)]
fn copy_type(ty: Type) -> Type {
    let original = Rc::new(ty);
    let mut duplicate = (*original).clone();
    duplicate.origin = Some(original);
    duplicate
}
/// Builds a pointer type to `base`. Pointers are 8 bytes / 8-aligned and
/// are marked unsigned so pointer comparisons use unsigned semantics
/// (chibicc convention).
#[instrument(ret)]
fn pointer_to(base: Type) -> Type {
    let kind = TypeKind::Ptr {
        base: Box::new(base),
    };
    new_type(kind, 8, 8, true)
}
/// Builds a function type returning `return_ty`, with no parameters yet.
/// The C spec disallows sizeof(function), but a size of 1 is used here so
/// pointer arithmetic on function pointers behaves (GCC extension).
#[instrument(ret)]
fn func_type(return_ty: Type) -> Type {
    let kind = TypeKind::Func {
        return_ty: Box::new(return_ty),
        params: Vec::new(),
        is_variadic: false,
        next: None,
    };
    new_type(kind, 1, 1, false)
}
/// Builds an array type of `len` elements of `base`. The array inherits
/// its element's alignment; its size is element size times length.
/// NOTE(review): `base.size * len` can overflow i32 for huge arrays —
/// TODO confirm inputs are bounded upstream.
#[instrument(ret)]
fn array_of(base: Type, len: i32) -> Type {
    let size = base.size * len;
    let align = base.align;
    let kind = TypeKind::Array {
        base: Box::new(base),
        len,
    };
    new_type(kind, size, align, false)
}
/// Builds a variable-length-array type whose length is the expression
/// `len`, evaluated at runtime. Size/alignment are pointer-like (8/8);
/// `vla_size` is filled in later when storage is materialized.
#[instrument(ret)]
fn vla_of(base: Type, len: Rc<RefCell<Node>>) -> Type {
    let kind = TypeKind::Vla {
        base: Box::new(base),
        vla_len: len,
        vla_size: None,
    };
    new_type(kind, 8, 8, false)
}
/// Builds an enum type; enums are int-sized (4 bytes, 4-aligned, signed).
#[instrument(ret)]
fn enum_type() -> Type {
    new_type(TypeKind::Enum, 4, 4, false)
}
/// Builds an empty struct type; size 0 and alignment 1 until members are
/// added and the layout pass fixes them up.
#[instrument(ret)]
fn struct_type() -> Type {
    let kind = TypeKind::Struct {
        members: Vec::new(),
        is_flexible: false,
        is_packed: false,
    };
    new_type(kind, 0, 1, false)
}
#[instrument(ret)]
fn get_common_type(ty1: &Type, ty2: &Type) -> Type {
match &ty1.kind {
TypeKind::Ptr { base, .. } => {
return pointer_to(*base.clone());
}
TypeKind::Array { base, .. } => {
return pointer_to(*base.clone());
}
TypeKind::Vla { base, .. } => {
return pointer_to(*base.clone());
}
TypeKind::Void
| TypeKind::Bool
| TypeKind::Char
| TypeKind::Short
| TypeKind::Int
| TypeKind::Long
| TypeKind::Float
| TypeKind::Double
| TypeKind::LDouble
| TypeKind::Enum
| TypeKind::Func { .. }
| TypeKind::Struct { .. }
| TypeKind::Union { .. } => {}
}
if let TypeKind::Func { .. } = &ty1.kind {
return pointer_to(ty1.clone());
}
if let TypeKind::Func { .. } = &ty2.kind {
return pointer_to(ty2.clone());
}
if matches!(ty1.kind, TypeKind::LDouble) || matches!(ty2.kind, TypeKind::LDouble) {
return TY_LDOUBLE;
}
if matches!(ty2.kind, TypeKind::Double) || matches!(ty2.kind, TypeKind::Double) {
return TY_DOUBLE;
}
if matches!(ty1.kind, TypeKind::Float) || matches!(ty2.kind, TypeKind::Float) {
return TY_FLOAT;
}
let mut ty1m: Type = ty1.clone();
let mut ty2m: Type = ty2.clone();
if ty1m.size < 4 {
ty1m = TY_INT;
}
if ty2.size < 4 {
ty2m = TY_INT;
}
if ty1m.size != ty2m.size {
if ty1m.size < ty2m.size {
return ty2m;
} else {
return ty1m;
}
}
if ty2m.is_unsigned {
return ty2m;
}
return ty1m;
}
// add_type is implemented in the parse.c section
#[cfg(test)]
mod test_type {
use super::{is_compatible, TY_CHAR, TY_LONG, TY_ULONG};
// Documents the intended chibicc semantics: identical types (char/char)
// are compatible; same-size integers with different signedness are not.
#[test]
fn test_is_compatible() {
let cases = [((TY_CHAR, TY_CHAR), true), ((TY_ULONG, TY_LONG), false)];
for ((t1, t2), e) in cases {
let a = is_compatible(&t1, &t2);
assert_eq!(a, e);
}
}
}
// unicode.c
/// Encodes code point `c` into `buf` as UTF-8 and returns the number of
/// bytes written (1-4). `buf` must hold at least 4 bytes. Code points
/// above U+10FFFF are not handled yet (`todo!`).
#[instrument(ret, level = "Debug")]
fn encode_utf8(buf: &mut [u8], c: u32) -> usize {
    // Continuation byte: 0b10 prefix carrying the low 6 bits of `c`.
    #[instrument(ret)]
    fn one_zero_6_bits(c: u32) -> u8 {
        0b10000000 | (c & 0b111111) as u8
    }
    match c {
        0..=0x7F => {
            buf[0] = c as u8;
            1
        }
        0x80..=0x7FF => {
            buf[0] = 0b11000000 | (c >> 6) as u8;
            buf[1] = one_zero_6_bits(c);
            2
        }
        0x800..=0xFFFF => {
            buf[0] = 0b11100000 | (c >> 12) as u8;
            buf[1] = one_zero_6_bits(c >> 6);
            buf[2] = one_zero_6_bits(c);
            3
        }
        0x10000..=0x10FFFF => {
            buf[0] = 0b11110000 | (c >> 18) as u8;
            buf[1] = one_zero_6_bits(c >> 12);
            buf[2] = one_zero_6_bits(c >> 6);
            buf[3] = one_zero_6_bits(c);
            4
        }
        _ => {
            // TODO: error?
            todo!()
        }
    }
}
/// Decodes one UTF-8 code point from the front of `b`, returning the code
/// point and the remaining bytes, or an error for malformed input.
///
/// Fixes:
/// - BUG FIX: a continuation byte must match 10xxxxxx; the old check
///   (`b[i] < 0b10000000`) accepted 11xxxxxx lead bytes as continuations.
/// - Robustness: empty or truncated input now returns Err instead of
///   panicking on out-of-bounds indexing (resolves the old TODO).
#[instrument(ret, level = "Debug")]
fn decode_utf8(b: &[u8]) -> Result<(u32, &[u8]), &str> {
    if b.is_empty() {
        return Err("invalid UTF-8 sequence");
    }
    // ASCII fast path.
    if b[0] <= 0x7F {
        return Ok((b[0] as u32, &b[1..]));
    }
    // Classify the lead byte: number of bytes and its payload bits.
    let (first_bits, len) = if b[0] >= 0b11110000 {
        (b[0] & 0b111, 4)
    } else if b[0] >= 0b11100000 {
        (b[0] & 0b1111, 3)
    } else if b[0] >= 0b11000000 {
        (b[0] & 0b11111, 2)
    } else {
        // A continuation byte in lead position.
        // TODO: bubble up to display the token which triggered the error?
        return Err("invalid UTF-8 sequence");
    };
    if b.len() < len {
        // Truncated sequence.
        return Err("invalid UTF-8 sequence");
    }
    let mut c = first_bits as u32;
    for i in 1..len {
        // Every continuation byte must be 10xxxxxx.
        if b[i] & 0b11000000 != 0b10000000 {
            // TODO: bubble up to display the token which triggered the error?
            return Err("invalid UTF-8 sequence");
        }
        c = (c << 6) | (b[i] & 0b111111) as u32;
    }
    Ok((c, &b[len..]))
}
#[cfg(test)]
mod test_utf8 {
use super::{decode_utf8, encode_utf8};
// Round-trips boundary code points (1/2/3/4-byte encodings) through
// encode then decode and checks the value and consumed length.
#[test]
fn utf8() {
let points: Vec<u32> = vec![0x7F, 0x80, 0x7FF, 0x800, 0xFFFF, 0x10000, 0x10FFFF];
for p in points {
let mut buffer = [0u8; 4];
encode_utf8(&mut buffer, p);
let r = decode_utf8(&buffer);
assert!(r.is_ok());
let (point_result, new) = r.unwrap();
assert_eq!(point_result, p);
// decode must consume at least one byte of the 4-byte buffer.
assert!(new.len() < 4);
}
}
}
/// Returns true if `c` falls inside any of the inclusive `(low, high)`
/// ranges.
#[instrument(ret, level = "debug")]
fn in_range(range: &[(u32, u32)], c: u32) -> bool {
    range.iter().any(|&(low, high)| low <= c && c <= high)
}
// [https://www.sigbus.info/n1570#D] C11 allows not only ASCII
// but some multibyte characters in certain Unicode ranges to be used in
// an identifier.
//
// This function returns true if a given character is acceptable as the
// first character of an identifier.
#[instrument(ret, level = "debug")]
fn is_ident1(c: u32) -> bool {
// Ranges from C11 Annex D, plus '_', ASCII letters, and '$' (GNU ext).
let range = [
('_' as u32, '_' as u32),
('a' as u32, 'z' as u32),
('A' as u32, 'Z' as u32),
('$' as u32, '$' as u32),
(0x00A8, 0x00A8),
(0x00AA, 0x00AA),
(0x00AD, 0x00AD),
(0x00AF, 0x00AF),
(0x00B2, 0x00B5),
(0x00B7, 0x00BA),
(0x00BC, 0x00BE),
(0x00C0, 0x00D6),
(0x00D8, 0x00F6),
(0x00F8, 0x00FF),
(0x0100, 0x02FF),
(0x0370, 0x167F),
(0x1681, 0x180D),
(0x180F, 0x1DBF),
(0x1E00, 0x1FFF),
(0x200B, 0x200D),
(0x202A, 0x202E),
(0x203F, 0x2040),
(0x2054, 0x2054),
(0x2060, 0x206F),
(0x2070, 0x20CF),
(0x2100, 0x218F),
(0x2460, 0x24FF),
(0x2776, 0x2793),
(0x2C00, 0x2DFF),
(0x2E80, 0x2FFF),
(0x3004, 0x3007),
(0x3021, 0x302F),
(0x3031, 0x303F),
(0x3040, 0xD7FF),
(0xF900, 0xFD3D),
(0xFD40, 0xFDCF),
(0xFDF0, 0xFE1F),
(0xFE30, 0xFE44),
(0xFE47, 0xFFFD),
(0x10000, 0x1FFFD),
(0x20000, 0x2FFFD),
(0x30000, 0x3FFFD),
(0x40000, 0x4FFFD),
(0x50000, 0x5FFFD),
(0x60000, 0x6FFFD),
(0x70000, 0x7FFFD),
(0x80000, 0x8FFFD),
(0x90000, 0x9FFFD),
(0xA0000, 0xAFFFD),
(0xB0000, 0xBFFFD),
(0xC0000, 0xCFFFD),
(0xD0000, 0xDFFFD),
(0xE0000, 0xEFFFD),
];
return in_range(&range, c);
}
// Return true if a given character is acceptable as a non-first
// character of an identifier.
#[instrument(ret, level = "debug")]
fn is_ident2(c: u32) -> bool {
    if is_ident1(c) {
        return true;
    }
    // Extra ranges allowed after the first character: ASCII digits, '$',
    // and several combining-mark blocks.
    let extra = [
        ('0' as u32, '9' as u32),
        ('$' as u32, '$' as u32),
        (0x0300, 0x036F),
        (0x1DC0, 0x1DFF),
        (0x20D0, 0x20FF),
        (0xFE20, 0xFE2F),
    ];
    in_range(&extra, c)
}
// Returns the number of columns needed to display a given
// character in a fixed-width font.
//
// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
#[instrument(ret)]
fn char_width(c: u32) -> usize {
    // Zero-width characters: controls, combining marks, format characters.
    let range1 = [
        (0x0000, 0x001F),
        (0x007f, 0x00a0),
        (0x0300, 0x036F),
        (0x0483, 0x0486),
        (0x0488, 0x0489),
        (0x0591, 0x05BD),
        (0x05BF, 0x05BF),
        (0x05C1, 0x05C2),
        (0x05C4, 0x05C5),
        (0x05C7, 0x05C7),
        (0x0600, 0x0603),
        (0x0610, 0x0615),
        (0x064B, 0x065E),
        (0x0670, 0x0670),
        (0x06D6, 0x06E4),
        (0x06E7, 0x06E8),
        (0x06EA, 0x06ED),
        (0x070F, 0x070F),
        (0x0711, 0x0711),
        (0x0730, 0x074A),
        (0x07A6, 0x07B0),
        (0x07EB, 0x07F3),
        (0x0901, 0x0902),
        (0x093C, 0x093C),
        (0x0941, 0x0948),
        (0x094D, 0x094D),
        (0x0951, 0x0954),
        (0x0962, 0x0963),
        (0x0981, 0x0981),
        (0x09BC, 0x09BC),
        (0x09C1, 0x09C4),
        (0x09CD, 0x09CD),
        (0x09E2, 0x09E3),
        (0x0A01, 0x0A02),
        (0x0A3C, 0x0A3C),
        (0x0A41, 0x0A42),
        (0x0A47, 0x0A48),
        (0x0A4B, 0x0A4D),
        (0x0A70, 0x0A71),
        (0x0A81, 0x0A82),
        (0x0ABC, 0x0ABC),
        (0x0AC1, 0x0AC5),
        (0x0AC7, 0x0AC8),
        (0x0ACD, 0x0ACD),
        (0x0AE2, 0x0AE3),
        (0x0B01, 0x0B01),
        (0x0B3C, 0x0B3C),
        (0x0B3F, 0x0B3F),
        (0x0B41, 0x0B43),
        (0x0B4D, 0x0B4D),
        (0x0B56, 0x0B56),
        (0x0B82, 0x0B82),
        (0x0BC0, 0x0BC0),
        (0x0BCD, 0x0BCD),
        (0x0C3E, 0x0C40),
        (0x0C46, 0x0C48),
        (0x0C4A, 0x0C4D),
        (0x0C55, 0x0C56),
        (0x0CBC, 0x0CBC),
        (0x0CBF, 0x0CBF),
        (0x0CC6, 0x0CC6),
        (0x0CCC, 0x0CCD),
        (0x0CE2, 0x0CE3),
        (0x0D41, 0x0D43),
        (0x0D4D, 0x0D4D),
        (0x0DCA, 0x0DCA),
        (0x0DD2, 0x0DD4),
        (0x0DD6, 0x0DD6),
        (0x0E31, 0x0E31),
        (0x0E34, 0x0E3A),
        (0x0E47, 0x0E4E),
        (0x0EB1, 0x0EB1),
        (0x0EB4, 0x0EB9),
        (0x0EBB, 0x0EBC),
        (0x0EC8, 0x0ECD),
        (0x0F18, 0x0F19),
        (0x0F35, 0x0F35),
        (0x0F37, 0x0F37),
        (0x0F39, 0x0F39),
        (0x0F71, 0x0F7E),
        (0x0F80, 0x0F84),
        (0x0F86, 0x0F87),
        (0x0F90, 0x0F97),
        (0x0F99, 0x0FBC),
        (0x0FC6, 0x0FC6),
        (0x102D, 0x1030),
        (0x1032, 0x1032),
        (0x1036, 0x1037),
        (0x1039, 0x1039),
        (0x1058, 0x1059),
        (0x1160, 0x11FF),
        (0x135F, 0x135F),
        (0x1712, 0x1714),
        (0x1732, 0x1734),
        (0x1752, 0x1753),
        (0x1772, 0x1773),
        (0x17B4, 0x17B5),
        (0x17B7, 0x17BD),
        (0x17C6, 0x17C6),
        (0x17C9, 0x17D3),
        (0x17DD, 0x17DD),
        (0x180B, 0x180D),
        (0x18A9, 0x18A9),
        (0x1920, 0x1922),
        (0x1927, 0x1928),
        (0x1932, 0x1932),
        (0x1939, 0x193B),
        (0x1A17, 0x1A18),
        (0x1B00, 0x1B03),
        (0x1B34, 0x1B34),
        (0x1B36, 0x1B3A),
        (0x1B3C, 0x1B3C),
        (0x1B42, 0x1B42),
        (0x1B6B, 0x1B73),
        (0x1DC0, 0x1DCA),
        (0x1DFE, 0x1DFF),
        (0x200B, 0x200F),
        (0x202A, 0x202E),
        (0x2060, 0x2063),
        (0x206A, 0x206F),
        (0x20D0, 0x20EF),
        (0x302A, 0x302F),
        (0x3099, 0x309A),
        (0xA806, 0xA806),
        (0xA80B, 0xA80B),
        (0xA825, 0xA826),
        (0xFB1E, 0xFB1E),
        (0xFE00, 0xFE0F),
        (0xFE20, 0xFE23),
        (0xFEFF, 0xFEFF),
        (0xFFF9, 0xFFFB),
        (0x10A01, 0x10A03),
        (0x10A05, 0x10A06),
        (0x10A0C, 0x10A0F),
        (0x10A38, 0x10A3A),
        (0x10A3F, 0x10A3F),
        (0x1D167, 0x1D169),
        (0x1D173, 0x1D182),
        (0x1D185, 0x1D18B),
        (0x1D1AA, 0x1D1AD),
        (0x1D242, 0x1D244),
        (0xE0001, 0xE0001),
        (0xE0020, 0xE007F),
        (0xE0100, 0xE01EF),
    ];
    if in_range(&range1, c) {
        return 0;
    }
    // Double-width characters: CJK, Hangul, fullwidth forms, emoji, etc.
    let range2 = [
        (0x1100, 0x115F),
        (0x2329, 0x2329),
        (0x232A, 0x232A),
        (0x2E80, 0x303E),
        (0x3040, 0xA4CF),
        (0xAC00, 0xD7A3),
        (0xF900, 0xFAFF),
        (0xFE10, 0xFE19),
        (0xFE30, 0xFE6F),
        (0xFF00, 0xFF60),
        (0xFFE0, 0xFFE6),
        (0x1F000, 0x1F644),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    ];
    if in_range(&range2, c) {
        return 2;
    }
    // BUG FIX: ordinary printable characters occupy one column (wcwidth
    // convention). Returning 0 here gave every ASCII character width 0,
    // which misaligned the `^` caret drawn by error().
    return 1;
}
/// Total fixed-width display columns needed for the string `p`.
#[instrument(ret)]
fn display_width(p: &str) -> usize {
    p.chars().map(|c| char_width(c as u32)).sum()
}
//
// tokenize.c
//
// Prints an error message in the following format and does NOT exit:
//
//   foo.c:10: x = y + 1;
//                 ^ <error message here>
#[instrument(ret)]
fn error(filename: &str, line_no: i32, loc: &Location, msg: &str) {
let contents = loc.contents.as_ref();
// Walk back to the start of the line containing loc.
let line_idx = {
let mut idx = loc.idx;
while idx > 0 && contents[idx - 1] != b'\n' {
idx -= 1;
}
idx
};
// Walk forward to the end of that line.
let end_idx = {
let mut idx = loc.idx;
while idx < contents.len() && contents[idx] != b'\n' {
idx += 1;
}
idx
};
// Print out the line.
let line = core::str::from_utf8(&contents[line_idx..end_idx]).unwrap();
let header = format!("{}:{}: ", filename, line_no);
let indent = header.len();
eprint!("{}{}\n", header, line);
// Show the error message below the token.
// Caret column = header width + display width of the text before loc.
let spaces = indent + {
let line_to_loc = core::str::from_utf8(&contents[line_idx..loc.idx]).unwrap();
display_width(line_to_loc)
};
eprint!("{}^ {}\n", " ".repeat(spaces), msg); // how many spaces?
}
impl Tokenize {
#[instrument(ret)]
fn error(&self, loc: &Location, msg: &str) {
let line_no = {
let mut n = 1;
for (i, b) in loc.contents.iter().enumerate() {
if i == loc.idx {
break;
}
if *b == b'\n' {
n += 1;
}
}
n
};
error(
self.current_file.as_ref().unwrap().borrow().name.as_str(),
line_no,
&loc,
msg,
);
panic!(); // TODO: replace with process exit?
}
}
impl Token {
// Reports an error at this token's location, then panics (fatal).
#[instrument(ret)]
fn error(&self, msg: &str) {
let file = &self.file;
error(&file.borrow().name, self.line_no, &self.loc, msg);
panic!(); // TODO: replace with process exit?
}
// Same report as error() but non-fatal: prints and returns.
#[instrument(ret)]
fn warn(&self, msg: &str) {
let file = &self.file;
error(&file.borrow().name, self.line_no, &self.loc, msg);
}
}
#[cfg(test)]
mod test_errors {
// Placeholder for diagnostics-format tests (caret alignment, etc.).
#[test]
fn errors() {
// TODO:
}
}
impl Token {
    /// Creates a token of `kind` covering `loc` in `current_file`.
    /// The line number starts at -1 (unassigned) and macro/hideset state
    /// starts empty; both are filled in by later passes.
    #[instrument(ret, level = Level::DEBUG)]
    pub fn new(
        kind: TokenKind,
        current_file: Rc<RefCell<File>>,
        loc: Location,
        at_bol: bool,
        has_space: bool,
    ) -> Self {
        Self {
            kind,
            loc,
            file: current_file,
            filename: None,
            line_no: -1, // -1 marks "not yet assigned"
            line_delta: 0,
            at_bol,
            has_space,
            hideset: Vec::new(),
            origin: None,
            other: TokenFields::None,
        }
    }
}
/// Returns the byte length of the identifier at the start of `start`,
/// or 0 if the bytes do not begin an identifier.
#[instrument(ret, level = "debug")]
fn read_ident(start: &[u8]) -> usize {
    let (first, mut rest) = decode_utf8(start).unwrap(); // TODO: unwrap
    if !is_ident1(first) {
        return 0;
    }
    // Consume code points while they remain valid identifier characters.
    while !rest.is_empty() {
        let (c, after) = decode_utf8(rest).unwrap(); // TODO:
        if !is_ident2(c) {
            break;
        }
        rest = after;
    }
    start.len() - rest.len()
}
#[cfg(test)]
mod test_read_ident {
use super::read_ident;
// An identifier may contain digits but not start with one.
#[test]
fn test_read_ident() {
let idents = [("asdfadsf1", 9), ("123adsfas", 0)];
for (s, elen) in idents {
let alen = read_ident(s.as_bytes());
assert_eq!(elen, alen);
}
}
}
/// Converts one hex digit to its value (0-15).
/// NOTE(review): assumes `c` is a valid hex digit — callers gate on
/// isxdigit; any other byte falls into the uppercase branch and produces
/// garbage (or underflows in debug builds). TODO confirm all call sites.
#[instrument(ret)]
fn from_hex(c: u8) -> u8 {
    match c {
        b'0'..=b'9' => c - b'0',
        b'a'..=b'f' => c - b'a' + 10,
        _ => c - b'A' + 10,
    }
}
/// Returns the byte length of the punctuator at the start of `p`,
/// or 0 if `p` does not begin with one.
#[instrument(ret, level = "debug")]
fn read_punct(p: &[u8]) -> usize {
    // Multi-byte punctuators, longest variants first so "<<=" wins over "<<".
    let kw = [
        "<<=", ">>=", "...", "==", "!=", "<=", ">=", "->", "+=", "-=", "*=", "/=", "++", "--",
        "%=", "&=", "|=", "^=", "&&", "||", "<<", ">>", "##",
    ];
    if let Some(w) = kw.iter().find(|w| p.starts_with(w.as_bytes())) {
        return w.len();
    }
    // Fall back to a single-character punctuator check.
    // TODO: is p[0] a u8?
    let single = unsafe { libc::ispunct(p[0] as libc::c_int) > 0 };
    if single {
        1
    } else {
        0
    }
}
impl Token {
#[instrument(ret, level = Level::DEBUG)]
fn is_keyword(&self) -> bool {
let set: HashSet<&str> = HashSet::from([
"return",
"if",
"else",
"for",
"while",
"int",
"sizeof",
"char",
"struct",
"union",
"short",
"long",
"void",
"typedef",
"_Bool",
"enum",
"static",
"goto",
"break",
"continue",
"switch",
"case",
"default",
"extern",
"_Alignof",
"_Alignas",
"do",
"signed",
"unsigned",
"const",
"volatile",
"auto",
"register",
"restrict",
"__restrict",
"__restrict__",
"_Noreturn",
"float",
"double",
"typeof",
"asm",
"_Thread_local",
"__thread",
"_Atomic",
"__attribute__",
]);
return set.contains(self.loc.as_str());
}
}
impl Tokenize {
// Escape sequences are defined using themselves here.
// '\n' is implemented using '\n'. This tautological definition
// works because the compiler that compiles our compiler knows what
// '\n' actually is. In other words, we "inherit" the ASCII code of '\n'
// from the compiler that compiles our compiler, so we don't have to teach
// the actual code here.
//
// This fact has huge implications not only for the correctness
// of the compiler but also for the security of the generated code.
// For more info, read "Reflections on Trusting Trust" by Ken Thompson
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
//
// Reads one escape sequence (the part after the backslash) from `p`,
// advancing `p` past the consumed bytes, and returns the code point.
#[instrument(ret)]
fn read_escaped_char<'a>(&'a self, p: &mut Location) -> u32 {
if b'0' <= p[0] && p[0] <= b'7' {
// Read an octal number.
let mut c = (p[0] - b'0') as u32;
let mut i: usize = 1;
p.next();
// Up to two more octal digits. When p[0] is not octal the cursor is
// not advanced, so re-checking the same byte on later iterations is
// equivalent to breaking out of the loop.
while i < 3 {
if b'0' <= p[0] && p[0] <= b'7' {
c <<= 3;
c += (p[0] - b'0') as u32;
p.next();
}
i += 1;
}
return c;
} else if p[0] == b'x' {
// Read a hexadecimal number.
p.next();
if unsafe { libc::isxdigit(p[0] as i32) == 0 } {
// TODO: preemptively replace with Location?
self.error(p, "invalid hex escape sequence");
}
// Consume every hex digit; like C, \x has no length limit, so the
// value can silently overflow u32 on absurd input.
let mut c: u32 = 0;
while p.len() > 0 && unsafe { libc::isxdigit(p[0] as i32) > 0 } {
c <<= 4;
c += from_hex(p[0]) as u32;
p.next();
}
return c;
} else {
// Single-character escapes; unknown escapes yield the character
// itself (e.g. "\q" == 'q'), matching GCC/chibicc behavior.
let c = match p[0] {
b'a' => 7 as u32,
b'b' => 8 as u32,
b't' => b'\t' as u32,
b'n' => b'\n' as u32,
b'v' => 11 as u32,
b'f' => 12 as u32,
b'r' => b'\r' as u32,
b'e' => 27,
_ => p[0] as u32,
};
p.next();
return c;
}
}
// Finds the closing double-quote for a string literal starting at `loc`
// (which points just past the opening quote). Returns a Location whose
// `idx` sits exactly on the closing quote. Fatal error on newline or NUL
// before the quote.
#[instrument(ret)]
fn string_literal_end(&self, loc: &Location) -> Location {
let mut end = loc.clone();
while let Some(c) = end.next() {
if c == b'"' {
break;
}
if c == b'\n' || c == b'\0' {
self.error(loc, "unclosed string literal");
}
if c == b'\\' {
// Skip the escaped character so an escaped quote doesn't end the scan.
end.next();
}
}
// next() advanced past the quote; step back onto it.
// NOTE(review): if the span runs out before any quote is found, this
// assert fires instead of reporting "unclosed string literal" — the
// code presumably relies on a NUL/newline sentinel; confirm.
end.idx -= 1;
assert!(end[0] == b'"');
return end;
}
// Reads a narrow string literal. `start` marks where the token begins
// (e.g. at a prefix) and `quote` points at the opening '"'. Returns a
// Str token whose payload is the unescaped bytes, typed as a char array
// with room for the terminating NUL.
#[instrument(ret)]
fn read_string_literal(
&self,
start: &Location,
quote: &Location,
at_bol: bool,
has_space: bool,
) -> Token {
let mut q = quote.clone();
// Step past the opening quote, then bound the scan at the closing quote.
q.next();
let end = self.string_literal_end(&q);
q.end_idx = end.idx;
let mut buf: Vec<u8> = Vec::with_capacity(end.idx - quote.idx);
while let Some(c) = q.next() {
if c == b'\\' {
let ec: u32 = self.read_escaped_char(&mut q);
// NOTE(review): truncates code points > 0xFF to one byte — confirm
// multi-byte escapes are handled elsewhere for narrow strings.
buf.push(ec as u8); // TODO: unsafe cast?
} else {
buf.push(c);
}
}
// The token's span covers the whole literal including both quotes.
let mut tok = Token::new(
TokenKind::Str,
self.current_file.clone().unwrap(),
Location {
idx: start.idx,
end_idx: end.idx + 1,
contents: end.contents,
},
at_bol,
has_space,
);
// Type is char[len + 1] to account for the implicit NUL terminator.
tok.other = TokenFields::Str(
array_of(TY_CHAR, (buf.len() + 1) as i32),
buf.into_boxed_slice(),
);
return tok;
}
// Read a UTF-8-encoded string literal and transcode it in UTF-16.
//
// UTF-16 is yet another variable-width encoding for Unicode. Code
// points smaller than U+10000 are encoded in 2 bytes. Code points
// equal to or larger than that are encoded in 4 bytes. Each 2 bytes
// in the 4 byte sequence is called "surrogate", and a 4 byte sequence
// is called a "surrogate pair".
#[instrument(ret)]
fn read_utf16_string_literal(
    &self,
    start: &Location,
    quote: &Location,
    at_bol: bool,
    has_space: bool,
) -> Token {
    let mut q = quote.clone();
    q.next();
    let end = self.string_literal_end(&q);
    q.end_idx = end.idx;
    let mut buf: Vec<u16> = Vec::with_capacity(end.idx - start.idx);
    while q.idx < end.idx {
        if q[0] == b'\\' {
            q.next();
            let ec = self.read_escaped_char(&mut q);
            buf.push(ec as u16); // TODO: unsafe cast?
        } else {
            let p = q.as_bytes();
            let (cd, next_p) = decode_utf8(p).unwrap();
            q.idx += p.len() - next_p.len();
            if cd < 0x10000 {
                // Encode a code point in 2 bytes.
                buf.push(cd as u16);
            } else {
                // Encode a code point in 4 bytes (a surrogate pair).
                let cs = cd - 0x10000;
                let b1 = 0xd800 + ((cs >> 10) & 0x3ff);
                let b2 = 0xdc00 + (cs & 0x3ff);
                buf.push(b1 as u16);
                buf.push(b2 as u16);
            }
        }
    }
    let mut tok = Token::new(
        TokenKind::Str,
        self.current_file.clone().unwrap(),
        Location {
            idx: start.idx,
            end_idx: end.idx + 1,
            contents: q.contents,
        },
        at_bol,
        has_space,
    );
    // BUG FIX: the old code transmuted Box<[u16]> to Box<[u8]>, which is
    // undefined behavior: the fat pointer's length stays the *element*
    // count (half the byte count), and the allocation is eventually
    // freed with the wrong layout. Serialize the UTF-16 units to
    // native-endian bytes explicitly instead.
    let mut bytes: Vec<u8> = Vec::with_capacity(buf.len() * 2);
    for unit in &buf {
        bytes.extend_from_slice(&unit.to_ne_bytes());
    }
    // Array length is in UTF-16 units (+1 for the NUL terminator).
    tok.other = TokenFields::Str(
        array_of(TY_USHORT, (buf.len() + 1) as i32),
        bytes.into_boxed_slice(),
    );
    return tok;
}
// Read a UTF-8-encoded string literal and transcode it in UTF-32.
//
// UTF-32 is a fixed-width encoding for Unicode. Each code point is
// encoded in 4 bytes.
#[instrument(ret)]
fn read_utf32_string_literal(
    &self,
    start: &Location,
    quote: &Location,
    ty: Type,
    at_bol: bool,
    has_space: bool,
) -> Token {
    let mut q = quote.clone();
    q.next();
    let end = self.string_literal_end(&q);
    q.end_idx = end.idx;
    // Capacity is an upper bound: one u32 per source byte.
    let mut buf: Vec<u32> = Vec::with_capacity(end.idx - start.idx);
    while q.idx < end.idx {
        if q[0] == b'\\' {
            q.next();
            let ec = self.read_escaped_char(&mut q);
            buf.push(ec);
        } else {
            let p = q.as_bytes();
            let (dc, next_p) = decode_utf8(p).unwrap();
            q.idx += p.len() - next_p.len();
            buf.push(dc);
        }
    }
    let mut tok = Token::new(
        TokenKind::Str,
        self.current_file.clone().unwrap(),
        Location {
            idx: start.idx,
            end_idx: end.idx + 1,
            contents: q.contents,
        },
        at_bol,
        has_space,
    );
    // BUG FIX: the old code transmuted Box<[u32]> to Box<[u8]>, which is
    // undefined behavior (fat-pointer length stays the element count and
    // the allocation is freed with the wrong layout). Serialize the
    // UTF-32 units to native-endian bytes explicitly instead.
    let mut bytes: Vec<u8> = Vec::with_capacity(buf.len() * 4);
    for unit in &buf {
        bytes.extend_from_slice(&unit.to_ne_bytes());
    }
    // Array length is in UTF-32 units (+1 for the NUL terminator).
    tok.other = TokenFields::Str(
        array_of(ty, (buf.len() + 1) as i32),
        bytes.into_boxed_slice(),
    );
    return tok;
}
// Read a character literal such as 'a', '\n', or a multibyte UTF-8
// character. `quote` points at the opening single quote; `ty` is the
// integer type of the resulting token (int / unsigned short / unsigned
// int for plain, u- and U-prefixed literals respectively).
#[instrument(ret)]
fn read_char_literal(
    &self,
    start: &Location,
    quote: &Location,
    ty: &Type,
    at_bol: bool,
    has_space: bool,
) -> Token {
    let mut q = quote.clone();
    q.next();
    let first_literal_byte = q[0];
    if first_literal_byte == b'\0' {
        self.error(start, "unclosed char literal");
    }
    let c32 = {
        if first_literal_byte == b'\\' {
            q.next();
            self.read_escaped_char(&mut q)
        } else {
            // Decode one (possibly multibyte) UTF-8 code point.
            let p = q.as_bytes();
            let (c32, nextp) = decode_utf8(p).unwrap();
            q.idx += p.len() - nextp.len();
            c32
        }
    };
    // find() consumes bytes up to and including the closing quote.
    if q.find(|c| *c == b'\'').is_none() {
        self.error(start, "unclosed char literal");
    }
    let end = q;
    let mut tok = Token::new(
        TokenKind::Num,
        self.current_file.clone().unwrap(),
        Location {
            idx: start.idx,
            // NOTE(review): find() appears to have advanced past the
            // closing quote already, so no +1 here — confirm against the
            // other literal readers, which use end.idx + 1.
            end_idx: end.idx,
            contents: end.contents,
        },
        at_bol,
        has_space,
    );
    tok.other = TokenFields::Int(ty.clone(), c32 as i64);
    return tok;
}
}
impl Token {
// Try to convert this pp-number token into an integer constant.
// Returns false if the text is not a valid integer literal (e.g. it is
// a floating-point constant), leaving the token unchanged.
#[instrument(ret)]
fn convert_pp_int(&mut self) -> bool {
    // TODO: functionality is different because p stops at loc.end_idx
    // while the original code may look at addresses after loc.end_idx
    let mut p = self.loc.as_bytes();
    // Read a binary, octal, decimal, or hexadecimal number.
    // BUG FIX: the prefix checks below need `p.len() > 2` (not `>= 2`)
    // before peeking at p[2]; the old code indexed out of bounds for
    // two-byte inputs like "0x".
    let mut base = 10;
    if p.len() > 2
        && p[..2].eq_ignore_ascii_case(b"0x")
        && p[2].is_ascii_hexdigit()
    {
        p = &p[2..];
        base = 16;
    } else if p.len() > 2
        && p[..2].eq_ignore_ascii_case(b"0b")
        && (p[2] == b'0' || p[2] == b'1')
    {
        p = &p[2..];
        base = 2;
    } else if !p.is_empty() && p[0] == b'0' {
        base = 8;
    }
    // Split the digits (`pint`) from an optional "l"/"u" suffix (`p`).
    let mut pint = &p[..];
    let mut flag = false;
    for i in 0..p.len() {
        if p[i].eq_ignore_ascii_case(&b'l') || p[i].eq_ignore_ascii_case(&b'u') {
            pint = &p[..i];
            p = &p[i..];
            flag = true;
            break;
        }
    }
    if !flag {
        p = &p[p.len()..];
    }
    // Parse the digits; a parse failure means this is not an integer
    // (e.g. "1.5" or an empty digit string) and the caller should try
    // the floating-point path.
    let (val, l, u) = match u64::from_str_radix(core::str::from_utf8(&pint).unwrap(), base) {
        Ok(val_unsigned) => {
            let val = val_unsigned as i64;
            // Read the U/L suffix combination, longest match first.
            if p.starts_with(b"LLU")
                || p.starts_with(b"LLu")
                || p.starts_with(b"llU")
                || p.starts_with(b"llu")
                || p.starts_with(b"ULL")
                || p.starts_with(b"Ull")
                || p.starts_with(b"uLL")
                || p.starts_with(b"ull")
            {
                p = &p[3..];
                (val, true, true)
            } else if p.len() >= 2
                && (p[..2].eq_ignore_ascii_case(b"lu") || p[..2].eq_ignore_ascii_case(b"ul"))
            {
                p = &p[2..];
                (val, true, true)
            } else if p.starts_with(b"LL") || p.starts_with(b"ll") {
                p = &p[2..];
                (val, true, false)
            } else if p.len() > 0 && p[0].eq_ignore_ascii_case(&b'l') {
                p = &p[1..];
                (val, true, false)
            } else if p.len() > 0 && p[0].eq_ignore_ascii_case(&b'u') {
                p = &p[1..];
                (val, false, true)
            } else {
                (val, false, false)
            }
        }
        Err(_) => {
            return false;
        }
    };
    // Leftover characters after the suffix mean this is not an integer.
    if p.len() != 0 {
        return false;
    }
    // Infer a type from the value's magnitude and the suffix, per the
    // C rules: decimal constants never silently become unsigned.
    let ty: Type = {
        if base == 10 {
            if l && u {
                TY_ULONG
            } else if l {
                TY_LONG
            } else if u {
                if val >> 32 != 0 {
                    TY_ULONG
                } else {
                    TY_UINT
                }
            } else {
                if val >> 31 != 0 {
                    TY_LONG
                } else {
                    TY_INT
                }
            }
        } else {
            if l && u {
                TY_ULONG
            } else if l {
                if val >> 63 != 0 {
                    TY_ULONG
                } else {
                    TY_LONG
                }
            } else if u {
                if val >> 32 != 0 {
                    TY_ULONG
                } else {
                    TY_UINT
                }
            } else if val >> 63 != 0 {
                TY_ULONG
            } else if val >> 32 != 0 {
                TY_LONG
            } else if val >> 31 != 0 {
                TY_UINT
            } else {
                TY_INT
            }
        }
    };
    self.kind = TokenKind::Num;
    self.other = TokenFields::Int(ty, val);
    return true;
}
// The definition of the numeric literal at the preprocessing stage
// is more relaxed than the definition of that at the later stages.
// In order to handle that, a numeric literal is tokenized as a
// "pp-number" token first and then converted to a regular number
// token after preprocessing.
//
// This function converts a pp-number token to a regular number token.
#[instrument(ret)]
fn convert_pp_number(&mut self) {
    // Try the integer forms first; fall through to floating point.
    if self.convert_pp_int() {
        return;
    }
    // Parse the mantissa/exponent with C's strtod, which accepts the
    // same syntax as a C floating constant and reports how many bytes
    // it consumed via the end pointer.
    // NOTE(review): strtod is locale-sensitive for the decimal point —
    // confirm the process always runs in the "C" locale.
    let (val, bytes_read) = unsafe {
        let s = CString::new(self.loc.as_bytes()).unwrap();
        let start = s.as_ptr();
        let end: Cell<*mut c_char> = Cell::new(std::ptr::null_mut());
        let val = strtod(s.as_ptr(), end.as_ptr());
        let len = end.get().byte_offset_from(start);
        (val, len as usize)
    };
    // Whatever strtod did not consume must be a size suffix (f/F or
    // l/L), otherwise the constant is malformed.
    let mut end = &self.loc.as_bytes()[bytes_read..];
    let (ty, val) = {
        if end.len() > 0 && end[0].eq_ignore_ascii_case(&b'f') {
            end = &end[1..];
            (TY_FLOAT, val)
        } else if end.len() > 0 && end[0].eq_ignore_ascii_case(&b'l') {
            end = &end[1..];
            (TY_LDOUBLE, val)
        } else {
            (TY_DOUBLE, val)
        }
    };
    if end.len() != 0 {
        self.error("invalid numeric constant");
    }
    self.kind = TokenKind::Num;
    self.other = TokenFields::Float(ty, val);
}
}
impl Tokenize {
// Finalize preprocessing tokens in place: identifiers that are C
// keywords become Keyword tokens, and pp-numbers become real number
// tokens. Operates on a vector of tokens.
#[instrument(skip_all)]
fn convert_pp_tokens(tokens: &mut Vec<Token>) {
    for tok in tokens.iter_mut() {
        if tok.is_keyword() {
            tok.kind = TokenKind::Keyword;
        } else if tok.kind == TokenKind::PPNum {
            tok.convert_pp_number();
        }
    }
}
// Initialize line info for all tokens.
//
// Note: we take a vector, and assume the tokens are sorted.
#[instrument(skip_all)]
fn add_line_number(&self, tokens: &mut Vec<Token>) {
    let p: &[u8] = &self.current_file.as_ref().unwrap().borrow().contents;
    let mut n = 1; // current line number (1-based)
    let mut token_idx = 0; // next token awaiting a line number
    // Single pass over the file's bytes: when the byte offset reaches
    // the next token's start offset, stamp it with the current line.
    for (i, c) in p.into_iter().enumerate() {
        if token_idx == tokens.len() {
            // All tokens have been stamped; nothing left to do.
            // assert_eq!(*c, b'\n');
            continue;
        }
        if i == tokens[token_idx].loc.idx {
            tokens[token_idx].line_no = n;
            token_idx += 1;
        }
        if *c == b'\n' {
            n += 1;
        }
    }
}
}
#[cfg(test)]
mod test_read {
    use std::{cell::RefCell, rc::Rc};
    use super::{File, Location, Tokenize};
    // Unit test for escape-sequence decoding: each case is the text
    // that follows a backslash, paired with the expected code point.
    #[test]
    fn test_read_escaped_character() {
        let cases = [
            ("044", 36), // octet
            ("xFF", 255), // hex
            ("\n", '\n' as u32),
            ("u", b'u' as u32), //
        ];
        let t = fake_tokenize();
        for (s, e) in cases {
            let mut loc = Location {
                idx: 0,
                end_idx: s.len(),
                contents: Rc::new(s.into()),
            };
            let a = t.read_escaped_char(&mut loc);
            // The whole input must be consumed and decode to `e`.
            assert_eq!(loc.idx, loc.contents.len());
            assert_eq!(a, e);
        }
    }
    // Build a Tokenize with a placeholder file — sufficient for methods
    // that never touch real file contents.
    fn fake_tokenize() -> Tokenize {
        let fake_file = File {
            name: "fake".into(),
            file_no: -1,
            contents: Rc::new("fake".into()),
            display_name: "fake".into(),
            line_delta: 0,
        };
        let t = Tokenize {
            current_file: Some(Rc::new(RefCell::new(fake_file))),
            input_files: Vec::new(),
        };
        t
    }
}
impl Tokenize {
// Dispatch a prefixed string-literal token to the right transcoder
// based on the element size of `basety`: 2 bytes means UTF-16,
// anything else means UTF-32.
#[instrument(ret)]
fn tokenize_string_literal(
    &self,
    tok: &Token,
    basety: &Type,
    at_bol: bool,
    has_space: bool,
) -> Token {
    match basety.size {
        2 => self.read_utf16_string_literal(&tok.loc, &tok.loc, at_bol, has_space),
        _ => self.read_utf32_string_literal(&tok.loc, &tok.loc, basety.clone(), at_bol, has_space),
    }
}
#[instrument(ret)]
fn tokenize(&mut self, file: Rc<RefCell<File>>) -> TokenList {
self.current_file = Some(file.clone());
let mut loc = Location {
idx: 0,
end_idx: file.borrow().contents.len(),
contents: file.borrow().contents.clone(),
};
let mut tokens: Vec<Token> = Vec::new();
let mut at_bol = true;
let mut has_space: bool = false;
while loc.idx < loc.end_idx {
// Skip line comments.
if loc.as_bytes().starts_with("//".as_bytes()) {
loc.idx += 2;
while loc[0] != b'\n' {
loc.idx += 1;
}
has_space = true;
}
// Skip block comments
else if loc.as_bytes().starts_with("/*".as_bytes()) {
#[instrument(ret, level = "debug")]
fn strstr(s: &[u8], m: &[u8]) -> Option<usize> {
for i in 0..s.len() - 1 {
let mut mf = true;
for j in 0..m.len() {
if s[i + j] != m[j] {
mf = false;
break;
}
}
if mf {
return Some(i);
}
}
return None;
}
let q = strstr(&loc.as_bytes()[2..], "*/".as_bytes());
match q {
Some(i) => {
loc.idx += 2 + i;
loc.idx += 2;
has_space = true
}
None => {
self.error(&loc, "unclosed block comment");
return TokenList {
tokens: Vec::new(),
idx: 0,
};
}
}
}
// Skip newline.
else if loc[0] == b'\n' {
loc.idx += 1;
at_bol = true;
has_space = false;
}
// Skip whitespace characters.
else if loc[0].is_ascii_whitespace() {
loc.idx += 1;
has_space = true;
} else {
let at_bol_copy = at_bol;
let has_space_copy = has_space;
let mut push_token = |tok| {
tokens.push(tok);
at_bol = false;
has_space = false;
};
// Numeric literal
if loc[0].is_ascii_digit() || (loc[0] == b'.' && loc[1].is_ascii_digit()) {
let start: usize = loc.idx;
loc.idx += 1;
loop {
if loc.len() >= 2
&& loc[0] != 0
&& loc[1] != 0
&& "eEpP".as_bytes().contains(&loc[0])
&& "+-".as_bytes().contains(&loc[1])
{
loc.idx += 2;
} else if loc.len() > 0
&& (loc[0].is_ascii_alphanumeric() || loc[0] == b'.')
{
loc.idx += 1;
} else {
break;
}
}
push_token(Token::new(
TokenKind::PPNum,
self.current_file.clone().unwrap(),
Location {
idx: start,
end_idx: loc.idx,
contents: loc.contents.clone(),
},
at_bol_copy,
has_space_copy,
));
}
// String literal
else if loc[0] == b'"' {
let tok = self.read_string_literal(&loc, &loc, at_bol_copy, has_space_copy);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// UTF-8 string literal
else if loc.as_bytes().starts_with("u8\"".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 2;
let tok = self.read_string_literal(&loc, "e, at_bol_copy, has_space_copy);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// UTF-16 string literal
else if loc.as_bytes().starts_with("u\"".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok =
self.read_utf16_string_literal(&loc, "e, at_bol_copy, has_space_copy);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// Wide string literal
else if loc.as_bytes().starts_with("L\"".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok = self.read_utf32_string_literal(
&loc,
"e,
TY_INT,
at_bol_copy,
has_space_copy,
);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// UTF-32 string literal
else if loc.as_bytes().starts_with("U\"".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok = self.read_utf32_string_literal(
&loc,
"e,
TY_UINT,
at_bol_copy,
has_space_copy,
);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// Character literal
else if loc[0] == b'\'' {
let tok =
self.read_char_literal(&loc, &loc, &TY_INT, at_bol_copy, has_space_copy);
// TODO: what is the meaning of cur->val = (char) cur->val;
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// UTF16- Character literal
else if loc.as_bytes().starts_with("u'".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok = self.read_char_literal(
&loc,
"e,
&TY_USHORT,
at_bol_copy,
has_space_copy,
);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// Wide character literal
else if loc.as_bytes().starts_with("L'".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok =
self.read_char_literal(&loc, "e, &TY_INT, at_bol_copy, has_space_copy);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// UTF-32 character literal
else if loc.as_bytes().starts_with("U'".as_bytes()) {
let mut quote = loc.clone();
quote.idx += 1;
let tok: Token =
self.read_char_literal(&loc, "e, &TY_UINT, at_bol_copy, has_space_copy);
loc.idx = tok.loc.end_idx;
push_token(tok);
}
// Identifier or keyword
else {
let ident_len = read_ident(&loc.as_bytes());
if ident_len > 0 {
let tok = Token::new(
TokenKind::Ident,
self.current_file.clone().unwrap(),
Location {
idx: loc.idx,
end_idx: loc.idx + ident_len,
contents: loc.contents.clone(),
},
at_bol_copy,
has_space_copy,
);
loc.idx = tok.loc.end_idx;
push_token(tok);
continue;
}
let punct_len = read_punct(loc.as_bytes());
if punct_len > 0 {
let tok = Token::new(
TokenKind::Punct,
self.current_file.clone().unwrap(),
Location {
idx: loc.idx,
end_idx: loc.idx + punct_len,
contents: loc.contents.clone(),
},
at_bol_copy,
has_space_copy,
);
loc.idx = tok.loc.end_idx;
push_token(tok);
continue;
}
self.error(&loc, "invalid token");
}
}
}
self.add_line_number(&mut tokens);
return TokenList {
tokens: tokens,
idx: 0,
};
}
}
// Read the entire contents of `path` ("-" means stdin). Returns None if
// the path cannot be opened, is a directory, or a read error occurs;
// the old code panicked on mid-stream read errors instead.
#[instrument]
fn read_file(path: &str) -> Option<Vec<u8>> {
    let mut fp: Box<dyn Read>;
    if path == "-" {
        // By convention, "-" means read from stdin.
        fp = Box::new(stdin());
    } else {
        match std::fs::File::open(path) {
            Ok(bf) => {
                // Directories are openable but not readable as files.
                if bf.metadata().map(|m| m.is_dir()).unwrap_or(false) {
                    return None;
                }
                fp = Box::new(bf);
            }
            Err(_) => {
                return None;
            }
        }
    }
    // Read the entire stream; treat read failures like an unreadable file.
    let mut buf = Vec::new();
    if fp.read_to_end(&mut buf).is_err() {
        return None;
    }
    // Make sure that the last line is properly terminated with '\n'.
    if buf.last() != Some(&b'\n') {
        buf.push(b'\n');
    }
    return Some(buf);
}
impl Tokenize {
    // Snapshot of every file tokenized so far (the Rc handles are
    // cloned, not the file contents).
    #[instrument(ret)]
    fn get_input_files(&self) -> Vec<Rc<RefCell<File>>> {
        self.input_files.clone()
    }
}
impl File {
    // Construct a File. `display_name` starts out identical to `name`;
    // it exists as a separate field so it can diverge later.
    #[instrument(ret, skip(contents))]
    fn new(name: String, file_no: i32, contents: Vec<u8>) -> Self {
        File {
            display_name: name.clone(),
            name,
            file_no,
            contents: Rc::new(contents),
            line_delta: 0,
        }
    }
}
// Replaces \r or \r\n with \n, rewriting the buffer in place.
#[instrument(skip_all)]
fn canonicalize_newline(p: &mut Vec<u8>) {
    let mut i = 0; // read cursor
    let mut j = 0; // write cursor; j <= i always, so in-place is safe
    while i < p.len() {
        // BUG FIX: guard `i + 1 < p.len()` before reading p[i + 1]; the
        // old code indexed out of bounds when the input ended with '\r'.
        if p[i] == b'\r' && i + 1 < p.len() && p[i + 1] == b'\n' {
            // "\r\n" (DOS) collapses to a single '\n'.
            p[j] = b'\n';
            i += 2;
            j += 1;
        } else if p[i] == b'\r' {
            // A bare '\r' (old Mac) becomes '\n'.
            p[j] = b'\n';
            i += 1;
            j += 1;
        } else {
            p[j] = p[i];
            i += 1;
            j += 1;
        }
    }
    p.truncate(j);
}
// Removes backslashes followed by a newline (line continuations),
// rewriting the buffer in place.
#[instrument(skip_all)]
fn remove_backslash_newline(p: &mut Vec<u8>) {
    let mut i = 0; // read cursor
    let mut j = 0; // write cursor; j <= i always
    // We want to keep the number of newline characters so that
    // the logical line number matches the physical one.
    // This counter maintains the number of newlines we have removed.
    let mut n = 0;
    while i < p.len() {
        // BUG FIX: guard `i + 1 < p.len()` before reading p[i + 1]; the
        // old code indexed out of bounds when the input ended with '\\'.
        if p[i] == b'\\' && i + 1 < p.len() && p[i + 1] == b'\n' {
            i += 2;
            n += 1;
        } else if p[i] == b'\n' {
            p[j] = b'\n';
            i += 1;
            j += 1;
            // Re-emit the removed newlines at the next real line break.
            while n > 0 {
                p[j] = b'\n';
                j += 1;
                n -= 1;
            }
        } else {
            p[j] = p[i];
            i += 1;
            j += 1;
        }
    }
    // Flush any newlines still owed at end of input.
    while n > 0 {
        p[j] = b'\n';
        j += 1;
        n -= 1;
    }
    p.truncate(j);
}
// Parse exactly `len` hex digits from the front of `p` into a code
// point. Returns 0 (which is not a valid universal character) when the
// input is too short or contains a non-hex digit.
#[instrument(ret, level = "debug")]
fn read_universal_char(p: &[u8], len: usize) -> u32 {
    // BUG FIX: the old code indexed p[i] without checking the slice
    // length, panicking on a truncated escape like "\u12" at EOF.
    if p.len() < len {
        return 0;
    }
    let mut c: u32 = 0;
    for &b in &p[..len] {
        if !b.is_ascii_hexdigit() {
            return 0;
        }
        c = (c << 4) | (from_hex(b) as u32);
    }
    return c;
}
// Replace \u or \U escape sequences with corresponding UTF-8 bytes,
// rewriting the buffer in place. The UTF-8 encoding of a code point is
// never longer than its escape spelling, so the write cursor `j` never
// overtakes the read cursor `i`.
#[instrument(skip_all)]
fn convert_universal_chars(p: &mut Vec<u8>) {
    let mut i = 0;
    let mut j = 0;
    while i < p.len() {
        if p[i..].starts_with("\\u".as_bytes()) {
            let c = read_universal_char(&p[i + 2..], 4);
            if c > 0 {
                i += 6; // "\uXXXX"
                j += encode_utf8(&mut p[j..], c);
            } else {
                // Not a valid escape: copy one byte and keep scanning.
                p[j] = p[i];
                i += 1;
                j += 1;
            }
        } else if p[i..].starts_with("\\U".as_bytes()) {
            let c = read_universal_char(&p[i + 2..], 8);
            if c > 0 {
                i += 10; // "\UXXXXXXXX"
                j += encode_utf8(&mut p[j..], c);
            } else {
                // BUG FIX: the old code advanced both cursors by 10 here
                // while copying only a single byte, silently corrupting
                // the nine bytes after an invalid \U escape. Mirror the
                // \u failure path: copy one byte, advance by one.
                p[j] = p[i];
                i += 1;
                j += 1;
            }
        } else if p[i] == b'\\' && i + 1 < p.len() {
            // BUG FIX: this branch tested p[0] instead of p[i], so it
            // only ever fired when the buffer *started* with a backslash.
            // Copy the backslash and the escaped character together so
            // e.g. "\\u0041" is not treated as a universal character.
            p[j] = p[i];
            i += 1;
            j += 1;
            p[j] = p[i];
            i += 1;
            j += 1;
        } else {
            p[j] = p[i];
            i += 1;
            j += 1;
        }
    }
    p.truncate(j);
}
impl Tokenize {
    // Read, normalize, and tokenize one source file. Returns None if the
    // file cannot be read. Normalization order matters: newlines first,
    // then line continuations, then universal characters.
    #[instrument]
    pub fn tokenize_file(&mut self, path: &str) -> Option<TokenList> {
        let mut contents = read_file(path)?;
        // UTF-8 texts may start with a 3-byte "BOM" marker sequence.
        // If exists, just skip them because they are useless bytes.
        // (It is actually not recommended to add BOM markers to UTF-8
        // texts, but it's not uncommon particularly on Windows.)
        // BUG FIX: the old `contents[..3]` slice panicked on files
        // shorter than 3 bytes; starts_with handles that, and drain
        // removes the prefix without reallocating a new Vec.
        if contents.starts_with(&[0xef, 0xbb, 0xbf]) {
            contents.drain(..3);
        }
        canonicalize_newline(&mut contents);
        remove_backslash_newline(&mut contents);
        convert_universal_chars(&mut contents);
        // Save the filename for assembler .file directive.
        let file = Rc::new(RefCell::new(File::new(
            path.into(),
            self.input_files.len() as i32,
            contents,
        )));
        self.input_files.push(file.clone());
        Some(self.tokenize(file))
    }
}
#[cfg(test)]
mod test_tokenize {
use std::{cell::RefCell, fmt::Write, rc::Rc};
use crate::chibicc::chibicc::{read_punct, setup_tracing};
use super::{File, Tokenize};
#[test]
fn test_read_punc() {
    // "<=" is a two-character punctuator: read_punct must take the
    // longest match instead of stopping after '<'.
    let s = r#"<= 0x7F) {
buf[0] ="#;
    let len = read_punct(s.as_bytes());
    assert_eq!(len, 2);
}
#[test]
fn test_tokenize() {
    setup_tracing();
    // Contents is a string that hits most branches
    // of tokenize.
    let contents = r#"
// Line Comment
/*
Block comment
*/
// Numbers
12345;
123.45;
10e+7;
0xFF;
0b11110000;
// String literals
strcmp("αβγ", "\u03B1\u03B2\u03B3");
strcmp("日本語", "\u65E5\u672C\u8A9E");
"u8\"a\"";
strcmp(u8"abc", "abc");
({ char16_t x[] = u"αβ"; x[1]; }))
memcmp(L"🍣", "c\363\001\0\0\0\0\0", 8);
sizeof(L"\xffzzz");
memcmp(U"abc", "a\0\0\0b\0\0\0c\0\0\0\0\0\0\0", 16);
memcmp(U"🍣", "c\363\001\0\0\0\0\0", 8);
// Char literals
L'\0';
L'a';
L'\xffffffff'>>31;
L'あ';
u'\0';
u'\xffff'>>15;
U'\xffffffff'>>31;
U'🍣';
// Identifier
var variablename123 = 1234;
// Punctuation
if (variablename123 == true) {
}
"#;
    // NOTE(review): this expected output records the tokenizer's current
    // observable behavior (golden output), including the trailing EOF
    // token — regenerate it if token formatting changes.
    let expected_out = r#"Token(Punct, *)
Token(Punct, /)
Token(PPNum, 12345)
Token(Punct, ;)
Token(PPNum, 123.45)
Token(Punct, ;)
Token(PPNum, 10e+7)
Token(Punct, ;)
Token(PPNum, 0xFF)
Token(Punct, ;)
Token(PPNum, 0b11110000)
Token(Punct, ;)
Token(Ident, strcmp)
Token(Punct, ()
Token(Str, "αβγ")
Token(Punct, ,)
Token(Str, "\u03B1\u03B2\u03B3")
Token(Punct, ))
Token(Punct, ;)
Token(Ident, strcmp)
Token(Punct, ()
Token(Str, "日本語")
Token(Punct, ,)
Token(Str, "\u65E5\u672C\u8A9E")
Token(Punct, ))
Token(Punct, ;)
Token(Str, "u8\"a\"")
Token(Punct, ;)
Token(Ident, strcmp)
Token(Punct, ()
Token(Str, u8"abc")
Token(Punct, ,)
Token(Str, "abc")
Token(Punct, ))
Token(Punct, ;)
Token(Punct, ()
Token(Punct, {)
Token(Ident, char16_t)
Token(Ident, x)
Token(Punct, [)
Token(Punct, ])
Token(Punct, =)
Token(Str, u"αβ")
Token(Punct, ;)
Token(Ident, x)
Token(Punct, [)
Token(PPNum, 1)
Token(Punct, ])
Token(Punct, ;)
Token(Punct, })
Token(Punct, ))
Token(Punct, ))
Token(Ident, memcmp)
Token(Punct, ()
Token(Str, L"🍣")
Token(Punct, ,)
Token(Str, "c\363\001\0\0\0\0\0")
Token(Punct, ,)
Token(PPNum, 8)
Token(Punct, ))
Token(Punct, ;)
Token(Ident, sizeof)
Token(Punct, ()
Token(Str, L"\xffzzz")
Token(Punct, ))
Token(Punct, ;)
Token(Ident, memcmp)
Token(Punct, ()
Token(Str, U"abc")
Token(Punct, ,)
Token(Str, "a\0\0\0b\0\0\0c\0\0\0\0\0\0\0")
Token(Punct, ,)
Token(PPNum, 16)
Token(Punct, ))
Token(Punct, ;)
Token(Ident, memcmp)
Token(Punct, ()
Token(Str, U"🍣")
Token(Punct, ,)
Token(Str, "c\363\001\0\0\0\0\0")
Token(Punct, ,)
Token(PPNum, 8)
Token(Punct, ))
Token(Punct, ;)
Token(Num, L'\0')
Token(Punct, ;)
Token(Num, L'a')
Token(Punct, ;)
Token(Num, L'\xffffffff')
Token(Punct, >>)
Token(PPNum, 31)
Token(Punct, ;)
Token(Num, L'あ')
Token(Punct, ;)
Token(Num, u'\0')
Token(Punct, ;)
Token(Num, u'\xffff')
Token(Punct, >>)
Token(PPNum, 15)
Token(Punct, ;)
Token(Num, U'\xffffffff')
Token(Punct, >>)
Token(PPNum, 31)
Token(Punct, ;)
Token(Num, U'🍣')
Token(Punct, ;)
Token(Ident, var)
Token(Ident, variablename123)
Token(Punct, =)
Token(PPNum, 1234)
Token(Punct, ;)
Token(Ident, if)
Token(Punct, ()
Token(Ident, variablename123)
Token(Punct, ==)
Token(Ident, true)
Token(Punct, ))
Token(Punct, {)
Token(Punct, })
Token(EOF, )
"#;
    let f = Rc::new(RefCell::new(File {
        name: "test".into(),
        file_no: 0,
        contents: Rc::new(Vec::from(contents.as_bytes())),
        display_name: "test".into(),
        line_delta: 0,
    }));
    let mut t = Tokenize {
        current_file: Some(f.clone()),
        input_files: Vec::from([f.clone()]),
    };
    let v = t.tokenize(f);
    let mut actual_out = String::new();
    for t in v.tokens {
        // NOTE(review): write_str/write_char Results are ignored; that is
        // harmless for an in-memory String but produces must_use warnings.
        actual_out.write_str(t.to_string().as_str());
        actual_out.write_char('\n');
    }
    println!("{}", actual_out);
    {
        // Compare line by line so a mismatch prints the exact offending
        // line before the final assertion fails.
        let expected_lines: Vec<&str> = expected_out.split("\n").collect();
        let actual_lines: Vec<&str> = actual_out.split("\n").collect();
        let mut fail = false;
        for (i, line) in expected_lines.iter().enumerate() {
            if i >= actual_lines.len() {
                break;
            }
            if *line != actual_lines[i] {
                fail = true;
                println!(
                    "line not equal: i={}\n\texpected={}\n\tactual={}",
                    i, line, actual_lines[i]
                );
            }
        }
        if expected_lines.len() != actual_lines.len() {
            fail = true;
            println!(
                "len(expected_lines) != len(actual_lines): {} {}",
                expected_lines.len(),
                actual_lines.len()
            );
        }
        assert!(!fail);
    }
}
}
//
// preprocess.c
//
// The preprocessor takes a list of tokens as an input and returns a
// new list of tokens as an output.
//
// The preprocessing language is designed in such a way that it's
// guaranteed to terminate even if there is a recursive macro.
// Informally speaking, a macro is applied only once for each token.
// That is, if a macro token T appears in a result of direct or
// indirect macro expansion of T, T won't be expanded any further.
// For example, if T is defined as U, and U is defined as T, then
// token T is expanded to U and then to T, and the macro expansion
// stops at that point.
//
// To achieve the above behavior, we attach for each token a set of
// macro names from which the token is expanded. The set is called
// "hideset". Hideset is initially empty, and every time we expand a
// macro, the macro name is added to the resulting tokens' hideset.
//
// The above macro expansion algorithm is explained in this document
// written by Dave Prossor, which is used as a basis for the standard's
// wording.
// https://github.com/rui314/chibicc/wiki/cpp.algo.pdf
// A formal parameter of a function-like macro.
#[derive(Debug)]
struct MacroParam {
    name: String,
}
// An actual argument bound to a macro parameter during expansion.
#[derive(Clone, Debug)]
struct MacroArg {
    name: String,
    is_va_args: bool, // true when this argument binds the variadic tail
    tok: Rc<Vec<Token>>,
}
// Dynamic macros (e.g. __LINE__-style built-ins) are implemented as
// handler functions instead of a fixed token body.
type MacroHandlerFn = fn(&Token, &mut Preprocess) -> Token;
#[derive(Debug)]
struct Macro {
    name: String,
    is_objlike: bool, // Object-like or function-like
    params: Vec<MacroParam>,
    // Name bound to a trailing "..." parameter, if the macro is variadic.
    va_args_name: Option<String>,
    // Replacement tokens; None when `handler` provides the expansion.
    body: Option<Rc<TokenList>>,
    handler: Option<MacroHandlerFn>,
}
// `#if` can be nested, so we use a stack to manage nested `#if`s.
#[derive(Debug, PartialEq)]
enum CondInclCtx {
    InThen,
    InElif,
    InElse,
}
// One entry of the conditional-inclusion (#if) stack.
#[derive(Debug)]
struct CondIncl {
    ctx: CondInclCtx,
    tok: Rc<Token>, // the token that opened this conditional
    included: bool, // whether some branch of this #if has been taken
}
// All mutable state of the preprocessor.
pub struct Preprocess {
    macros: HashMap<String, Rc<Macro>>,
    cond_incl: Vec<Rc<RefCell<CondIncl>>>, // stack of open #if contexts
    pragma_once: HashSet<String>,
    include_paths: Vec<String>,
    include_next_idx: usize, // presumably the cursor for #include_next — confirm
    include_cache: HashMap<String, String>,
    include_guards: HashMap<String, String>,
    counter_macro_idx: i32, // presumably state for a __COUNTER__-style macro — confirm
    base_file: String,
}
// Hand-written Debug: the macro table can be huge, so only its keys are
// printed, and each CondIncl is summarized as "[token, ctx]".
impl std::fmt::Debug for Preprocess {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Preprocess")
            .field("macros.key()", &self.macros.keys())
            .field(
                "cond_incl",
                &self
                    .cond_incl
                    .iter()
                    .map(|c| format!("[{}, {:?}]", c.borrow().tok, c.borrow().ctx))
                    .collect::<Vec<String>>(),
            )
            .field("pragma_once", &self.pragma_once)
            .field("include_paths", &self.include_paths)
            .field("include_next_idx", &self.include_next_idx)
            .field("include_cache", &self.include_cache)
            .field("include_guards", &self.include_guards)
            .field("counter_macro_idx", &self.counter_macro_idx)
            .field("base_file", &self.base_file)
            .finish()
    }
}
impl Preprocess {
    // Create an empty preprocessor state with the given include search
    // paths; everything else starts out empty/zeroed.
    pub fn new(include_paths: Vec<String>) -> Preprocess {
        Preprocess {
            macros: HashMap::new(),
            cond_incl: Vec::new(),
            pragma_once: HashSet::new(),
            include_paths,
            include_next_idx: 0,
            include_cache: HashMap::new(),
            include_guards: HashMap::new(),
            counter_macro_idx: 0,
            base_file: String::new(),
        }
    }
}
impl Token {
    // Does this token's source text match `op` exactly?
    #[instrument(ret, level=Level::DEBUG)]
    fn equal(&self, op: &str) -> bool {
        op.as_bytes() == self.loc.as_bytes()
    }
    // A `#` that is the first token on its line introduces a directive.
    #[instrument(ret, level=Level::DEBUG)]
    fn is_hash(&self) -> bool {
        self.equal("#") && self.at_bol
    }
}
// A token stream with a cursor: indexing is relative to `idx`, so
// list[0] is always the "current" token.
#[derive(Clone)] // TODO: reduce clone() calls
pub struct TokenList {
    tokens: Vec<Token>, // backing storage
    idx: usize,         // cursor into `tokens`
}
// Hand-written Debug: shows only the current token, the cursor, and the
// remaining length rather than the whole vector.
impl std::fmt::Debug for TokenList {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // BUG FIX: the struct name printed here was "TokenIterator",
        // which does not match the type's real name and made debug/log
        // output misleading.
        f.debug_struct("TokenList")
            .field("tokens[idx]", &self.tokens.get(self.idx))
            .field("idx", &self.idx)
            .field("len()", &self.len())
            .finish()
    }
}
// All indexing into a TokenList is relative to its cursor: list[0] is
// the current token, list[1] the next, and list[..] the remainder.
impl Index<Range<usize>> for TokenList {
    type Output = [Token];
    fn index(&self, index: Range<usize>) -> &Self::Output {
        &self.tokens[self.idx + index.start..self.idx + index.end]
    }
}
impl Index<RangeFull> for TokenList {
    type Output = [Token];
    fn index(&self, _: RangeFull) -> &Self::Output {
        &self.tokens[self.idx..]
    }
}
impl Index<usize> for TokenList {
    type Output = Token;
    fn index(&self, index: usize) -> &Self::Output {
        &self.tokens[self.idx + index]
    }
}
impl TokenList {
    // Number of tokens remaining from the cursor to the end.
    fn len(&self) -> usize {
        self.tokens.len() - self.idx
    }
}
impl std::ops::IndexMut<usize> for TokenList {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        &mut self.tokens[self.idx + index]
    }
}
impl TokenList {
    // Consume the current token, reporting an error if it is not `op`.
    #[instrument(ret)]
    fn skip(&mut self, op: &str) {
        let tok = &self[0];
        if !tok.equal(op) {
            tok.error(&format!("expected '{:?}'", op));
        }
        self.idx += 1;
    }
    // Consume the current token only if it equals `op`; returns whether
    // it was consumed.
    #[instrument(ret)]
    fn consume(&mut self, op: &str) -> bool {
        let tok = &self[0];
        if tok.equal(op) {
            self.idx += 1;
            return true;
        }
        return false;
    }
    // Some preprocessor directives such as #include allow extraneous
    // tokens before newline. This function skips such tokens.
    #[instrument(ret)]
    fn skip_line(&mut self) {
        if self.len() == 0 {
            return;
        }
        let tok = &self[0];
        if tok.at_bol {
            return;
        }
        tok.warn("extra token"); // TODO: issues with nested macros?
        // BUG FIX: guard on len() — the old loop indexed past the end
        // and panicked when the extra tokens ran to the end of the list.
        while self.len() > 0 && !self[0].at_bol {
            self.idx += 1;
        }
    }
}
impl Hideset {
    // Wrap a macro name in a hideset entry.
    #[instrument(ret, level = Level::DEBUG)]
    fn new(name: String) -> Self {
        Hideset { name }
    }
}
// Union of two hidesets. Duplicates are tolerated: containment checks
// only care about membership, not multiplicity.
#[instrument(ret, level=Level::DEBUG)]
fn hideset_union(hs1: &Vec<Hideset>, hs2: &Vec<Hideset>) -> Vec<Hideset> {
    let mut merged = hs1.clone();
    merged.extend_from_slice(hs2);
    merged
}
// Is macro name `s` a member of hideset `hs`?
#[instrument(ret, level=Level::DEBUG)]
fn hideset_contains(hs: &Vec<Hideset>, s: &str) -> bool {
    hs.iter().any(|h| h.name == s)
}
// Intersection of two hidesets: entries of `hs1` whose names also
// appear in `hs2`.
#[instrument(ret)]
fn hideset_intersection(hs1: &Vec<Hideset>, hs2: &Vec<Hideset>) -> Vec<Hideset> {
    hs1.iter()
        .filter(|x| hideset_contains(hs2, &x.name))
        .map(|x| Hideset::new(x.name.clone()))
        .collect()
}
impl TokenList {
    // Add `hs` to the hideset of every token from the cursor onward.
    #[instrument(ret)]
    fn add_hideset(&mut self, hs: &Vec<Hideset>) {
        for i in self.idx..self.tokens.len() {
            self.tokens[i].hideset = hideset_union(&self.tokens[i].hideset, hs);
        }
    }
    // Append the remaining tokens of `tok` to this list's storage.
    #[instrument(ret)]
    fn append(&mut self, tok: &TokenList) {
        self.tokens.extend_from_slice(&tok.tokens[tok.idx..]);
    }
    // Skip tokens until the `#endif` matching an already-consumed `#if`,
    // recursing for nested conditionals.
    #[instrument(ret)]
    fn skip_cond_incl2(&mut self) {
        // BUG FIX: every `self[1]` lookahead is now guarded by
        // `self.len() > 1`; a `#` as the very last token previously
        // indexed past the end of the list and panicked.
        while self.len() > 0 {
            if self.len() > 1
                && self[0].is_hash()
                && (self[1].equal("if") || self[1].equal("ifdef") || self[1].equal("ifndef"))
            {
                self.idx += 2;
                self.skip_cond_incl2();
                continue;
            }
            if self.len() > 1 && self[0].is_hash() && self[1].equal("endif") {
                self.idx += 2;
                return;
            }
            self.idx += 1;
        }
    }
    // Skip until next `#else`, `#elif`, or `#endif`.
    // Nested `#if` and `#endif` are skipped.
    #[instrument(ret)]
    fn skip_cond_incl(&mut self) {
        while self.len() > 0 {
            if self.len() > 1
                && self[0].is_hash()
                && (self[1].equal("if") || self[1].equal("ifdef") || self[1].equal("ifndef"))
            {
                self.idx += 2;
                self.skip_cond_incl2();
                continue;
            }
            if self.len() > 1
                && self[0].is_hash()
                && (self[1].equal("elif") || self[1].equal("else") || self[1].equal("endif"))
            {
                break;
            }
            self.idx += 1;
        }
    }
}
// Wrap `s` in double quotes, backslash-escaping any `\` or `"` inside
// (used when stringizing macro arguments with the # operator).
#[instrument(ret)]
fn quote_string(s: &str) -> String {
    // Worst case every character needs an escape, plus the two quotes.
    let mut p = String::with_capacity(s.len() * 2 + 2);
    p.push('"');
    // BUG FIX: the old code iterated over raw *bytes* and pushed each as
    // a char. For non-ASCII input, a UTF-8 continuation byte b maps to
    // code point U+00b, which push() re-encodes as TWO bytes — silently
    // mangling every multi-byte character. Iterating over chars keeps
    // the original UTF-8 intact.
    for c in s.chars() {
        if c == '\\' || c == '"' {
            p.push('\\');
        }
        p.push(c);
    }
    p.push('"');
    return p;
}
impl Token {
    // Build a string-literal token whose text is `s`, quoted and
    // escaped, inheriting file identity from `tmpl` (used by the #
    // stringizing operator).
    #[instrument(ret)]
    fn new_str_token(s: &str, tmpl: &Token) -> Token {
        let buf = quote_string(s);
        let mut t = Tokenize::new();
        let f = File::new(
            tmpl.file.borrow().name.clone(),
            tmpl.file.borrow().file_no,
            buf.into_bytes(),
        );
        let tokens: TokenList = t.tokenize(Rc::new(RefCell::new(f)));
        // NOTE(review): assumes tokenize() produces exactly one token
        // (no trailing EOF token) for a quoted string — confirm.
        assert!(tokens.tokens.len() == 1);
        return tokens[0].clone();
    }
}
impl TokenList {
    // Copy every token up to (not including) the next one that starts a
    // new line, advancing this list's cursor past them. Returns the
    // copied tokens as a fresh list.
    #[instrument(ret)]
    fn copy_line(&mut self) -> TokenList {
        let mut r = Vec::new();
        // BUG FIX: guard on len() — a directive line at the very end of
        // the token list previously indexed past the end and panicked.
        while self.len() > 0 && !self[0].at_bol {
            r.push(self[0].clone());
            self.idx += 1;
        }
        return TokenList { tokens: r, idx: 0 };
    }
}
impl Token {
    // Build a numeric token with value `val`, inheriting file identity
    // from `tmpl` (used when synthesizing tokens such as "0"/"1" for
    // `defined(...)` during #if evaluation).
    #[instrument(ret)]
    fn new_num_token(val: i32, tmpl: &Token) -> Token {
        let buf = format!("{}\n", val);
        let mut t = Tokenize::new();
        let f = File::new(
            tmpl.file.borrow().name.clone(),
            tmpl.file.borrow().file_no,
            buf.into_bytes(),
        );
        let tokens: TokenList = t.tokenize(Rc::new(RefCell::new(f)));
        // NOTE(review): assumes tokenize() produces exactly one token
        // (no trailing EOF token) for a bare number — confirm.
        assert!(tokens.tokens.len() == 1);
        return tokens[0].clone();
    }
}
impl TokenList {
    // Copy the rest of the current line, rewriting `defined(foo)` and
    // `defined foo` into "1"/"0" tokens before macro expansion (they
    // must be resolved first so expansion cannot alter them).
    #[instrument(ret)]
    fn read_const_expr(&mut self, pp: &Preprocess) -> TokenList {
        let mut tok = self.copy_line();
        let mut ret = Vec::new();
        while tok.len() > 0 {
            // "defined(foo)" or "defined foo" becomes "1" if macro "foo"
            // is defined. Otherwise "0".
            if tok[0].equal("defined") {
                let start = tok[0].clone();
                tok.idx += 1;
                let has_paren = tok.consume("(");
                if tok[0].kind != TokenKind::Ident {
                    start.error("macro name must be an identifier");
                }
                let m = pp.find_macro(&tok[0]);
                tok.idx += 1;
                if has_paren {
                    tok.skip(")");
                }
                ret.push(Token::new_num_token(
                    if m.is_some() { 1 } else { 0 },
                    &start,
                ));
                continue;
            }
            ret.push(tok[0].clone());
            tok.idx += 1;
        }
        return TokenList {
            tokens: ret,
            idx: 0,
        };
    }
    // Read and evaluate the controlling expression of `#if`/`#elif`.
    // NOTE(review): actual evaluation is stubbed out and always returns
    // 0 until the parser's const_expr is implemented (see the warning
    // printed below).
    #[instrument(ret)]
    fn eval_const_expr(&mut self, pp: &mut Preprocess, tokenize: &mut Tokenize) -> i64 {
        let start = self[0].clone();
        self.idx += 1;
        let mut expr = self.read_const_expr(pp);
        expr = preprocess2(expr, pp, tokenize);
        if expr.len() == 0 {
            start.error("no expression");
        }
        // [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
        // we replace remaining non-macro identifiers with "0" before
        // evaluating a constant expression. For example, `#if foo` is
        // equivalent to `#if 0` if foo is not defined.
        let expr_orig = expr.join(None);
        {
            let mut i: usize = 0;
            while i < expr.len() {
                if expr[i].kind == TokenKind::Ident {
                    expr[i] = Token::new_num_token(0, &expr[i]);
                }
                i += 1;
            }
        }
        // Convert pp-numbers to regular numbers
        Tokenize::convert_pp_tokens(&mut expr.tokens);
        eprintln!(
            "WARNING: if evaluation not implemented yet, returning 0: {} ||| {}",
            expr_orig,
            expr.join(None)
        );
        return 0;
        // TODO: implement after implementing parse
        // expr.const_expr
        /*
        Token *rest2;
        long val = const_expr(&rest2, expr);
        if (rest2->kind != TK_EOF)
        error_tok(rest2, "extra token");
        return val;
        */
        // NOTE(review): this todo!() is unreachable (dead code after the
        // `return 0` above).
        todo!();
    }
}
impl Preprocess {
    // Push a new conditional-inclusion frame (#if/#ifdef/#ifndef) onto
    // the stack and return a shared handle to it.
    #[instrument(ret)]
    fn push_cond_incl(&mut self, tok: Rc<Token>, included: bool) -> Rc<RefCell<CondIncl>> {
        let frame = Rc::new(RefCell::new(CondIncl {
            ctx: CondInclCtx::InThen,
            tok,
            included,
        }));
        self.cond_incl.push(frame.clone());
        frame
    }
    // Look up a macro by the identifier token `tok`; non-identifier
    // tokens never name a macro.
    #[instrument(ret, level=Level::DEBUG)]
    fn find_macro(&self, tok: &Token) -> Option<Rc<Macro>> {
        if tok.kind != TokenKind::Ident {
            return None;
        }
        self.macros.get(tok.loc.as_str()).cloned()
    }
    // Register a macro (replacing any previous definition of the same
    // name) and return the shared handle.
    #[instrument(ret, level=Level::DEBUG)]
    fn add_macro(&mut self, m: Macro) -> Rc<Macro> {
        let name = m.name.clone();
        let entry = Rc::new(m);
        self.macros.insert(name, entry.clone());
        entry
    }
}
impl TokenList {
    // Parse a function-like macro's parameter list, consuming up to and
    // including the closing ")". Returns the parameters plus, for
    // variadic macros, the name the trailing arguments are bound to:
    // "__VA_ARGS__" for `...`, or the parameter's own name for the
    // GNU `name...` form.
    #[instrument(ret)]
    fn read_macro_params(&mut self) -> (Vec<MacroParam>, Option<String>) {
        let mut va_args_name = None;
        let mut params = Vec::new();
        while !self[0].equal(")") {
            if params.len() != 0 {
                self.skip(",");
            }
            if self[0].equal("...") {
                va_args_name = Some("__VA_ARGS__".into());
                self.idx += 1;
                self.skip(")");
                return (params, va_args_name);
            }
            if self[0].kind != TokenKind::Ident {
                self[0].error("expected an identifier");
            }
            // [GNU] `name...` names the variadic parameter explicitly.
            if self[1].equal("...") {
                va_args_name = Some(String::from(self[0].loc.as_str()));
                self.idx += 2;
                self.skip(")");
                return (params, va_args_name);
            }
            let m = MacroParam {
                name: self[0].loc.as_str().into(),
            };
            params.push(m);
            self.idx += 1;
        }
        // Step over the closing ")".
        self.idx += 1;
        return (params, va_args_name);
    }
    // Parse the remainder of a `#define` line. `self[0]` is the macro
    // name. A "(" immediately after the name (no intervening whitespace)
    // makes it a function-like macro; otherwise it is object-like.
    #[instrument(ret)]
    fn read_macro_definition(&mut self, pp: &mut Preprocess) {
        if self[0].kind != TokenKind::Ident {
            self[0].error("macro name must be an identifier");
        }
        let name = String::from(self[0].loc.as_str());
        self.idx += 1;
        if !self[0].has_space && self[0].equal("(") {
            // Function-like macro
            self.idx += 1;
            let (params, va_args_name) = self.read_macro_params();
            let tokens = self.copy_line();
            let m = Macro {
                name,
                is_objlike: false,
                params,
                va_args_name,
                body: Some(Rc::new(tokens)),
                handler: None,
            };
            pp.add_macro(m);
        } else {
            // Object-like macro: the rest of the line is the body.
            let tokens = self.copy_line();
            let m = Macro {
                name,
                is_objlike: true,
                params: Vec::new(),
                va_args_name: None,
                body: Some(Rc::new(tokens)),
                handler: None,
            };
            pp.add_macro(m);
        }
    }
    // Read one macro argument: tokens up to an unparenthesized "," (or,
    // when `read_rest` is set for __VA_ARGS__, up to the closing ")").
    // The cursor is left on the terminating "," or ")".
    #[instrument(ret)]
    fn read_macro_arg_one(&mut self, read_rest: bool) -> MacroArg {
        let mut tokens = Vec::new();
        let mut level = 0;
        loop {
            if level == 0 && self[0].equal(")") {
                break;
            }
            if level == 0 && !read_rest && self[0].equal(",") {
                break;
            }
            // NOTE(review): this check is likely unreachable — the `self[0]`
            // indexing above would panic first on premature end of input;
            // confirm the Index impl's out-of-range behavior.
            if self.len() == 0 {
                self[0].error("premature end of input");
            }
            // Track nesting so commas inside parentheses don't split args.
            if self[0].equal("(") {
                level += 1;
            } else if self[0].equal(")") {
                level -= 1;
            }
            tokens.push(self[0].clone());
            self.idx += 1;
        }
        let arg = MacroArg {
            name: String::new(),
            is_va_args: false,
            tok: Rc::new(tokens),
        };
        return arg;
    }
    // Read all arguments of a function-like macro invocation. On entry
    // `self[0]` is the macro name (`idx += 2` skips the name and "(").
    // On exit the cursor is left ON the closing ")" — skip() then
    // `idx -= 1` — so the caller (expand_macro) can read it as self[0].
    #[instrument(ret)]
    fn read_macro_args(
        &mut self,
        params: &Vec<MacroParam>,
        va_args_name: Option<String>,
    ) -> Vec<MacroArg> {
        self.idx += 2;
        let mut args = Vec::new();
        for p in params {
            if args.len() != 0 {
                self.skip(",");
            }
            let mut a = self.read_macro_arg_one(false);
            a.name = p.name.clone();
            args.push(a);
        }
        if let Some(va_args_name) = va_args_name {
            // The variadic part may be empty (invocation ends right at ")").
            let mut arg = if self[0].equal(")") {
                MacroArg {
                    name: String::new(),
                    is_va_args: true,
                    tok: Rc::new(vec![]),
                }
            } else {
                if args.len() > 0 {
                    self.skip(",");
                }
                self.read_macro_arg_one(true)
            };
            arg.name = va_args_name;
            arg.is_va_args = true;
            args.push(arg);
        }
        self.skip(")");
        self.idx -= 1;
        return args;
    }
}
// Look up the macro argument bound to the parameter named by `tok`.
// Returns a clone of the matching argument, or None if `tok` does not
// name a parameter.
#[instrument(ret, level=Level::DEBUG)]
fn find_arg(args: &Vec<MacroArg>, tok: &Token) -> Option<MacroArg> {
    // Compare against the &str directly instead of allocating a String
    // per comparison (clippy::cmp_owned).
    let name = tok.loc.as_str();
    args.iter().find(|a| a.name == name).cloned()
}
impl TokenList {
    // Concatenates all tokens in `self` and returns a new string.
    // Tokens are separated by a single space wherever the original had
    // whitespace. If `end` is given, concatenation stops at the token
    // whose source location matches `end` (exclusive).
    #[instrument(ret)]
    fn join(&self, end: Option<&Token>) -> String {
        // First pass: estimate the buffer length (an upper bound; the
        // first token contributes a counted-but-unused separator).
        let mut len = 0;
        for t in &self[..] {
            if end.is_some() && t.loc.idx == end.unwrap().loc.idx {
                break;
            }
            if len == 0 || t.has_space {
                len += 1;
            }
            len += t.loc.len();
        }
        let mut buf = String::with_capacity(len);
        // Copy token texts.
        for t in &self[..] {
            if end.is_some() && t.loc.idx == end.unwrap().loc.idx {
                break;
            }
            // No leading space before the very first token.
            if buf.len() > 0 && t.has_space {
                buf.push(' ');
            }
            buf.push_str(t.loc.as_str());
        }
        return buf;
    }
    // Concatenate all tokens in `self` and return a new string token.
    // This function is used for the stringizing operator (#).
    #[instrument(ret)]
    fn stringize(&self, hash: &Token) -> Token {
        // Create a new string token. We need to set some value for
        // its source location for error reporting function, so we use
        // a macro name token as a template.
        let s = self.join(None);
        return Token::new_str_token(&s, hash);
    }
}
impl Token {
    // Implement the token-pasting operator (##): concatenate the text of
    // `self` and `other` and re-tokenize it. It is an error if the pasted
    // text does not form exactly one token.
    #[instrument(ret)]
    fn paste(&self, other: &Token) -> Token {
        let pasted = format!("{}{}", self.loc.as_str(), other.loc.as_str());
        let file = File::new(
            self.file.borrow().name.clone(),
            self.file.borrow().file_no,
            pasted.clone().into_bytes(),
        );
        let mut tokenizer = Tokenize::new();
        let list = tokenizer.tokenize(Rc::new(RefCell::new(file)));
        if list.tokens.len() != 1 {
            self.error(format!("pasting forms '{}', an invalid token", pasted).as_str());
        }
        list[0].clone()
    }
}
// Return true if the macro invocation's variadic argument is non-empty.
// Used to decide whether __VA_OPT__(x) expands to x or to nothing.
#[instrument(ret)]
fn has_varargs(args: &Vec<MacroArg>) -> bool {
    // BUG FIX: the original compared the name against "__VA_ARGS" (missing
    // the trailing underscores — the reader stores "__VA_ARGS__", see
    // read_macro_params), so this always returned false. It also missed
    // GNU-style named varargs (`#define M(rest...)`), whose argument keeps
    // the custom name. The `is_va_args` flag identifies the variadic
    // argument regardless of its name.
    args.iter().any(|ap| ap.is_va_args && ap.tok.len() > 0)
}
// Replace func-like macro params with given arguments.
impl TokenList {
    // Substitute macro parameters in a macro body (`self`) with the given
    // arguments, handling the `#`, `##` and `__VA_OPT__` operators.
    // Arguments are fully macro-expanded before substitution (except when
    // used as operands of # and ##, per the C standard).
    #[instrument(ret)]
    fn subst(
        mut self,
        args: &Vec<MacroArg>,
        pp: &mut Preprocess,
        tokenize: &mut Tokenize,
    ) -> TokenList {
        let mut v = Vec::new();
        while self.len() > 0 {
            let tok = &self[0];
            // "#param" -> stringized (un-expanded) argument.
            if tok.equal("#") {
                let arg = find_arg(args, &self[1]);
                if arg.is_none() {
                    self[1].error("'#' is not followed by a macro parameter");
                }
                let argtl: TokenList = TokenList {
                    tokens: arg.unwrap().tok.to_vec(),
                    idx: 0,
                };
                v.push(argtl.stringize(&self[0]));
                self.idx += 2;
                continue;
            }
            // [GNU] If __VA_ARGS__ is empty, `,##__VA_ARGS__` is expanded
            // to the empty token list. Otherwise, it's expanded to `,` and
            // __VA_ARGS__.
            if tok.equal(",") && self[1].equal("##") {
                let arg = find_arg(args, &self[2]);
                if arg.is_some() {
                    if let Some(arg) = arg {
                        if arg.tok.len() == 0 {
                            // Drop the comma, "##" and the empty argument.
                            self.idx += 3;
                        } else {
                            // Keep the comma; "##" is handled next round.
                            v.push(tok.clone());
                            self.idx += 2;
                        }
                        continue;
                    }
                }
            }
            if tok.equal("##") {
                if v.len() == 0 {
                    tok.error("'##' cannot appear at the start of macro expansion");
                }
                if self.len() == 1 {
                    tok.error("'##' cannot appear at the end of macro expansion");
                }
                // `## param`: paste the previous output token with the first
                // token of the (un-expanded) argument.
                let arg = find_arg(args, &self[1]);
                if let Some(a) = arg {
                    if a.tok.len() > 0 {
                        let last = v.last_mut().unwrap();
                        *last = last.paste(&a.tok[0]);
                        for t in &a.tok[1..] {
                            v.push(t.clone());
                        }
                    }
                    self.idx += 2;
                    continue;
                }
                // `## tok`: paste with a plain token.
                let last = v.last_mut().unwrap();
                *last = last.paste(&self[1]);
                self.idx += 2;
                continue;
            }
            let arg = find_arg(args, &tok);
            // `param ## rhs` where param expands to the empty list: the
            // right-hand side is emitted as-is (C11 6.10.3.3p2 placemarker).
            if arg.is_some() && self.len() > 1 && self[1].equal("##") {
                let rhs = &self[2];
                let arg = arg.unwrap();
                if arg.tok.len() == 0 {
                    let arg2 = find_arg(args, rhs);
                    if let Some(arg2) = arg2 {
                        for t in arg2.tok.iter() {
                            v.push(t.clone());
                        }
                    } else {
                        v.push(rhs.clone());
                    }
                    self.idx += 3;
                    continue;
                }
                // Non-empty: emit the raw argument; "##" is handled above
                // on the next iteration.
                for t in arg.tok.iter() {
                    v.push(t.clone());
                }
                self.idx += 1;
                continue;
            }
            // If __VA_ARGS__ is empty, __VA_OPT__(x) is expanded to the
            // empty token list. Otherwise, __VA_OPT__(x) is expanded to x.
            if tok.equal("__VA_OPT__") && self[1].equal("(") {
                self.idx += 2;
                let arg = self.read_macro_arg_one(true);
                if has_varargs(args) {
                    for t in arg.tok.iter() {
                        v.push(t.clone());
                    }
                }
                //
                self.skip(")"); // idx += 1;
                continue;
            }
            // Handle a macro token. Macro arguments are completely macro-expanded
            // before they are substituted into a macro body.
            if let Some(a) = arg {
                let mut t: TokenList = preprocess2(
                    TokenList {
                        tokens: a.tok.to_vec(),
                        idx: 0,
                    },
                    pp,
                    tokenize,
                );
                if t.len() > 0 {
                    // The expansion inherits the parameter token's layout.
                    t[0].at_bol = tok.at_bol;
                    t[0].has_space = tok.has_space;
                    for tok in t.tokens {
                        v.push(tok);
                    }
                }
                self.idx += 1;
                continue;
            }
            // Handle a non-macro token
            v.push(tok.clone());
            self.idx += 1;
            continue;
        }
        return TokenList { tokens: v, idx: 0 };
    }
    // If tok is a macro, expand it and return true.
    // Otherwise, do nothing and return false.
    // Consumes `self` and returns the (possibly rewritten) stream; the
    // expansion is prepended to the remaining tokens.
    #[instrument(ret)]
    fn expand_macro(mut self, pp: &mut Preprocess, tokenize: &mut Tokenize) -> (TokenList, bool) {
        // Hideset check prevents infinite recursive expansion
        // (Prosser's algorithm).
        if hideset_contains(&self[0].hideset, self[0].loc.as_str()) {
            return (self, false);
        }
        let m = pp.find_macro(&self[0]);
        if m.is_none() {
            return (self, false);
        }
        let m = m.unwrap();
        // Built-in dynamic macro application such as __LINE__
        if let Some(h) = m.handler {
            let t: Token = h(&self[0], pp);
            let mut tokens = TokenList {
                tokens: vec![t],
                idx: 0,
            };
            self.idx += 1;
            tokens.append(&self);
            return (tokens, true);
        }
        // Object-like macro application
        if m.is_objlike {
            let hs = hideset_union(&self[0].hideset, &vec![Hideset::new(m.name.clone())]);
            let start_tok: Token = self[0].clone();
            let mut body = m.body.as_ref().unwrap().as_ref().clone();
            body.add_hideset(&hs); // TODO: should hideset be added to the macro body?
            // Record the expansion origin for __FILE__/__LINE__ and errors.
            for t in body.tokens.iter_mut() {
                t.origin = Some(Rc::new(start_tok.clone()))
            }
            self.idx += 1;
            body.append(&self);
            body[0].at_bol = start_tok.at_bol;
            body[0].has_space = start_tok.has_space;
            return (body, true);
        }
        // If a funclike macro token is not followed by an argument list,
        // treat it as a normal identifier.
        if self.len() == 1 || !self[1].equal("(") {
            return (self, false);
        }
        // Function-like macro application
        let macro_token = Rc::new(self[0].clone());
        let args = self.read_macro_args(&m.params, m.va_args_name.clone());
        // read_macro_args leaves the cursor on the closing ")".
        let r_paren = self[0].clone();
        // Tokens that consist of a func-like macro invocation may have different
        // hidesets, and if that's the case, it's not clear what the hideset
        // for the new tokens should be. We take the intersection of the
        // macro token and the closing parenthesis and use it as a new hideset
        // as explained in Dave Prosser's algorithm.
        let mut hs = hideset_intersection(&macro_token.hideset, &r_paren.hideset);
        hs = hideset_union(&hs, &vec![Hideset::new(m.name.clone())]);
        let mut body = m.body.clone().unwrap().as_ref().clone();
        body = body.subst(&args, pp, tokenize);
        body.add_hideset(&hs);
        for t in body.tokens.iter_mut() {
            t.origin = Some(macro_token.clone());
        }
        body.append(&self);
        body[0].at_bol = macro_token.at_bol;
        body[0].has_space = macro_token.has_space;
        return (body, true);
    }
}
impl Preprocess {
    // Resolve `filename` against the configured include paths.
    // Absolute paths are returned as-is; relative lookups are cached.
    // Also records where the hit was found so that a later
    // `#include_next` resumes from the following path.
    #[instrument(ret)]
    fn search_include_paths(&mut self, filename: &str) -> Option<String> {
        if filename.starts_with('/') {
            return Some(filename.to_string());
        }
        if let Some(hit) = self.include_cache.get(filename) {
            return Some(hit.clone());
        }
        // Search a file from the include paths.
        for (i, dir) in self.include_paths.iter().enumerate() {
            let candidate = format!("{}/{}", dir, filename);
            if Path::new(&candidate).exists() {
                self.include_cache
                    .insert(filename.to_string(), candidate.clone());
                self.include_next_idx = i + 1;
                return Some(candidate);
            }
        }
        None
    }
    // Resolve `filename` for `#include_next`: scan the include paths
    // starting just after the one the current file was found in,
    // advancing the resume index past each miss.
    #[instrument(ret)]
    fn search_include_next(&mut self, filename: &str) -> Option<String> {
        while self.include_next_idx < self.include_paths.len() {
            let candidate = format!(
                "{}/{}",
                self.include_paths[self.include_next_idx], filename
            );
            if Path::new(&candidate).exists() {
                return Some(candidate);
            }
            self.include_next_idx += 1;
        }
        None
    }
}
impl TokenList {
    // Read an # include argument
    // Returns the filename bytes and whether it was a double-quoted form
    // (`#include "foo.h"`), which searches relative to the including file.
    #[instrument(ret)]
    fn read_include_filename(
        &mut self,
        pp: &mut Preprocess,
        tokenize: &mut Tokenize,
    ) -> (Vec<u8>, bool) {
        // Pattern 1: #include "foo.h"
        if self[0].kind == TokenKind::Str {
            // A double-quoted filename for #include is a special kind of
            // token, and we don't want to interpret any escape sequences in it.
            // For example, "\f" in "C:\foo" is not a formfeed character but
            // just two non-control characters, backslash and f.
            // So, we don't want to use token->str.
            let s = Vec::from(&self[0].loc.contents[self[0].loc.idx + 1..self[0].loc.end_idx - 1]);
            self.idx += 1;
            self.skip_line();
            return (s, true);
        }
        // Pattern 2: #include <foo.h>
        if self[0].equal("<") {
            // Reconstruct a filename from a sequence of tokens between
            // "<" and ">";
            let mut i = 0;
            // NOTE(review): at i == self.len() the condition indexes
            // self[i] before the error check below can run — confirm the
            // Index impl's out-of-range behavior (possible panic instead
            // of the "expected '>'" diagnostic).
            while i <= self.len() && !self[i].equal(">") {
                if i == self.len() || self[i].at_bol {
                    self[i].error("expected '>'")
                }
                i += 1;
            }
            let end = self[i].clone();
            // Skip "<", join the tokens up to ">", then step past it all.
            self.idx += 1;
            let s = self.join(Some(&end));
            self.idx += i;
            self.skip_line();
            return (s.into_bytes(), false);
        }
        // Pattern 3: #include FOO
        // In this case FOO must be macro-expanded to either
        // a single string token or a sequence of "<" ... ">".
        if self[0].kind == TokenKind::Ident {
            let cl = self.copy_line();
            let mut tok2 = preprocess2(cl, pp, tokenize);
            return tok2.read_include_filename(pp, tokenize);
        }
        self[0].error("expected a filename");
        panic!("unreachable");
    }
#[instrument(ret)]
fn detect_include_guard(&self) -> Option<Vec<u8>> {
let mut t = self.clone(); // TODO: expensive
// Detect the first two lines.
if !t[0].is_hash() || !t[1].equal("ifdef") {
return None;
}
t.idx += 2;
if t[0].kind != TokenKind::Ident {
return None;
}
let m = String::from(t[0].loc.as_str());
t.idx += 1;
if !t[0].is_hash() || !t[1].equal("define") || !t[2].equal(&m) {
return None;
}
while t.len() > 0 {
if !t[0].is_hash() {
t.idx += 1;
continue;
}
if t[1].equal("endif") && t.len() == 1 {
return Some(m.into_bytes());
}
if t[0].equal("if") || t[0].equal("ifdef") || t[0].equal("ifndef") {
t.idx += 1;
t.skip_cond_incl();
} else {
t.idx += 1;
}
}
return None;
}
#[instrument(ret)]
fn include_file(
self,
path: &str,
filename_tok: &Token,
pp: &mut Preprocess,
t: &mut Tokenize,
) -> TokenList {
// Check for "#pragma once"
if pp.pragma_once.contains(path) {
return self;
}
// If we read the same file before, and if the file was guarded
// by the usual #ifndef ... #endif pattern, we may be able to
// skip the file without opening it.
let guard_name = pp.include_guards.get(path);
if guard_name.is_some() && pp.macros.get(guard_name.unwrap()).is_some() {
return self;
}
let tok2 = t.tokenize_file(path);
if tok2.is_none() {
// TODO: errorno?
filename_tok.error(format!("{}: cannot open file", path).as_str())
}
let guard_name = self.detect_include_guard();
if let Some(guard_name) = guard_name {
pp.include_guards
.insert(String::from(path), String::from_utf8(guard_name).unwrap());
}
let mut tok2 = tok2.unwrap();
tok2.append(&self);
return tok2;
}
    // Handle a GNU line marker (`# <num> "<file>"`) or `#line <num> "<file>"`:
    // record the line-number delta and the display filename on the current
    // File so later diagnostics report the original source position.
    #[instrument(ret)]
    fn read_line_marker(&mut self, pp: &mut Preprocess, tokenize: &mut Tokenize) {
        let start_idx = self.idx;
        // The argument line may itself contain macros; expand it first.
        let cl: TokenList = self.copy_line();
        let mut tok = pp.preprocess(cl, tokenize);
        if let TokenFields::Int(ty, val) = &tok[0].other {
            assert_eq!(tok[0].kind, TokenKind::Num);
            assert!(matches!(ty.kind, TypeKind::Int));
            let mut f = self.tokens[start_idx].file.borrow_mut();
            // Delta between the claimed line number and the physical one.
            f.line_delta = *val as i32 - self.tokens[start_idx].line_no;
            tok.idx += 1;
            // The filename part is optional.
            if tok.len() == 0 {
                return;
            }
            if let TokenFields::Str(_, s) = &tok[0].other {
                assert_eq!(tok[0].kind, TokenKind::Str);
                f.display_name = String::from_utf8(Vec::from(s.as_ref())).unwrap();
            } else {
                tok[0].error("filename expected");
            }
        } else {
            tok[0].error("invalid line marker");
        }
    }
}
// Core preprocessor loop: visit all tokens in `t`, expanding macros and
// handling directives (#include, #define, #if family, #pragma, ...).
// Returns the preprocessed token list.
#[instrument(ret)]
fn preprocess2(mut t: TokenList, pp: &mut Preprocess, tokenize: &mut Tokenize) -> TokenList {
    let mut ret = Vec::new();
    while t.len() > 0 {
        // If it is a macro, expand it.
        let ok: bool;
        (t, ok) = t.expand_macro(pp, tokenize);
        if ok {
            continue;
        }
        // Pass through if it is not a "#".
        if !t[0].is_hash() {
            // Stamp the token with the current #line adjustments.
            let line_delta = t[0].file.borrow().line_delta;
            t[0].line_delta = line_delta;
            let display_name = t[0].file.borrow().display_name.clone();
            t[0].filename = Some(display_name);
            ret.push(t[0].clone()); // TODO: avoid token clones?
            t.idx += 1;
            continue;
        }
        let start: Token = t[0].clone();
        // NOTE(review): this indexes two tokens ahead unconditionally;
        // confirm the Index impl's behavior when fewer than 3 tokens remain
        // (e.g. a trailing `#` at end of file).
        let start_next_next = t[2].clone();
        t.idx += 1;
        if t[0].equal("include") {
            t.idx += 1;
            let (filename, is_dquote) = t.read_include_filename(pp, tokenize);
            let filename = String::from_utf8(filename).unwrap();
            // `#include "..."` searches the including file's directory first.
            if !filename.starts_with("/") && is_dquote {
                let mut dirname = PathBuf::from(&start.file.borrow().name);
                dirname.pop();
                let path = format!("{}/{}", dirname.to_str().unwrap(), filename);
                if Path::new(&path).exists() {
                    t = t.include_file(&path, &start_next_next, pp, tokenize);
                    continue;
                }
            }
            let path = pp.search_include_paths(&filename);
            t = t.include_file(&path.unwrap_or(filename), &start_next_next, pp, tokenize);
            continue;
        }
        if t[0].equal("include_next") {
            t.idx += 1;
            let (filename, _) = t.read_include_filename(pp, tokenize);
            let filename = String::from_utf8(filename).unwrap();
            let path = pp.search_include_next(&filename);
            t = t.include_file(&path.unwrap_or(filename), &start_next_next, pp, tokenize);
            continue;
        }
        if t[0].equal("define") {
            t.idx += 1;
            t.read_macro_definition(pp);
            continue;
        }
        if t[0].equal("undef") {
            t.idx += 1;
            if t[0].kind != TokenKind::Ident {
                t[0].error("macro name must be an identifier");
            }
            pp.undef_macro(t[0].loc.as_str());
            t.idx += 1;
            t.skip_line();
            continue;
        }
        if t[0].equal("if") {
            let val = t.eval_const_expr(pp, tokenize);
            pp.push_cond_incl(Rc::new(start), val != 0); // why start?
            if val == 0 {
                t.skip_cond_incl();
            }
            continue;
        }
        if t[0].equal("ifdef") {
            let defined = pp.find_macro(&t[1]).is_some();
            pp.push_cond_incl(Rc::new(t[0].clone()), defined);
            t.idx += 2;
            t.skip_line();
            if !defined {
                t.skip_cond_incl();
            }
            continue;
        }
        if t[0].equal("ifndef") {
            let defined = pp.find_macro(&t[1]).is_some();
            pp.push_cond_incl(Rc::new(t[0].clone()), !defined);
            t.idx += 2;
            t.skip_line();
            if defined {
                t.skip_cond_incl();
            }
            continue;
        }
        if t[0].equal("elif") {
            if pp.cond_incl.len() == 0
                || pp.cond_incl.last().unwrap().borrow().ctx == CondInclCtx::InElse
            {
                start.error("stray #elif");
            }
            {
                let mut ci = pp.cond_incl.last().unwrap().borrow_mut();
                ci.ctx = CondInclCtx::InElif;
            }
            let included = pp.cond_incl.last().unwrap().borrow().included;
            // NOTE(review): `> 0` treats a negative constant expression as
            // false; C semantics are "nonzero". Latent while eval_const_expr
            // is stubbed to return 0 — revisit once evaluation is implemented.
            if !included && t.eval_const_expr(pp, tokenize) > 0 {
                let mut ci = pp.cond_incl.last().unwrap().borrow_mut();
                ci.included = true;
            } else {
                t.skip_cond_incl();
            }
            continue;
        }
        if t[0].equal("else") {
            if pp.cond_incl.is_empty()
                || pp.cond_incl.last().unwrap().borrow().ctx == CondInclCtx::InElse
            {
                start.error("stray #else");
            }
            let mut ci = pp.cond_incl.last().unwrap().borrow_mut();
            ci.ctx = CondInclCtx::InElse;
            t.idx += 1;
            t.skip_line();
            if !ci.included {
                t.skip_cond_incl();
            }
            continue;
        }
        if t[0].equal("endif") {
            if pp.cond_incl.len() == 0 {
                start.error("stray #endif");
            }
            pp.cond_incl.pop();
            t.idx += 1;
            t.skip_line();
            continue;
        }
        if t[0].equal("line") {
            t.idx += 1;
            t.read_line_marker(pp, tokenize);
            continue;
        }
        // GNU line marker: `# <num> ...` (no "line" keyword).
        if t[0].kind == TokenKind::PPNum {
            t.read_line_marker(pp, tokenize);
            continue;
        }
        if t[0].equal("pragma") && t[1].equal("once") {
            pp.pragma_once.insert(t[0].file.borrow().name.clone());
            t.idx += 2;
            t.skip_line();
            continue;
        }
        // All other #pragma directives are ignored (skip to end of line).
        if t[0].equal("pragma") {
            loop {
                t.idx += 1;
                if t[0].at_bol {
                    break;
                }
            }
            continue;
        }
        if t[0].equal("error") {
            t[0].error("error");
        }
        // CUSTOM
        if t[0].equal("warning") {
            eprintln!("CUSTOM: skipping warning");
            t.skip_line();
            continue;
        }
        // `#`-only line is legal. It's called a null directive.
        if t[0].at_bol {
            continue;
        }
        t[0].error("invalid preprocessor directive");
    }
    return TokenList {
        tokens: ret,
        idx: 0,
    };
}
impl Preprocess {
    // Define an object-like macro `name` whose body is the token stream
    // produced by tokenizing `buf` (used for the predefined macros and
    // -D command-line definitions).
    #[instrument(ret)]
    fn define_macro(&mut self, name: &str, buf: &str) {
        let file = File::new("<built-in>".into(), 1, Vec::from(buf.as_bytes()));
        let mut tokenizer = Tokenize::new();
        let body = tokenizer.tokenize(Rc::new(RefCell::new(file)));
        let m = Macro {
            name: name.to_string(),
            is_objlike: true,
            params: Vec::new(),
            va_args_name: None,
            body: Some(Rc::new(body)),
            handler: None,
        };
        self.add_macro(m);
    }
    // Remove a macro definition (#undef). Undefining an unknown name is
    // a no-op.
    #[instrument(ret)]
    fn undef_macro(&mut self, name: &str) {
        self.macros.remove(name);
    }
    // Register a dynamic, handler-backed macro such as __LINE__; the
    // handler synthesizes the replacement token at expansion time.
    #[instrument(ret)]
    fn add_builtin(&mut self, name: &str, f: MacroHandlerFn) {
        self.add_macro(Macro {
            name: name.to_string(),
            is_objlike: true,
            params: Vec::new(),
            va_args_name: None,
            body: None,
            handler: Some(f),
        });
    }
}
impl Token {
    // __FILE__ expands to the display name of the file the token lives in.
    #[instrument(ret)]
    fn file_macro(tmpl: &Token, _: &mut Preprocess) -> Token {
        // Walk to the outermost macro-expansion origin so __FILE__ inside
        // a macro reports the expansion site, not the macro definition.
        let mut tmpl = Rc::new(tmpl.clone());
        while tmpl.origin.is_some() {
            tmpl = tmpl.origin.clone().unwrap();
        }
        return Token::new_str_token(&tmpl.file.borrow().display_name, tmpl.as_ref());
    }
    // __LINE__ expands to the current (possibly #line-adjusted) line number.
    #[instrument(ret)]
    fn line_macro(tmpl: &Token, _: &mut Preprocess) -> Token {
        let mut tmpl = Rc::new(tmpl.clone());
        while let Some(t) = tmpl.origin.clone() {
            tmpl = t
        }
        let i = tmpl.line_no + tmpl.file.borrow().line_delta;
        return Token::new_num_token(i, tmpl.as_ref());
    }
    // __COUNTER__ is expanded to serial values starting from 0
    #[instrument(ret)]
    fn counter_macro(tmpl: &Token, pp: &mut Preprocess) -> Token {
        let t = Token::new_num_token(pp.counter_macro_idx, tmpl);
        pp.counter_macro_idx += 1;
        return t;
    }
    // __TIMESTAMP__ is expanded to a string describing the last
    // modification time of the current file. E.g.
    // "Fri Jul 24 01:32:50 2020"
    #[instrument(ret)]
    fn timestamp_macro(tmpl: &Token, _: &mut Preprocess) -> Token {
        let r = fs::metadata(&tmpl.file.borrow().name);
        match r {
            Ok(m) => {
                // BUG FIX: use the file's modification time (mtime), not the
                // inode change time (ctime), per the documented meaning.
                let dt = DateTime::from_timestamp(m.mtime(), 0).unwrap();
                // BUG FIX: "%b" (abbreviated month name, e.g. "Jul"), not
                // "%m" (month number), to match the format shown above.
                let formatted = format!("{}", dt.format("%a %b %d %H:%M:%S %Y"));
                return Token::new_str_token(&formatted, tmpl);
            }
            Err(_) => {
                return Token::new_str_token("??? ??? ?? ??:??:?? ????", tmpl);
            }
        }
    }
    // __BASE_FILE__ expands to the name of the top-level file being compiled.
    #[instrument(ret)]
    fn base_file_macro(tmpl: &Token, pp: &mut Preprocess) -> Token {
        return Token::new_str_token(&pp.base_file, tmpl);
    }
}
// __DATE__ is expanded to the current date, e.g. "May 17 2020".
// BUG FIX: the previous format "%a %m %Y" produced a weekday plus a month
// *number* (e.g. "Sun 05 2020"); the C "Mmm dd yyyy" form needs an
// abbreviated month name ("%b") and a space-padded day ("%e"). The result
// is also wrapped in double quotes so the macro body tokenizes as a single
// string literal, as __DATE__ requires.
#[instrument(ret)]
fn format_date(dt: DateTime<Local>) -> String {
    return format!("\"{}\"", dt.format("%b %e %Y"));
}
// __TIME__ is expanded to the current time, e.g. "13:34:03".
// BUG FIX: the result is wrapped in double quotes so the macro body
// tokenizes as a single string literal, as __TIME__ requires (unquoted,
// "13:34:03" would tokenize as numbers and punctuators).
#[instrument(ret)]
fn format_time(dt: DateTime<Local>) -> String {
    return format!("\"{}\"", dt.format("%H:%M:%S"));
}
impl Preprocess {
    // Install the predefined macros (target/ABI description, C standard
    // feature flags, GNU keyword aliases) plus the dynamic builtins and
    // the compile-time __DATE__/__TIME__ values.
    pub fn init_macros(&mut self) {
        // Define predefined macros (name, body) — all object-like.
        const PREDEFINED: &[(&str, &str)] = &[
            ("_LP64", "1"),
            ("__C99_MACRO_WITH_VA_ARGS", "1"),
            ("__ELF__", "1"),
            ("__LP64__", "1"),
            ("__SIZEOF_DOUBLE__", "8"),
            ("__SIZEOF_FLOAT__", "4"),
            ("__SIZEOF_INT__", "4"),
            ("__SIZEOF_LONG_DOUBLE__", "8"),
            ("__SIZEOF_LONG_LONG__", "8"),
            ("__SIZEOF_LONG__", "8"),
            ("__SIZEOF_POINTER__", "8"),
            ("__SIZEOF_PTRDIFF_T__", "8"),
            ("__SIZEOF_SHORT__", "2"),
            ("__SIZEOF_SIZE_T__", "8"),
            ("__SIZE_TYPE__", "unsigned long"),
            ("__STDC_HOSTED__", "1"),
            ("__STDC_NO_COMPLEX__", "1"),
            ("__STDC_UTF_16__", "1"),
            ("__STDC_UTF_32__", "1"),
            ("__STDC_VERSION__", "201112L"),
            ("__STDC__", "1"),
            ("__USER_LABEL_PREFIX__", ""),
            ("__alignof__", "_Alignof"),
            ("__amd64", "1"),
            ("__amd64__", "1"),
            ("__chibicc__", "1"),
            ("__const__", "const"),
            ("__gnu_linux__", "1"),
            ("__inline__", "inline"),
            ("__linux", "1"),
            ("__linux__", "1"),
            ("__signed__", "signed"),
            ("__typeof__", "typeof"),
            ("__unix", "1"),
            ("__unix__", "1"),
            ("__volatile__", "volatile"),
            ("__x86_64", "1"),
            ("__x86_64__", "1"),
            ("linux", "1"),
            ("unix", "1"),
        ];
        for &(name, body) in PREDEFINED {
            self.define_macro(name, body);
        }
        // Dynamic builtins, synthesized at expansion time.
        self.add_builtin("__FILE__", Token::file_macro);
        self.add_builtin("__LINE__", Token::line_macro);
        self.add_builtin("__COUNTER__", Token::counter_macro);
        self.add_builtin("__TIMESTAMP__", Token::timestamp_macro);
        self.add_builtin("__BASE_FILE__", Token::base_file_macro);
        // __DATE__/__TIME__ are fixed at preprocessor start-up.
        let now = Local::now();
        self.define_macro("__DATE__", &format_date(now));
        self.define_macro("__TIME__", &format_time(now));
    }
}
// Encoding class of a C string literal, derived from its prefix.
#[derive(Debug, PartialEq)]
enum StringKind {
    None,  // no prefix: "..."
    UTF8,  // u8"..."
    UTF16, // u"..."
    UTF32, // U"..."
    Wide,  // L"..."
}
impl Token {
    // Classify a string-literal token by its encoding prefix (see
    // StringKind). Panics if the token text does not start like a string
    // literal.
    #[instrument(ret)]
    fn get_string_kind(&self) -> StringKind {
        // The two-byte "u8" prefix must be checked before the single 'u'.
        if self.loc[0] == b'u' && self.loc[1] == b'8' {
            return StringKind::UTF8;
        }
        match self.loc[0] {
            b'"' => StringKind::None,
            b'u' => StringKind::UTF16,
            b'U' => StringKind::UTF32,
            b'L' => StringKind::Wide,
            // Fixed typo in the panic message ("locaation" -> "location").
            _ => panic!("string kind location {}", self.loc[0] as char),
        }
    }
}
impl TokenList {
    // Concatenate adjacent string literals into a single string literal
    // as per the C spec.
    #[instrument(ret)]
    fn join_adjacent_string_literals(mut self, tokenize: &Tokenize) -> TokenList {
        // First pass: If regular string literals are adjacent to
        // wide string literals, regular string literals are converted to a wide
        // type before concatenation. In this pass, we do the conversion.
        let mut i = 0;
        while i < self.len() {
            // Only runs of two or more adjacent string tokens matter.
            if self[i].kind != TokenKind::Str
                || i + 1 == self.len()
                || self[i + 1].kind != TokenKind::Str
            {
                i += 1;
                continue;
            }
            // Determine the widest encoding used anywhere in the run.
            let mut kind = self[i].get_string_kind();
            let mut basety = if let TokenFields::Str(base_ty, _) = &self[i].other {
                base_ty.clone()
            } else {
                panic!();
            };
            for j in i + 1..self.len() {
                if self[j].kind != TokenKind::Str {
                    break;
                }
                let k = self[j].get_string_kind();
                if kind == StringKind::None {
                    kind = k;
                    basety = if let TokenFields::Str(base_ty, _) = &self[j].other {
                        base_ty.clone()
                    } else {
                        panic!();
                    };
                } else if k != StringKind::None && kind != k {
                    self[j].error("unsupported non-standard concatenation of string literals");
                }
            }
            // If the run contains a wide encoding, re-tokenize the narrow
            // literals with the wider base type.
            if basety.size > 1 {
                for j in i..self.len() {
                    if self[j].kind != TokenKind::Str {
                        break;
                    }
                    if let TokenFields::Str(base_ty, _) = &self[j].other {
                        if base_ty.size == 1 {
                            self[j] = tokenize.tokenize_string_literal(
                                &self[j],
                                &basety,
                                self[j].at_bol,
                                self[j].has_space,
                            );
                        }
                    } else {
                        panic!();
                    };
                }
            }
            // NOTE(review): if the string run extends to the very end of
            // the list this indexes one past the last token — confirm the
            // Index impl's behavior (same pattern in the second pass below).
            while self[i].kind == TokenKind::Str {
                i += 1;
            }
        }
        // Second pass: concatenate adjacent string literals
        let mut ret = TokenList {
            tokens: Vec::new(),
            idx: 0,
        };
        let mut i = 0;
        while i < self.len() {
            if self[i].kind != TokenKind::Str
                || i + 1 == self.len()
                || self[i + 1].kind != TokenKind::Str
            {
                ret.tokens.push(self[i].clone());
                i += 1;
            } else {
                // [i, j) is a maximal run of adjacent string tokens.
                let mut j = i + 1;
                while self[j].kind == TokenKind::Str {
                    j += 1;
                }
                // Total array length: each literal's length minus its own
                // NUL terminator, plus one trailing NUL.
                let mut len = 1;
                let mut buf: Vec<u8> = Vec::new();
                for k in i..j {
                    if let TokenFields::Str(ty, s) = &self[k].other {
                        buf.extend_from_slice(s.as_ref());
                        if let TypeKind::Array { len: l, .. } = ty.kind {
                            len += l - 1;
                        } else {
                            panic!();
                        }
                    } else {
                        panic!();
                    }
                }
                // The merged literal reuses the first token as template.
                let mut t = self[i].clone();
                if let TokenFields::Str(ty, _) = &self[i].other {
                    if let TypeKind::Array { base, .. } = &ty.kind {
                        t.other =
                            TokenFields::Str(array_of(*base.clone(), len), buf.into_boxed_slice());
                    } else {
                        panic!();
                    }
                } else {
                    panic!();
                }
                ret.tokens.push(t);
                i = j;
            }
        }
        return ret;
    }
}
impl Preprocess {
    // Entry point of the preprocessor: run the core loop, verify all
    // conditional-inclusion directives were terminated, convert
    // pp-numbers to real numeric tokens, merge adjacent string literals,
    // and apply accumulated #line deltas to each token.
    #[instrument]
    pub fn preprocess(&mut self, tokens: TokenList, tokenize: &mut Tokenize) -> TokenList {
        let mut out = preprocess2(tokens, self, tokenize);
        if let Some(ci) = self.cond_incl.last() {
            ci.borrow().tok.error("unterminated conditional directive");
        }
        Tokenize::convert_pp_tokens(&mut out.tokens);
        out = out.join_adjacent_string_literals(tokenize);
        for t in out.tokens.iter_mut() {
            t.line_no += t.line_delta;
        }
        out
    }
}
//
// parse.c (and add_type from type.c)
//
// This file contains a recursive descent parser for C.
//
// Most functions in this file are named after the symbols they are
// supposed to read from an input token list. For example, stmt() is
// responsible for reading a statement from a token list. The function
// then constructs an AST node representing a statement.
//
// Each function conceptually returns two values: an AST node and the
// remaining part of the input tokens. In the original C implementation the
// remaining tokens were returned through a pointer argument; in this Rust
// port the TokenList's `idx` cursor plays that role.
//
// Input tokens are represented by a TokenList rather than a linked list.
// Unlike many recursive descent parsers, there is no single global "input
// token stream": most parsing functions don't change the global state of
// the parser, so it is easy to look ahead an arbitrary number of tokens.
// Scope for local variables, global variables, typedefs
// or enum constants
// NOTE(review): presumably at most one of these alternatives is set per
// entry (variable, typedef, or enum constant) — confirm at the use sites.
struct VarScope {
    var: Option<Rc<RefCell<Obj>>>, // variable or function object
    type_def: Option<Type>,        // typedef'd type
    enum_ty: Option<Type>,         // enum type of an enum constant
    enum_val: Option<i32>,         // value of an enum constant
}
// Represent a block scope.
struct Scope {
    // C has two block scopes, one is for variables/typedefs and
    // the other is for struct/union/enum tags.
    vars: HashMap<String, Rc<RefCell<VarScope>>>, // name -> variable/typedef/enum entry
    tags: HashMap<String, Type>,                  // name -> struct/union/enum tag
}
impl Scope {
    // Create an empty block scope with no names or tags declared.
    fn new() -> Scope {
        Scope {
            vars: HashMap::new(),
            tags: HashMap::new(),
        }
    }
}
// Variable attributes such as typedef or extern
// (collected from declaration specifiers).
struct VarAttr {
    is_typedef: bool,
    is_static: bool,
    is_extern: bool,
    is_inline: bool,
    is_tls: bool, // thread-local storage (_Thread_local)
    align: i32,   // explicit alignment from _Alignas (0 if unspecified?) — confirm
}
// This struct represents a variable initializer. Since initializers
// can be nested (e.g. `int[2][2] = {{1, 2}, {3, 4}}`), this struct
// is a tree data structure.
struct Initializer {
    ty: Type,          // type being initialized at this node
    tok: Option<Token>, // representative token for error reporting
    // True if the array length is to be inferred from the initializer
    // (e.g. `int x[] = {...}`) — confirm at use sites.
    is_flexible: bool,
    // If it's not an aggregate type and has an initializer,
    // `expr` has an initialization expression.
    expr: Option<Node>,
    // If it's an initializer for an aggregate type (e.g. array or struct),
    // children has initializers for its children.
    children: Vec<Initializer>,
    // Only one member can be initialized for a union.
    // `mem` is used to clarify which member is initialized.
    mem: Option<Member>,
}
// For local variable initializer.
// Designates the element/member currently being initialized as a linked
// chain back toward the variable itself.
struct InitDesg {
    next: Option<Rc<RefCell<InitDesg>>>, // enclosing designator
    idx: i32,                            // array index at this level
    member: Option<Member>,              // struct/union member at this level
    var: Option<Rc<RefCell<Obj>>>,       // the variable, at the chain's root
}
// Parser state (the C original kept these in globals).
pub struct Parse {
    // All local variable instances created during parsing are
    // accumulated to this list.
    locals: VecDeque<Rc<RefCell<Obj>>>,
    // Likewise, global variables are accumulated to this list.
    globals: VecDeque<Rc<RefCell<Obj>>>,
    // Block scopes, innermost first.
    scope: VecDeque<Scope>,
    // Point to the function object the parser is current parsing
    current_fn: Option<Rc<RefCell<Obj>>>,
    // List of all goto statements and labels in the current function.
    gotos: Option<Rc<RefCell<Node>>>,
    labels: Option<Rc<RefCell<Node>>>,
    // Current "break" and "continue" jump targets.
    brk_label: Option<String>,
    cont_label: Option<String>,
    // Points to a node representing a switch if we are parsing
    // a switch statement. Otherwise, NULL.
    current_switch: Option<Rc<RefCell<Node>>>,
    // The alloca() function object, used for VLA allocation.
    builtin_alloca: Option<Rc<RefCell<Obj>>>,
    // Counter for generating unique label/variable names.
    unique_name_id: i32,
    // Names currently known to be type names (typedefs + keywords).
    typenames: HashSet<String>,
}
impl Parse {
pub fn new() -> Parse {
let m: HashSet<String> = TYPENAME_KWS.iter().map(|s| String::from(*s)).collect();
return Parse {
locals: VecDeque::new(),
globals: VecDeque::new(),
scope: VecDeque::from([Scope {
vars: HashMap::new(),
tags: HashMap::new(),
}]),
current_fn: None,
gotos: None,
labels: None,
brk_label: None,
cont_label: None,
current_switch: None,
builtin_alloca: None,
unique_name_id: 0,
typenames: m,
};
}
}
// Round up `n` to the nearest multiple of `align`. For instance,
// align_to(5, 8) returns 8 and align_to(11, 8) returns 16.
fn align_to(n: i32, align: i32) -> i32 {
    // (x / a) * a == x - x % a for Rust's truncating integer division.
    let bumped = n + align - 1;
    bumped - bumped % align
}
// Round `n` down to the nearest multiple of `align`.
fn align_down(n: i32, align: i32) -> i32 {
    align_to(n - align + 1, align)
}
impl Parse {
    // Open a new innermost block scope.
    fn enter_scope(&mut self) {
        self.scope.push_front(Scope::new());
    }
    // Close the innermost block scope.
    fn leave_scope(&mut self) {
        self.scope.pop_front();
    }
    // Find a variable/typedef/enum-constant by name, innermost scope first.
    fn find_var(&self, tok: &Token) -> Option<Rc<RefCell<VarScope>>> {
        let name = tok.loc.as_str();
        self.scope
            .iter()
            .find_map(|sc| sc.vars.get(name).cloned())
    }
    // Find a struct/union/enum tag by name, innermost scope first.
    fn find_tag(&self, tok: &Token) -> Option<&Type> {
        let name = tok.loc.as_str();
        self.scope.iter().find_map(|sc| sc.tags.get(name))
    }
}
impl Node {
    /// Creates a binary-operator node; both operands are freshly boxed.
    fn new_binary(kind: NodeBinaryKind, lhs: Node, rhs: Node, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Binary {
                kind,
                lhs: Rc::new(RefCell::new(lhs)),
                rhs: Rc::new(RefCell::new(rhs)),
            },
        };
    }
    /// Like `new_binary`, but the right operand is an already-shared node.
    fn new_binary1(kind: NodeBinaryKind, lhs: Node, rhs: Rc<RefCell<Node>>, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Binary {
                kind,
                lhs: Rc::new(RefCell::new(lhs)),
                rhs,
            },
        };
    }
    /// Creates a unary-operator node.
    fn new_unary(kind: NodeUnaryKind, expr: Node, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Unary {
                kind,
                expr: Rc::new(RefCell::new(expr)),
            },
        };
    }
    /// Like `new_unary`, but the operand is an already-shared node.
    fn new_unary1(kind: NodeUnaryKind, expr: Rc<RefCell<Node>>, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Unary { kind, expr },
        };
    }
    /// Integer literal of type `int`.
    fn new_num(val: i64, tok: Token) -> Node {
        return Node {
            ty: Some(TY_INT),
            tok,
            kind: NodeFields::Int(val),
        };
    }
    /// Integer literal of type `long`.
    fn new_long(val: i64, tok: Token) -> Node {
        return Node {
            ty: Some(TY_LONG),
            tok,
            kind: NodeFields::Int(val),
        };
    }
    /// Integer literal of type `unsigned long`.
    fn new_ulong(val: i64, tok: Token) -> Node {
        return Node {
            // Fix: this was `None`, which made `add_type` hit the
            // `Int` arm's assertion; an unsigned-long literal carries
            // TY_ULONG just like `new_long` carries TY_LONG.
            ty: Some(TY_ULONG),
            tok,
            kind: NodeFields::Int(val),
        };
    }
    /// Floating-point literal.
    // NOTE(review): `ty` is left `None` here; callers appear to assign the
    // concrete float type afterwards — confirm, since `add_type` does not
    // fill it in for `Float` nodes.
    fn new_fnum(val: f64, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Float(val),
        };
    }
    /// Variable reference.
    fn new_var_node(var: Rc<RefCell<Obj>>, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::Var(var),
        };
    }
    /// Reference to the hidden pointer of a variable-length array.
    fn new_vla_ptr(var: Rc<RefCell<Obj>>, tok: Token) -> Node {
        return Node {
            ty: None,
            tok,
            kind: NodeFields::VlaPtr(var),
        };
    }
    /// Wraps `expr` in a cast to `ty`. The operand is typed first so the
    /// cast's source type is known.
    fn new_cast(expr: Rc<RefCell<Node>>, ty: Type) -> Node {
        expr.borrow_mut().add_type();
        let tok = expr.borrow().tok.clone();
        return Node {
            ty: Some(copy_type(ty)),
            tok,
            kind: NodeFields::Cast(expr),
        };
    }
    /// Expression statement wrapping `expr`.
    fn new_expr_stmt(expr: Node, tok: Token) -> Node {
        return Node {
            kind: NodeFields::Stmt {
                kind: NodeStmtKind::ExprStmt,
                expr: Some(Rc::new(RefCell::new(expr))),
                body: Vec::new(),
            },
            ty: None,
            tok,
        };
    }
    /// Empty control-flow node (if/for/switch/goto/label/…); the parser
    /// fills in the relevant fields afterwards.
    fn new_cond(kind: NodeCondKind, tok: Token) -> Node {
        return Node {
            kind: NodeFields::Cond {
                kind,
                cond: None,
                then: None,
                els: None,
                init: None,
                inc: None,
                brk_label: None,
                cont_label: None,
                case_next: None,
                case_default: None,
                case_begin: None,
                case_end: None,
                expr: None,
                label: None,
                unique_label: None,
                goto_next: None,
            },
            ty: None,
            tok,
        };
    }
    /// Computes and stores the type of this node (and, recursively, of its
    /// children). Idempotent: returns immediately if `ty` is already set.
    /// Statement nodes legitimately end up with `ty == None`.
    #[instrument(ret)]
    fn add_type(&mut self) {
        // Wrap the node stored in `slot` in a cast to `ty`.
        //
        // Fix: the previous code did
        //     *slot.borrow_mut() = Node::new_cast(slot.clone(), ty)
        // which makes the new cast node its own child (the cast's operand
        // Rc is the very cell being overwritten) — a reference cycle that
        // sends any later traversal into an infinite loop. Moving the old
        // node into a *fresh* cell breaks the cycle.
        fn cast_in_place(slot: &Rc<RefCell<Node>>, ty: Type) {
            let inner = Rc::new(RefCell::new(slot.borrow().clone()));
            *slot.borrow_mut() = Node::new_cast(inner, ty);
        }
        // For many binary operators, we implicitly promote operands so
        // that both operands have the same type. Any integral type smaller than
        // int is always promoted to int. If the type of one operand is larger
        // than the other's (e.g. "long" vs. "int"), the smaller operand will
        // be promoted to match with the other.
        //
        // This operation is called the "usual arithmetic conversion".
        #[instrument(ret)]
        fn usual_arith_conv(lhs: &Rc<RefCell<Node>>, rhs: &Rc<RefCell<Node>>) {
            let ty = get_common_type(
                lhs.borrow().ty.as_ref().unwrap(),
                rhs.borrow().ty.as_ref().unwrap(),
            );
            cast_in_place(lhs, ty.clone());
            cast_in_place(rhs, ty);
        }
        if self.ty.is_some() {
            return;
        }
        match &self.kind {
            NodeFields::NullExpr => {
                // TODO:
            }
            NodeFields::Binary { kind, lhs, rhs } => {
                // Type the operands first; the conversions below read
                // their types and would otherwise panic on an unwrap.
                lhs.borrow_mut().add_type();
                rhs.borrow_mut().add_type();
                match kind {
                    NodeBinaryKind::Add
                    | NodeBinaryKind::Sub
                    | NodeBinaryKind::Mul
                    | NodeBinaryKind::Div
                    | NodeBinaryKind::Mod
                    | NodeBinaryKind::BitAnd
                    | NodeBinaryKind::BitOr
                    | NodeBinaryKind::BitXor => {
                        usual_arith_conv(lhs, rhs);
                        self.ty = lhs.borrow().ty.clone();
                    }
                    NodeBinaryKind::Shl | NodeBinaryKind::Shr => {
                        // Shift results take the (promoted) left operand's type.
                        self.ty = lhs.borrow().ty.clone();
                    }
                    NodeBinaryKind::Eq
                    | NodeBinaryKind::Ne
                    | NodeBinaryKind::Lt
                    | NodeBinaryKind::Le => {
                        usual_arith_conv(lhs, rhs);
                        self.ty = Some(TY_INT);
                    }
                    NodeBinaryKind::Assign => {
                        if let TypeKind::Array { .. } = &lhs.borrow().ty.as_ref().unwrap().kind {
                            let tok = &lhs.borrow().tok;
                            tok.error("not an lvalue");
                        }
                        // Struct assignment copies as-is; everything else is
                        // converted to the left-hand side's type.
                        if let TypeKind::Struct { .. } = lhs.borrow().ty.as_ref().unwrap().kind {
                        } else {
                            cast_in_place(rhs, lhs.borrow().ty.clone().unwrap());
                        }
                        self.ty = lhs.borrow().ty.clone();
                    }
                    NodeBinaryKind::Comma => {
                        self.ty = rhs.borrow().ty.clone();
                    }
                    NodeBinaryKind::LogAnd | NodeBinaryKind::LogOr => {
                        self.ty = Some(TY_INT);
                    }
                }
            }
            NodeFields::Member { member, expr } => {
                expr.borrow_mut().add_type();
                self.ty = Some(member.ty.clone());
            }
            NodeFields::Unary { kind, expr } => {
                expr.borrow_mut().add_type();
                match kind {
                    NodeUnaryKind::Neg => {
                        // Negation promotes the operand to at least int.
                        let ty = get_common_type(&TY_INT, expr.borrow().ty.as_ref().unwrap());
                        cast_in_place(expr, ty.clone());
                        self.ty = Some(ty);
                    }
                    NodeUnaryKind::Addr => {
                        // Taking the address of an array yields a pointer to
                        // its element type, not to the array itself.
                        let e = expr.borrow();
                        let ty = e.ty.as_ref().unwrap();
                        if let TypeKind::Array { base, .. } = &ty.kind {
                            self.ty = Some(pointer_to(*base.clone()));
                        } else {
                            // TODO: what about VLA_ARRAY?
                            self.ty = Some(pointer_to(ty.clone()));
                        }
                    }
                    NodeUnaryKind::Deref => {
                        let e = expr.borrow();
                        let ty = e.ty.as_ref().unwrap();
                        match &ty.kind {
                            TypeKind::Ptr { base, .. }
                            | TypeKind::Array { base, .. }
                            | TypeKind::Vla { base, .. } => {
                                if matches!(base.kind, TypeKind::Void) {
                                    self.tok.error("dereferencing a void pointer");
                                }
                                self.ty = Some(base.as_ref().clone());
                            }
                            TypeKind::Void
                            | TypeKind::Bool
                            | TypeKind::Char
                            | TypeKind::Short
                            | TypeKind::Int
                            | TypeKind::Long
                            | TypeKind::Float
                            | TypeKind::Double
                            | TypeKind::LDouble
                            | TypeKind::Enum
                            | TypeKind::Func { .. }
                            | TypeKind::Struct { .. }
                            | TypeKind::Union { .. } => todo!(),
                        }
                    }
                    NodeUnaryKind::BitNot => {
                        self.ty = expr.borrow().ty.clone();
                    }
                    NodeUnaryKind::Not => {
                        self.ty = Some(TY_INT);
                    }
                }
            }
            NodeFields::Return { .. } => {}
            NodeFields::Cond {
                kind,
                cond,
                then,
                els,
                init,
                inc,
                ..
            } => {
                // All children are optional (e.g. `if` without `else`,
                // `for` without an increment, plain labels/gotos); the
                // previous unconditional unwraps panicked on such nodes.
                if let Some(n) = cond {
                    n.borrow_mut().add_type();
                }
                if let Some(n) = then {
                    n.borrow_mut().add_type();
                }
                if let Some(n) = els {
                    n.borrow_mut().add_type();
                }
                if let Some(init) = init {
                    match init {
                        Either::Left(n) => {
                            n.borrow_mut().add_type();
                        }
                        Either::Right(list) => {
                            for n in list.iter() {
                                n.borrow_mut().add_type();
                            }
                        }
                    }
                }
                if let Some(n) = inc {
                    n.borrow_mut().add_type();
                }
                match kind {
                    NodeCondKind::Cond => {
                        // `?:` — both arms exist for a conditional expression.
                        if matches!(
                            then.clone().unwrap().borrow().ty.as_ref().unwrap().kind,
                            TypeKind::Void
                        ) || matches!(
                            els.clone().unwrap().borrow().ty.as_ref().unwrap().kind,
                            TypeKind::Void
                        ) {
                            self.ty = Some(TY_VOID);
                        } else {
                            usual_arith_conv(&then.clone().unwrap(), &els.clone().unwrap());
                            self.ty = then.clone().unwrap().borrow().ty.clone();
                        }
                    }
                    NodeCondKind::If
                    | NodeCondKind::For
                    | NodeCondKind::Do
                    | NodeCondKind::Switch
                    | NodeCondKind::Case
                    | NodeCondKind::Goto
                    | NodeCondKind::GotoExpr
                    | NodeCondKind::Label => {}
                    NodeCondKind::LabelVal => {
                        // GNU &&label has type void*.
                        self.ty = Some(pointer_to(TY_VOID));
                    }
                }
            }
            NodeFields::Block { .. } => {}
            NodeFields::FnCall { func_ty, args, .. } => {
                for n in args.iter() {
                    n.borrow_mut().add_type();
                }
                // A call's result type is the callee's return type.
                // NOTE(review): `func_ty` holds the full function type at
                // least for built-in calls (see `new_alloca`); if the
                // parser ever stores the return type here directly, the
                // fallback below preserves the old behavior.
                self.ty = match &func_ty.kind {
                    TypeKind::Func { return_ty, .. } => Some(*return_ty.clone()),
                    _ => Some(func_ty.clone()),
                };
            }
            NodeFields::Stmt { kind, body, expr } => {
                for n in body.iter() {
                    n.borrow_mut().add_type();
                }
                // Type the wrapped expression too (an ExprStmt's value may
                // be read by an enclosing statement expression).
                if let Some(e) = expr {
                    e.borrow_mut().add_type();
                }
                match kind {
                    NodeStmtKind::ExprStmt => {}
                    NodeStmtKind::StmtExpr => {
                        // A GNU statement expression takes the type of the
                        // last expression statement in its body.
                        match body.last() {
                            Some(stmt) => {
                                let s = stmt.borrow();
                                match &s.kind {
                                    NodeFields::Stmt { kind, expr, .. } => match kind {
                                        NodeStmtKind::ExprStmt => {
                                            assert!(expr.is_some());
                                            self.ty = expr.as_ref().unwrap().borrow().ty.clone();
                                            return;
                                        }
                                        NodeStmtKind::StmtExpr => {}
                                    },
                                    _ => panic!("stmt is not a NodeFields::Stmt: {:?}", s.kind),
                                }
                            }
                            None => {}
                        }
                        self.tok
                            .error("statement expression returning void is not supported");
                    }
                }
            }
            NodeFields::Var(var) | NodeFields::VlaPtr(var) => {
                self.ty = Some(var.borrow().ty.clone());
            }
            NodeFields::Cast(_) => {
                assert!(self.ty.is_some());
            }
            NodeFields::Int(_) => {
                assert!(self.ty.is_some());
            }
            NodeFields::Float(_) => {}
            NodeFields::MemZero { .. } => {}
            NodeFields::Asm(_) => todo!(),
            NodeFields::Cas { addr, old, new } => {
                addr.borrow_mut().add_type();
                old.borrow_mut().add_type();
                new.borrow_mut().add_type();
                self.ty = Some(TY_BOOL);
                // Fix: these checks were inverted — they reported
                // "pointer expected" exactly when the operand WAS a pointer.
                if !matches!(
                    addr.borrow().ty.as_ref().unwrap().kind,
                    TypeKind::Ptr { .. }
                ) {
                    addr.borrow().tok.error("pointer expected");
                }
                if !matches!(old.borrow().ty.as_ref().unwrap().kind, TypeKind::Ptr { .. }) {
                    old.borrow().tok.error("pointer expected");
                }
            }
            NodeFields::Exch { lhs, rhs } => {
                // Type the operands before reading the left type (was an
                // unwrap on a possibly-unset `ty`).
                lhs.borrow_mut().add_type();
                rhs.borrow_mut().add_type();
                let lty = lhs.borrow().ty.clone().unwrap();
                match &lty.kind {
                    TypeKind::Ptr { base } => {
                        self.ty = Some(base.as_ref().clone());
                    }
                    _ => lhs.borrow().tok.error("pointer expected"),
                }
            }
        }
        // No trailing assert: statement nodes (Return, Block, ExprStmt, …)
        // legitimately have no type.
    }
}
impl Parse {
    /// Registers `name` in the innermost scope with an empty `VarScope`
    /// entry and returns the entry so the caller can fill in the
    /// variable/typedef/enum data.
    fn push_scope(&mut self, name: String) -> Rc<RefCell<VarScope>> {
        let entry = Rc::new(RefCell::new(VarScope {
            var: None,
            type_def: None,
            enum_ty: None,
            enum_val: None,
        }));
        let innermost = self.scope.front_mut().unwrap();
        innermost.vars.insert(name, entry.clone());
        entry
    }
}
impl Initializer {
    /// Creates an initializer tree mirroring the shape of `ty`.
    ///
    /// `is_flexible` is true when the object's size may be completed by its
    /// initializer (e.g. `int x[] = {...}` or a struct whose last member is
    /// a flexible array); in that case the affected node is marked
    /// `is_flexible` instead of having children allocated.
    fn new(ty: Type, is_flexible: bool) -> Initializer {
        let mut init = Initializer {
            ty,
            tok: None,
            is_flexible: false,
            expr: None,
            children: Vec::new(),
            mem: None,
        };
        match &init.ty.kind {
            // Scalar, pointer and function types need no children.
            TypeKind::Void
            | TypeKind::Bool
            | TypeKind::Char
            | TypeKind::Short
            | TypeKind::Int
            | TypeKind::Long
            | TypeKind::Float
            | TypeKind::Double
            | TypeKind::LDouble
            | TypeKind::Enum
            | TypeKind::Ptr { .. }
            | TypeKind::Func { .. } => {}
            TypeKind::Array { base, len } => {
                // An incomplete array (`int x[]`, size < 0) gets its length
                // from the initializer; defer child allocation to the caller.
                if is_flexible && init.ty.size < 0 {
                    init.is_flexible = true;
                    return init;
                }
                // Fix: `len` can be negative for an incomplete array; the
                // old `with_capacity(*len as usize)` turned -1 into a huge
                // usize and panicked with a capacity overflow. Clamp to 0
                // (the loop body never ran for negative lengths anyway).
                let n = (*len).max(0) as usize;
                let mut children = Vec::with_capacity(n);
                for _ in 0..n {
                    children.push(Initializer::new(base.as_ref().clone(), false));
                }
                init.children = children;
                return init;
            }
            TypeKind::Vla { .. } => {} // Do nothing?
            TypeKind::Struct {
                members,
                is_flexible: tyif,
                ..
            }
            | TypeKind::Union {
                members,
                is_flexible: tyif,
            } => {
                let mut children = Vec::with_capacity(members.len());
                for (i, mem) in members.iter().enumerate() {
                    // The last member of a flexible struct is itself
                    // flexible (its length comes from the initializer).
                    if is_flexible && *tyif && i + 1 == members.len() {
                        let child = Initializer {
                            ty: mem.ty.clone(),
                            tok: None,
                            is_flexible: true,
                            expr: None,
                            children: Vec::new(),
                            mem: None,
                        };
                        children.push(child);
                    } else {
                        children.push(Initializer::new(mem.ty.clone(), false));
                    }
                }
                init.children = children;
                return init;
            }
        }
        return init;
    }
}
impl Parse {
    /// Creates a variable object, registers its name in the current scope
    /// and returns it. The caller decides local vs. global specifics.
    fn new_var(&mut self, name: String, ty: Type) -> Rc<RefCell<Obj>> {
        let align = ty.align;
        let var = Rc::new(RefCell::new(Obj {
            name: name.clone(),
            ty,
            tok: None,
            is_local: false,
            align,
            offset: 0,
            is_function: false,
            is_definition: false,
            is_static: false,
            is_tentative: false,
            is_tls: false,
            init_data: Vec::new(),
            rel: None,
            func: None,
        }));
        self.push_scope(name).borrow_mut().var = Some(var.clone());
        var
    }
    /// Creates a local variable and prepends it to the locals list.
    fn new_lvar(&mut self, name: String, ty: Type) -> Rc<RefCell<Obj>> {
        let var = self.new_var(name, ty);
        var.borrow_mut().is_local = true;
        self.locals.push_front(var.clone());
        var
    }
    /// Creates a global variable (marked as a static definition) and
    /// prepends it to the globals list.
    fn new_gvar(&mut self, name: String, ty: Type) -> Rc<RefCell<Obj>> {
        let var = self.new_var(name, ty);
        {
            let mut v = var.borrow_mut();
            v.is_definition = true;
            v.is_static = true;
        }
        self.globals.push_front(var.clone());
        var
    }
}
impl Parse {
    /// Returns a fresh, program-wide unique label name (".L..0", ".L..1", ...).
    fn new_unique_name(&mut self) -> String {
        let name = format!(".L..{}", self.unique_name_id);
        // Fix: the counter was never bumped, so every anonymous global
        // collided on ".L..0".
        self.unique_name_id += 1;
        return name;
    }
    /// Creates an anonymous global variable (string literals, static locals).
    fn new_anon_gvar(&mut self, ty: Type) -> Rc<RefCell<Obj>> {
        let name = self.new_unique_name();
        return self.new_gvar(name, ty);
    }
    /// Creates an anonymous global holding the bytes of a string literal.
    fn new_string_literal(&mut self, p: &[u8], ty: Type) -> Rc<RefCell<Obj>> {
        let var = self.new_anon_gvar(ty);
        var.borrow_mut().init_data = p.to_vec();
        return var;
    }
}
impl Token {
    /// Returns this token's text, reporting an error first if the token is
    /// not an identifier.
    fn get_ident(&self) -> &str {
        if !matches!(self.kind, TokenKind::Ident) {
            self.error("expected an identifier");
        }
        self.loc.as_str()
    }
}
impl Parse {
    /// If `tok` names a typedef visible in the current scope chain,
    /// returns the aliased type.
    fn find_typedef(&self, tok: &Token) -> Option<Type> {
        if tok.kind != TokenKind::Ident {
            return None;
        }
        self.find_var(tok)
            .and_then(|sc| sc.borrow().type_def.clone())
    }
    /// Records a struct/union/enum tag in the innermost scope.
    fn push_tag_scope(&mut self, tok: &Token, ty: Type) {
        self.scope
            .front_mut()
            .unwrap()
            .tags
            .insert(tok.loc.as_str().into(), ty);
    }
}
// declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
// | "typedef" | "static" | "extern" | "inline"
// | "_Thread_local" | "__thread"
// | "signed" | "unsigned"
// | struct-decl | union-decl | typedef-name
// | enum-specifier | typeof-specifier
// | "const" | "volatile" | "auto" | "register" | "restrict"
// | "__restrict" | "__restrict__" | "_Noreturn")+
//
// The order of typenames in a type-specifier doesn't matter. For
// example, `int long static` means the same as `static long int`.
// That can also be written as `static long` because you can omit
// `int` if `long` or `short` are specified. However, something like
// `char int` is not a valid type specifier. We have to accept only a
// limited combination of the typenames.
//
// In this function, we count the number of occurrences of each typename
// while keeping the "current" type object that the typenames up
// until that point represent. When we reach a non-typename token,
// we return the current type object.
impl TokenList {
    /// Parses declaration specifiers and returns the resulting type.
    /// `attr` receives storage-class/alignment info; passing `None`
    /// forbids storage-class specifiers in this context.
    fn declspec(&mut self, attr: &mut Option<VarAttr>, parse: &mut Parse) -> Type {
        // We use a single integer as counters for all typenames.
        // For example, bits 0 and 1 represent how many times we saw
        // the keyword "void" so far. With this, we can use a switch
        // statement as you can see below.
        const VOID: i32 = 1 << 0;
        const BOOL: i32 = 1 << 2;
        const CHAR: i32 = 1 << 4;
        const SHORT: i32 = 1 << 6;
        const INT: i32 = 1 << 8;
        const LONG: i32 = 1 << 10;
        const FLOAT: i32 = 1 << 12;
        const DOUBLE: i32 = 1 << 14;
        const OTHER: i32 = 1 << 16;
        const SIGNED: i32 = 1 << 17;
        const UNSIGNED: i32 = 1 << 18;
        let mut ty = TY_INT;
        let mut counter = 0;
        let mut is_atomic = false;
        while parse.is_typename(&self[0]) {
            // Handle storage class specifiers.
            if [
                "typedef",
                "static",
                "extern",
                "inline",
                "_Thread_local",
                "__thread",
            ]
            .iter()
            .any(|op| self[0].equal(op))
            {
                match attr {
                    None => {
                        self[0].error("storage class specifier is not allowed in this context");
                    }
                    Some(attr) => {
                        if self[0].equal("typedef") {
                            attr.is_typedef = true;
                        } else if self[0].equal("static") {
                            attr.is_static = true;
                        } else if self[0].equal("extern") {
                            attr.is_extern = true;
                        } else if self[0].equal("inline") {
                            attr.is_inline = true;
                        } else {
                            attr.is_tls = true;
                        }
                        if attr.is_typedef
                            && (attr.is_static || attr.is_extern || attr.is_inline || attr.is_tls)
                        {
                            self[0].error("typedef may not be used together with static, extern, inline, __thread or _Thread_local");
                        }
                        self.idx += 1;
                        continue;
                    }
                }
            }
            // These keywords are recognized but ignored
            if [
                "const",
                "volatile",
                "auto",
                "register",
                "restrict",
                "__restrict",
                "__restrict__",
                "_Noreturn",
            ]
            .iter()
            .any(|op| self.consume(op))
            {
                continue;
            }
            if self[0].equal("_Atomic") {
                self.idx += 1;
                if self[0].equal("(") {
                    // Fix: consume the "(" before reading the type name;
                    // previously it was left in the stream and
                    // `_Atomic(T)` mis-parsed.
                    self.idx += 1;
                    ty = self.typename(parse);
                    self.skip(")");
                }
                is_atomic = true;
                continue;
            }
            if self[0].equal("_Alignas") {
                match attr {
                    None => {
                        self[0].error("_Alignas is not allowed in this context");
                    }
                    Some(attr) => {
                        self.idx += 1;
                        self.skip("(");
                        // _Alignas accepts either a type name or a constant.
                        if parse.is_typename(&self[0]) {
                            attr.align = self.typename(parse).align;
                        } else {
                            attr.align = self.const_expr(parse) as i32;
                        }
                        self.skip(")");
                        continue;
                    }
                }
            }
            // Handle user-defined types
            let ty2 = parse.find_typedef(&self[0]);
            if ["struct", "union", "enum", "typeof"]
                .iter()
                .any(|op| self[0].equal(op))
                || ty2.is_some()
            {
                // A user-defined type may not follow any built-in specifier.
                if counter > 0 {
                    break;
                }
                if self[0].equal("struct") {
                    self.idx += 1;
                    ty = self.struct_decl(parse);
                } else if self[0].equal("union") {
                    self.idx += 1;
                    ty = self.union_decl(parse);
                } else if self[0].equal("enum") {
                    self.idx += 1;
                    ty = self.enum_specifier(parse);
                } else if self[0].equal("typeof") {
                    self.idx += 1;
                    ty = self.typeof_specifier(parse);
                } else {
                    ty = ty2.unwrap();
                    self.idx += 1;
                }
                counter += OTHER;
                continue;
            }
            // Handle built-in types.
            if self[0].equal("void") {
                counter += VOID;
            } else if self[0].equal("_Bool") {
                counter += BOOL;
            } else if self[0].equal("char") {
                counter += CHAR;
            } else if self[0].equal("short") {
                counter += SHORT;
            } else if self[0].equal("int") {
                counter += INT;
            } else if self[0].equal("long") {
                counter += LONG;
            } else if self[0].equal("float") {
                counter += FLOAT;
            } else if self[0].equal("double") {
                counter += DOUBLE;
            } else if self[0].equal("signed") {
                counter |= SIGNED;
            } else if self[0].equal("unsigned") {
                counter |= UNSIGNED;
            } else {
                unreachable!();
            }
            // Decode the accumulated counter into a concrete type; only a
            // limited set of typename combinations is valid.
            if counter == VOID {
                ty = TY_VOID;
            } else if counter == BOOL {
                ty = TY_BOOL;
            } else if [CHAR, SIGNED + CHAR].contains(&counter) {
                ty = TY_CHAR;
            } else if counter == UNSIGNED + CHAR {
                ty = TY_UCHAR;
            } else if [SHORT, SHORT + INT, SIGNED + SHORT, SIGNED + SHORT + INT].contains(&counter)
            {
                ty = TY_SHORT;
            } else if [UNSIGNED + SHORT, UNSIGNED + SHORT + INT].contains(&counter) {
                ty = TY_USHORT;
            } else if [INT, SIGNED, SIGNED + INT].contains(&counter) {
                ty = TY_INT
            } else if [UNSIGNED, UNSIGNED + INT].contains(&counter) {
                ty = TY_UINT
            } else if [
                LONG,
                LONG + INT,
                LONG + LONG,
                LONG + LONG + INT,
                SIGNED + LONG,
                SIGNED + LONG + INT,
                SIGNED + LONG + LONG,
                SIGNED + LONG + LONG + INT,
            ]
            .contains(&counter)
            {
                ty = TY_LONG;
            } else if [
                UNSIGNED + LONG,
                UNSIGNED + LONG + INT,
                UNSIGNED + LONG + LONG,
                UNSIGNED + LONG + LONG + INT,
            ]
            .contains(&counter)
            {
                ty = TY_ULONG;
            } else if counter == FLOAT {
                ty = TY_FLOAT;
            } else if counter == DOUBLE {
                ty = TY_DOUBLE;
            } else if counter == LONG + DOUBLE {
                ty = TY_LDOUBLE;
            } else {
                self[0].error("invalid type");
            }
            self.idx += 1;
        }
        if is_atomic {
            ty = copy_type(ty);
            ty.is_atomic = true;
        }
        return ty;
    }
    // func-params = ("void" | param ("," param)* ("," "...")?)? ")"
    // param = declspec declarator
    fn func_params(&mut self, mut ty: Type, parse: &mut Parse) -> Type {
        // "(void)" declares an empty parameter list.
        if self[0].equal("void") && self[1].equal(")") {
            self.idx += 2;
            return func_type(ty);
        }
        let mut params = Vec::new();
        let mut is_variadic = false;
        while !self[0].equal(")") {
            if params.len() > 0 {
                self.skip(",");
            }
            if self[0].equal("...") {
                is_variadic = true;
                self.idx += 1;
                self.skip(")");
                break;
            }
            let mut ty2 = self.declspec(&mut None, parse);
            ty2 = self.declarator(ty2, parse);
            let name = match &ty2.decl {
                None => unreachable!(),
                Some(decl) => decl.name.clone(),
            };
            // "array of T" and "function returning T" parameters decay to
            // pointers; keep the declared name on the adjusted type.
            match &ty2.kind {
                TypeKind::Func { .. } => {
                    ty2 = pointer_to(ty2);
                    match ty2.decl.as_mut() {
                        Some(decl) => decl.name = name,
                        None => unreachable!(),
                    }
                }
                TypeKind::Array { base, .. } => {
                    ty2 = pointer_to(*base.clone());
                    match ty2.decl.as_mut() {
                        Some(decl) => decl.name = name,
                        None => unreachable!(),
                    }
                }
                _ => {}
            }
            params.push(copy_type(ty2));
        }
        // An empty parameter list `()` means "unspecified", treated as
        // variadic.
        if params.len() == 0 {
            is_variadic = true;
        }
        ty = func_type(ty);
        match &mut ty.kind {
            TypeKind::Func {
                params: p,
                is_variadic: v,
                ..
            } => {
                *p = params;
                *v = is_variadic;
            }
            _ => unreachable!(),
        }
        self.skip(")");
        return ty;
    }
    // array-dimensions = ("static" | "restrict")* const-expr? "]" type-suffix
    fn array_dimensions(&mut self, mut ty: Type, parse: &mut Parse) -> Type {
        while ["static", "restrict"].iter().any(|op| self[0].equal(op)) {
            self.idx += 1;
        }
        // "[]" — an array of unknown length.
        if self[0].equal("]") {
            self.idx += 1;
            ty = self.type_suffix(ty, parse);
            return array_of(ty, -1);
        }
        let mut expr = self.conditional(parse);
        self.skip("]");
        ty = self.type_suffix(ty, parse);
        // Fix: the old match treated every non-Array/Vla element type as
        // unreachable, so even `int x[10]` panicked. If the element type is
        // itself variable-length this dimension is a VLA too; otherwise it
        // is an ordinary constant-length array.
        // TODO(review): a non-constant `expr` over a fixed-size element
        // type should also produce a VLA (C99 6.7.5.2); that needs an
        // is-const-expression check here.
        if matches!(ty.kind, TypeKind::Vla { .. }) {
            return vla_of(ty, Rc::new(RefCell::new(expr)));
        }
        return array_of(ty, expr.eval() as i32);
    }
    // type-suffix = "(" func-params
    //             | "[" array-dimensions
    //             | ε
    fn type_suffix(&mut self, ty: Type, parse: &mut Parse) -> Type {
        if self[0].equal("(") {
            return self.func_params(ty, parse);
        }
        if self[0].equal("[") {
            return self.array_dimensions(ty, parse);
        }
        return ty;
    }
    // pointers = ("*" ("const" | "volatile" | "restrict")*)*
    fn pointers(&mut self, mut ty: Type) -> Type {
        while self.consume("*") {
            ty = pointer_to(ty);
            // Qualifiers after "*" are recognized but ignored.
            while [
                "const",
                "volatile",
                "restrict",
                "__restrict",
                "__restrict__",
            ]
            .iter()
            .any(|op| self[0].equal(op))
            {
                self.idx += 1;
            }
        }
        return ty;
    }
    // declarator = pointers ("(" ident ")" | "(" declarator ")" | ident) type-suffix
    fn declarator(&mut self, mut ty: Type, parse: &mut Parse) -> Type {
        ty = self.pointers(ty);
        if self[0].equal("(") {
            // A parenthesized declarator, e.g. `int (*x)[5]`. First skim
            // past the inner declarator with a throwaway base type to find
            // the ")" and the suffix that applies to `ty`, then re-parse
            // the inner declarator for real with the fully-suffixed base.
            let start_idx = self.idx;
            self.idx += 1;
            let dummy = Type {
                kind: TypeKind::Void,
                size: 0,
                align: 0,
                is_unsigned: false,
                is_atomic: false,
                origin: None,
                decl: None,
            };
            self.declarator(dummy, parse);
            self.skip(")");
            // Fix: the suffix after ")" binds to the base type and was
            // previously never applied at all (and a leftover debug
            // `println!` was removed here).
            ty = self.type_suffix(ty, parse);
            let end_idx = self.idx;
            self.idx = start_idx + 1;
            let ty = self.declarator(ty, parse);
            // Fix: resume after the whole declarator (")" and suffix
            // included); previously the cursor was left at the ")" of the
            // re-parse, desynchronizing the caller.
            self.idx = end_idx;
            return ty;
        }
        let name_pos = Box::new(self[0].clone());
        let name = if self[0].kind == TokenKind::Ident {
            let n = Some(Box::new(self[0].clone()));
            self.idx += 1;
            n
        } else {
            None
        };
        ty = self.type_suffix(ty, parse);
        ty.decl = Some(TypeDeclaration { name, name_pos });
        return ty;
    }
    // abstract-declarator = pointers ("(" abstract-declarator ")")? type-suffix
    fn abstract_declarator(&mut self, mut ty: Type, parse: &mut Parse) -> Type {
        ty = self.pointers(ty);
        if self[0].equal("(") {
            // Same two-pass trick as in `declarator` above.
            let start_idx = self.idx;
            self.idx += 1;
            let dummy = Type {
                kind: TypeKind::Void,
                size: 0,
                align: 0,
                is_unsigned: false,
                is_atomic: false,
                origin: None,
                decl: None,
            };
            self.abstract_declarator(dummy, parse);
            self.skip(")");
            ty = self.type_suffix(ty, parse);
            let end_idx = self.idx;
            self.idx = start_idx + 1;
            let ty = self.abstract_declarator(ty, parse);
            // Fix: restore the cursor to after the suffix (see `declarator`).
            self.idx = end_idx;
            return ty;
        }
        return self.type_suffix(ty, parse);
    }
    // type-name = declspec abstract-declarator
    fn typename(&mut self, parse: &mut Parse) -> Type {
        let ty = self.declspec(&mut None, parse);
        return self.abstract_declarator(ty, parse);
    }
}
// Keywords that may start a declaration specifier. They are loaded into
// `Parse::typenames` (see `Parse::new`) so `is_typename` can do a set
// lookup; typedef names are handled separately via `find_typedef`.
static TYPENAME_KWS: [&str; 30] = [
    "void",
    "_Bool",
    "char",
    "short",
    "int",
    "long",
    "struct",
    "union",
    "typedef",
    "enum",
    "static",
    "extern",
    "_Alignas",
    "signed",
    "unsigned",
    "const",
    "volatile",
    "auto",
    "register",
    "restrict",
    "__restrict",
    "__restrict__",
    "_Noreturn",
    "float",
    "double",
    "typeof",
    "inline",
    "_Thread_local",
    "__thread",
    "_Atomic",
];
impl Parse {
    /// Returns true if `tok` can start a type name: either a built-in
    /// type-specifier keyword or a typedef name currently in scope.
    fn is_typename(&self, tok: &Token) -> bool {
        self.typenames.contains(tok.loc.as_str()) || self.find_typedef(tok).is_some()
    }
}
impl Parse {
fn declare_builtin_functions(&mut self) {
let mut ty = func_type(pointer_to(TY_VOID));
assert!(matches!(ty.kind, TypeKind::Func { .. }));
let ty_clone = ty.clone();
if let TypeKind::Func { params, .. } = &mut ty.kind {
params.push(copy_type(ty_clone));
}
let var = self.new_gvar("alloca".into(), ty);
var.borrow_mut().is_definition = false;
self.builtin_alloca = Some(var);
}
}
impl TokenList {
    /// True if the cursor sits at the end of an initializer list:
    /// "}" or a trailing "," directly followed by "}".
    fn is_end(&self) -> bool {
        if self[0].equal("}") {
            return true;
        }
        self[0].equal(",") && self[1].equal("}")
    }
    /// Like `is_end`, but also consumes the terminator when present.
    fn consume_end(&mut self) -> bool {
        if self[0].equal("}") {
            self.idx += 1;
            true
        } else if self[0].equal(",") && self[1].equal("}") {
            self.idx += 2;
            true
        } else {
            false
        }
    }
}
impl TokenList {
    // enum-specifier = ident? "{" enum-list? "}"
    //                | ident ("{" enum-list? "}")?
    //
    // enum-list = ident ("=" num)? ("," ident ("=" num)?)* ","?
    fn enum_specifier(&mut self, parse: &mut Parse) -> Type {
        let ty = enum_type();
        // Read an optional tag.
        let mut tag = None;
        if self[0].kind == TokenKind::Ident {
            tag = Some(self[0].clone());
            self.idx += 1;
        }
        // A tag without a following "{" refers to a previously declared enum.
        if let Some(tag) = &tag {
            if !self[0].equal("{") {
                match parse.find_tag(tag) {
                    Some(ty) => {
                        if !matches!(ty.kind, TypeKind::Enum) {
                            tag.error("not an enum tag");
                        }
                        return ty.clone();
                    }
                    None => {
                        tag.error("unknown enum type");
                        unreachable!();
                    }
                }
            }
        }
        self.skip("{");
        // Read the enumerator list. Each name receives the next value in
        // sequence unless "=" assigns one explicitly.
        let mut first = true;
        let mut val = 0;
        while !self.consume_end() {
            if !first {
                self.skip(",");
            }
            first = false;
            let name = String::from(self[0].get_ident());
            self.idx += 1;
            if self[0].equal("=") {
                self.idx += 1;
                val = self.const_expr(parse)
            }
            let sc = parse.push_scope(name);
            {
                let mut entry = sc.borrow_mut();
                entry.enum_ty = Some(ty.clone());
                entry.enum_val = Some(val as i32);
            }
            val += 1;
        }
        if let Some(tag) = &tag {
            parse.push_tag_scope(tag, ty.clone());
        }
        return ty;
    }
    // typeof-specifier = "(" (expr | typename) ")"
    fn typeof_specifier(&mut self, parse: &mut Parse) -> Type {
        self.skip("(");
        // The operand is either a type name or an expression whose type
        // we compute without evaluating it.
        let ty = if parse.is_typename(&self[0]) {
            self.typename(parse)
        } else {
            let mut node: Node = self.expr(parse);
            node.add_type();
            node.ty.unwrap()
        };
        self.skip(")");
        return ty;
    }
}
// Generating code for computing a VLA size.
//
// Returns the size-computing expression (a comma chain of assignments to
// the hidden `vla_size` locals) together with the updated type: the
// recursion fills in `vla_size` on every VLA layer, so processed base
// types must be written back into their parent rather than discarded.
fn compute_vla_size(mut ty: Type, tok: &Token, parse: &mut Parse) -> (Node, Type) {
    let mut node = Node {
        kind: NodeFields::NullExpr,
        ty: None,
        tok: tok.clone(),
    };
    // Process the element/pointee type first (innermost dimensions first)
    // and keep the processed base inside `ty`. Fix: the old code replaced
    // `ty` with the processed base, silently dropping the Ptr/Array/Vla
    // wrapper and thereby the variable's real type.
    match &mut ty.kind {
        TypeKind::Ptr { base } | TypeKind::Array { base, .. } | TypeKind::Vla { base, .. } => {
            let (rhs, new_base) = compute_vla_size((**base).clone(), tok, parse);
            **base = new_base;
            node = Node::new_binary(NodeBinaryKind::Comma, node, rhs, tok.clone());
        }
        _ => {}
    }
    // Only VLA layers need a size computation of their own.
    let assign = match &mut ty.kind {
        TypeKind::Vla {
            base,
            vla_len,
            vla_size,
        } => {
            // Size of one element: the base's own (already computed)
            // vla_size variable when the base is itself a VLA, otherwise
            // its constant size. Fix: the old match read the size of the
            // base's base for arrays and hit `unreachable!()` for scalar
            // element types like `int x[n]`.
            let base_sz = match &base.kind {
                TypeKind::Vla {
                    vla_size: inner_sz, ..
                } => Node::new_var_node(inner_sz.clone().unwrap(), tok.clone()),
                _ => Node::new_num(base.size as i64, tok.clone()),
            };
            *vla_size = Some(parse.new_lvar("".into(), TY_ULONG));
            // vla_size = vla_len * sizeof(element)
            let product = Node::new_binary(
                NodeBinaryKind::Mul,
                vla_len.borrow().clone(),
                base_sz,
                tok.clone(),
            );
            Some(Node::new_binary(
                NodeBinaryKind::Assign,
                Node::new_var_node(vla_size.as_ref().unwrap().clone(), tok.clone()),
                product,
                tok.clone(),
            ))
        }
        _ => None,
    };
    match assign {
        Some(expr) => (
            Node::new_binary(NodeBinaryKind::Comma, node, expr, tok.clone()),
            ty,
        ),
        None => (node, ty),
    }
}
// Builds a call to the built-in `alloca`, used to allocate VLA storage.
// `sz` must contain the argument node(s); the first one supplies the token
// for the generated nodes.
fn new_alloca(sz: Vec<Rc<RefCell<Node>>>, parse: &Parse) -> Node {
    let builtin = parse.builtin_alloca.as_ref().unwrap();
    let alloca = builtin.borrow();
    // The call's result type is alloca's declared return type.
    let return_ty = if let TypeKind::Func { return_ty, .. } = &alloca.ty.kind {
        *return_ty.clone()
    } else {
        unreachable!()
    };
    let tok = sz[0].borrow().tok.clone();
    let callee = Node::new_var_node(builtin.clone(), tok.clone());
    let mut node = Node {
        kind: NodeFields::FnCall {
            func_ty: alloca.ty.clone(),
            args: sz,
            pass_by_stack: false,
            ret_buffer: None,
            expr: Some(Rc::new(RefCell::new(callee))),
        },
        ty: Some(return_ty),
        tok,
    };
    node.add_type();
    return node;
}
impl TokenList {
// declaration = declspec (declarator ("=" expr)? ("," declarator ("=" expr)?)*)? ";"
//
// Parses one declaration statement and returns a Block node containing all
// generated initialization/VLA-setup statements.
fn declaration(&mut self, basety: &Type, attr: &Option<VarAttr>, parse: &mut Parse) -> Node {
    let mut nodes = Vec::new();
    let mut i = 0;
    while !self[0].equal(";") {
        if i > 0 {
            self.skip(",");
        }
        i += 1;
        let ty = self.declarator(basety.clone(), parse);
        if matches!(ty.kind, TypeKind::Void) {
            self[0].error("variable declared void");
        }
        if let Some(TypeDeclaration { name, name_pos }) = &ty.decl {
            match name {
                None => self[0].error("variable name omitted"),
                Some(name) => {
                    if let Some(attr) = attr {
                        if attr.is_static {
                            // static local variable: stored as an anonymous
                            // global, visible in the local scope.
                            let var = parse.new_anon_gvar(ty.clone());
                            let sc = parse.push_scope(name.get_ident().into());
                            sc.borrow_mut().var = Some(var.clone());
                            if self[0].equal("=") {
                                // Fix: consume "=" before parsing the
                                // initializer, mirroring the local-variable
                                // path below; it was previously left in the
                                // stream.
                                self.idx += 1;
                                self.gvar_initializer(var, parse);
                            }
                            continue;
                        }
                    }
                    // Generate code for computing a VLA size. We need to do this
                    // even if ty is not VLA because ty may be a pointer to VLA
                    // (e.g. int (*foo)(n)[m] where n and m are variables.)
                    let (expr, new_ty) = compute_vla_size(ty.clone(), &self[0], parse);
                    nodes.push(Node {
                        kind: NodeFields::Stmt {
                            kind: NodeStmtKind::ExprStmt,
                            expr: Some(Rc::new(RefCell::new(expr))),
                            body: Vec::new(),
                        },
                        ty: None,
                        tok: self[0].clone(),
                    });
                    if let TypeKind::Vla { vla_size, .. } = &new_ty.kind {
                        if self[0].equal("=") {
                            self[0].error("variable-sized object may not be initialized");
                        }
                        // Variable length arrays (VLAs) are translated to alloca() calls.
                        // For example, `int x[n+2]` is translated to `tmp = n + 2,
                        // x = alloca(tmp)`.
                        let tok = *name.clone();
                        let var = parse.new_lvar(name.get_ident().into(), new_ty.clone());
                        let sz = vec![Rc::new(RefCell::new(Node::new_var_node(
                            vla_size.clone().unwrap(),
                            tok.clone(),
                        )))];
                        let expr_rhs = new_alloca(sz, parse);
                        let expr = Node::new_binary(
                            NodeBinaryKind::Assign,
                            Node::new_vla_ptr(var, tok.clone()),
                            expr_rhs,
                            tok.clone(),
                        );
                        nodes.push(Node::new_expr_stmt(expr, tok));
                        continue;
                    }
                    let var = parse.new_lvar(name.get_ident().into(), new_ty.clone());
                    if let Some(attr) = attr {
                        if attr.align != 0 {
                            var.borrow_mut().align = attr.align;
                        }
                    }
                    if self[0].equal("=") {
                        self.idx += 1;
                        let expr: Node = self.lvar_initializer(var.clone(), parse);
                        nodes.push(Node::new_expr_stmt(expr, self[0].clone()));
                    }
                    // The initializer may have completed the type; check
                    // afterwards.
                    if var.borrow().ty.size < 0 {
                        name.error("variable has incomplete type");
                    }
                    if matches!(var.borrow().ty.kind, TypeKind::Void) {
                        name.error("variable declared void");
                    }
                }
            }
        } else {
            self[0].error("variable name omitted");
        }
    }
    // Wrap all generated statements in one block node.
    let nodesrc = nodes
        .into_iter()
        .map(|n| Rc::new(RefCell::new(n)))
        .collect();
    let node = Node {
        kind: NodeFields::Block { body: nodesrc },
        ty: None,
        tok: self[0].clone(),
    };
    self.skip(";");
    return node;
}
// Skips one excess initializer element (possibly a nested brace list)
// without storing its value.
fn skip_excess_element(&mut self, parse: &mut Parse) {
    if !self[0].equal("{") {
        self.assign(parse);
        return;
    }
    self.idx += 1;
    self.skip_excess_element(parse);
    self.skip("}")
}
// string-initializer = string-literal
//
// Initializes an array of char/short/int elements from a string literal
// token. If the literal is longer than the array, the excess characters
// are ignored (C allows `char x[3] = "hello"`).
fn string_initializer(&mut self, init: &mut Initializer) {
    // An incomplete array (`char x[] = "..."`) takes its length from the
    // string literal's type.
    if init.is_flexible {
        match &init.ty.kind {
            TypeKind::Array { base, len } => {
                *init = Initializer::new(array_of(*base.clone(), *len), false);
            }
            _ => unreachable!(),
        }
    }
    match &init.ty.kind {
        TypeKind::Array { base, len: initlen } => match &self[0].other {
            TokenFields::Str(ty, s) => match &ty.kind {
                TypeKind::Array { len: toklen, .. } => {
                    // Fix: copy at most as many elements as the target
                    // array holds; the old loops iterated the whole
                    // literal and indexed past `init.children` for an
                    // over-long string.
                    let len = *std::cmp::min(initlen, toklen) as usize;
                    match base.size {
                        1 => {
                            for (i, c) in s.iter().take(len).enumerate() {
                                init.children[i].expr =
                                    Some(Node::new_num(*c as i64, self[0].clone()));
                            }
                        }
                        2 => {
                            // Fix: decode native-endian u16 code units from
                            // the raw bytes. The previous
                            // `transmute::<&Box<[u8]>, &Box<[u16]>>` kept
                            // the byte count as the element count and read
                            // far out of bounds (undefined behavior).
                            for (i, b) in s.chunks_exact(2).take(len).enumerate() {
                                let c = u16::from_ne_bytes([b[0], b[1]]);
                                init.children[i].expr =
                                    Some(Node::new_num(c as i64, self[0].clone()));
                            }
                        }
                        4 => {
                            for (i, b) in s.chunks_exact(4).take(len).enumerate() {
                                let c = u32::from_ne_bytes([b[0], b[1], b[2], b[3]]);
                                init.children[i].expr =
                                    Some(Node::new_num(c as i64, self[0].clone()));
                            }
                        }
                        _ => unreachable!(),
                    }
                }
                _ => unreachable!(),
            },
            _ => unreachable!(),
        },
        _ => unreachable!(),
    }
    self.idx += 1;
}
// array-designator = "[" const-expr "]"
//
// C99 added the designated initializer to the language, which allows
// programmers to move the "cursor" of an initializer to any element.
// The syntax looks like this:
//
//   int x[10] = { 1, 2, [5]=3, 4, 5, 6, 7 };
//
// `[5]` moves the cursor to the 5th element, so the 5th element of x
// is set to 3. Initialization then continues forward in order, so
// 6th, 7th, 8th and 9th elements are initialized with 4, 5, 6 and 7,
// respectively. Unspecified elements (in this case, 3rd and 4th
// elements) are initialized with zero.
//
// Nesting is allowed, so the following initializer is valid:
//
//   int x[5][10] = { [5][8]=1, 2, 3 };
//
// It sets x[5][8], x[5][9] and x[6][0] to 1, 2 and 3, respectively.
//
// Returns the (begin, end) index range selected by the designator
// (begin == end unless the GNU "..." range form is used).
fn array_designator(&mut self, ty: &Type, parse: &mut Parse) -> (usize, usize) {
    let len = match &ty.kind {
        TypeKind::Array { len, .. } => *len as i64,
        _ => unreachable!(),
    };
    self.skip("[");
    let begin = self.const_expr(parse);
    if begin >= len {
        self[0].error("array designator index exceeds array bounds");
    }
    let mut end = begin;
    if self[0].equal("...") {
        // GNU extension: `[begin ... end] =` designates a range.
        self.idx += 1;
        end = self.const_expr(parse);
        if end >= len {
            self[0].error("array designator index exceeds array bounds");
        }
        if end < begin {
            self[0].error(
                format!("array designator range [{}, {}] is empty", begin, end).as_str(),
            );
        }
    }
    self.skip("]");
    assert!(begin >= 0);
    assert!(end >= 0);
    (begin as usize, end as usize)
}
// struct-designator = "." ident
//
// Use `.fieldname` to move the cursor for a struct initializer. E.g.
//
//   struct { int a, b, c; } x = { .c=5 };
//
// The above initializer sets x.c to 5.
//
// Returns the member suffix starting at the designated member; for an
// anonymous struct member the cursor is rewound to `start` so the same
// designator is re-read inside that member.
//
// NOTE(review): the name comparison below reads `ty.decl` — the struct
// TYPE's own declaration name — and compares it against the designator
// token once per member `i`, so the same name is checked on every
// iteration. It looks like it should compare each MEMBER's name instead;
// confirm against the `Member` definition before changing.
fn struct_designator(&mut self, ty: &Type) -> Vec<Member> {
    let start = self.idx;
    self.skip(".");
    if self[0].kind != TokenKind::Ident {
        self[0].error("expected a field designator");
    }
    match &ty.kind {
        TypeKind::Struct {
            members,
            is_flexible,
            is_packed,
        } => {
            for i in 0..members.len() {
                match &ty.decl {
                    Some(TypeDeclaration { name, name_pos }) => {
                        match name {
                            // Anonymous struct member
                            None => {
                                // If the designated field lives inside this
                                // anonymous member, rewind so the designator
                                // is parsed again against it.
                                if members[i].ty.get_struct_member(&self[0]).is_some() {
                                    self.idx = start;
                                    return Vec::from(&members[i..]);
                                }
                            }
                            Some(name) => {
                                // Regular struct member
                                if name.loc.len() == self[0].loc.len()
                                    && name.loc.as_bytes() == self[0].loc.as_bytes()
                                {
                                    self.idx += 1;
                                    return Vec::from(&members[i..]);
                                }
                            }
                        }
                    }
                    None => {
                        // TODO: is this really unreachable?
                        unreachable!();
                    }
                }
            }
        }
        _ => unreachable!(),
    }
    self[0].error("struct has no such member");
    unreachable!();
}
// designation = ("[" const-expr "]" | "." ident)* "="? initializer
//
// Parses one designated (or plain) initializer item into `init`. Array
// range designators re-parse the same token span once per selected
// element by rewinding `self.idx`.
fn designation(&mut self, init: &mut Initializer, parse: &mut Parse) {
    if self[0].equal("[") {
        match &init.ty.kind {
            TypeKind::Array { base, len } => {
                let (begin, end) = self.array_designator(&init.ty, parse);
                // Remember the cursor: for a `[a ... b]` range the same
                // initializer tokens are applied to every element in turn.
                let tok = self.idx;
                for i in begin..=end {
                    self.idx = tok;
                    self.designation(&mut init.children[i], parse);
                }
                // Continue ordinary (cursor-based) initialization after
                // the designated position.
                self.array_initializer2(init, begin + 1, parse);
                return;
            }
            _ => {
                self[0].error("array index in non-array initializer");
            }
        }
    }
    if self[0].equal(".") {
        match &init.ty.kind {
            TypeKind::Struct {
                members,
                is_flexible,
                is_packed,
            } => {
                let mem = self.struct_designator(&init.ty);
                {
                    let idx = mem[0].idx;
                    let ic = &mut init.children[idx];
                    self.designation(ic, parse);
                }
                // A designated member invalidates any whole-struct
                // expression recorded earlier.
                init.expr = None;
                // Continue with the members after the designated one.
                self.struct_initializer2(init, &mem[1..], parse);
                return;
            }
            TypeKind::Union {
                members,
                is_flexible,
            } => {
                let mem = self.struct_designator(&init.ty);
                let ic = &mut init.children[mem[0].idx];
                self.designation(ic, parse);
                // Record which union member was initialized.
                init.mem = Some(mem[0].clone());
                return;
            }
            _ => {
                self[0].error("field name not in struct or union initializer");
            }
        }
    }
    // Plain item: an optional "=" (after a designator chain) then the value.
    if self[0].equal("=") {
        self.idx += 1;
    }
    self.initializer2(init, parse);
}
// An array length can be omitted if an array has an initializer
// (e.g. `int x[] = {1,2,3}`). If it's omitted, count the number
// of initializer elements.
//
// Performs a throwaway parse into a dummy element initializer and rewinds
// the token cursor afterwards; the caller re-parses for real once the
// array length is known.
fn count_array_init_elements(&mut self, ty: &Type, parse: &mut Parse) -> i32 {
    match &ty.kind {
        TypeKind::Array { base, len } => {
            let start_idx = self.idx;
            let mut first = true;
            // One dummy initializer of the element type, reused for every
            // element scanned.
            let mut dummy = Initializer::new(*base.clone(), false);
            // `i` tracks the index being initialized; `max` is one past
            // the highest index seen, i.e. the deduced length.
            let mut i = 0;
            let mut max = 0;
            while !self.consume_end() {
                if !first {
                    self.skip(",");
                }
                first = false;
                if self[0].equal("[") {
                    // A designator jumps the cursor; a GNU range
                    // "[a ... b]" counts up to b.
                    self.idx += 1;
                    i = self.const_expr(parse);
                    if self[0].equal("...") {
                        self.idx += 1;
                        i = self.const_expr(parse);
                    }
                    self.skip("]");
                    self.designation(&mut dummy, parse);
                } else {
                    self.initializer2(&mut dummy, parse);
                }
                i += 1;
                max = std::cmp::max(max, i);
            }
            // Counting pass only: rewind to where we started.
            self.idx = start_idx;
            assert!(max <= i32::MAX as i64);
            return max as i32;
        }
        _ => unreachable!(),
    }
}
// array-initializer1 = "{" initializer ("," initializer)* ","? "}"
//
// Braced array initializer. If the array length was omitted, a first
// scanning pass deduces it and `init` is rebuilt with the concrete type.
fn array_initializer1(&mut self, init: &mut Initializer, parse: &mut Parse) {
    let (new_init, length) = match &init.ty.kind {
        TypeKind::Array { base, len } => {
            if init.is_flexible {
                let len = self.count_array_init_elements(&init.ty, parse);
                (
                    Some(Initializer::new(array_of(*base.clone(), len), false)),
                    len,
                )
            } else {
                (None, *len)
            }
        }
        _ => unreachable!(),
    };
    if let Some(ni) = new_init {
        *init = ni;
    }
    self.skip("{");
    let mut first = true;
    // `i` is the index of the element the next plain initializer lands on.
    let mut i = 0;
    while !self.consume_end() {
        if !first {
            self.skip(",");
        }
        first = false;
        if self[0].equal("[") {
            let (begin, end) = self.array_designator(&init.ty, parse);
            let tok_idx = self.idx;
            for j in begin..=end {
                self.idx = tok_idx;
                // BUG FIX: designate element j's sub-initializer; the
                // original passed the whole array `init`, re-initializing
                // the array itself instead of the designated element.
                self.designation(&mut init.children[j], parse);
            }
            i = end;
        } else if (i as i32) < length {
            self.initializer2(&mut init.children[i], parse);
        } else {
            // More initializers than elements: parse and discard.
            self.skip_excess_element(parse);
        }
        i += 1;
    }
}
// array-initializer2 = initializer ("," initializer)*
//
// Brace-less array initializer starting at element `i`; stops at the end
// of the enclosing list or at the next designator (which it does not
// consume).
fn array_initializer2(&mut self, init: &mut Initializer, mut i: usize, parse: &mut Parse) {
    let new_init = match &init.ty.kind {
        TypeKind::Array { base, .. } => {
            if init.is_flexible {
                let len = self.count_array_init_elements(&init.ty, parse);
                Some(Initializer::new(array_of(*base.clone(), len), false))
            } else {
                None
            }
        }
        _ => unreachable!(),
    };
    if let Some(new_init) = new_init {
        *init = new_init;
    }
    let len = match &init.ty.kind {
        TypeKind::Array { len, .. } => *len,
        _ => unreachable!(),
    };
    while (i as i32) < len {
        // BUG FIX: stop at the end of the initializer list ("}" or ",}");
        // the original kept demanding "," past the last element.
        if self.is_end() {
            break;
        }
        let start_idx = self.idx;
        if i > 0 {
            self.skip(",");
        }
        // A designator belongs to the caller: rewind and return.
        if self[0].equal("[") || self[0].equal(".") {
            self.idx = start_idx;
            return;
        }
        self.initializer2(&mut init.children[i], parse);
        i += 1;
    }
}
// struct-initializer1 = "{" initializer ("," initializer)* ","? "}"
fn struct_initializer1(&mut self, init: &mut Initializer, parse: &mut Parse) {
self.skip("{");
match &init.ty.kind {
TypeKind::Struct {
members,
is_flexible,
is_packed,
} => {
let mut first = true;
let mut placeholder = Vec::new();
let mut mem = &members[..];
while !self.consume_end() {
if !first {
self.skip(",");
}
first = false;
if self[0].equal(".") {
// TODO: can this ever be out of order?
placeholder = self.struct_designator(&mut init.ty);
mem = &placeholder[..];
self.designation(init, parse);
mem = &mem[1..];
continue;
}
if mem.len() > 0 {
self.initializer2(&mut init.children[mem[0].idx], parse);
mem = &mem[1..];
} else {
self.skip_excess_element(parse);
}
}
}
_ => unreachable!(),
}
}
// struct-initializer2 = initializer ("," initializer)*
//
// Brace-less struct initializer: consumes comma-separated initializers
// for the given members in order, stopping at the end of the enclosing
// list or at the next designator (which is left for the caller).
fn struct_initializer2(
    &mut self,
    init: &mut Initializer,
    members: &[Member],
    parse: &mut Parse,
) {
    for (n, member) in members.iter().enumerate() {
        if self.is_end() {
            break;
        }
        let rewind = self.idx;
        if n > 0 {
            self.skip(",");
        }
        if self[0].equal("[") || self[0].equal(".") {
            // A designator repositions the cursor; hand control back.
            self.idx = rewind;
            return;
        }
        self.initializer2(&mut init.children[member.idx], parse);
    }
}
// Parses a union initializer.
fn union_initializer(&mut self, init: &mut Initializer, parse: &mut Parse) {
    // Unlike structs, union initializers take only one initializer,
    // and that initializes the first union member by default.
    // You can initialize other members using a designated initializer.
    if self[0].equal("{") && self[1].equal(".") {
        // Designated form: `union U u = { .b = 3 };`
        self.idx += 1;
        let mem = self.struct_designator(&init.ty);
        init.mem = Some(mem[0].clone());
        let ic = &mut init.children[mem[0].idx];
        self.designation(ic, parse);
        self.skip("}");
        return;
    }
    match &init.ty.kind {
        TypeKind::Union {
            members,
            is_flexible,
        } => {
            // Default: the first member is the one initialized.
            init.mem = Some(members[0].clone());
            if self[0].equal("{") {
                // Braced single initializer, optional trailing comma.
                self.idx += 1;
                self.initializer2(&mut init.children[0], parse);
                self.consume(",");
                self.skip("}");
            } else {
                self.initializer2(&mut init.children[0], parse);
            }
        }
        _ => unreachable!(),
    }
}
// initializer = string-initializer | array-initializer
//             | struct-initializer | union-initializer
//             | assign
//
// Dispatches on the target type and fills `init` in place.
fn initializer2(&mut self, init: &mut Initializer, parse: &mut Parse) {
    let kind = init.ty.kind.clone(); // also clones members
    match kind {
        TypeKind::Array { .. } => {
            if self[0].kind == TokenKind::Str {
                // E.g. `char x[] = "foo";`
                self.string_initializer(init);
            } else if self[0].equal("{") {
                self.array_initializer1(init, parse);
            } else {
                self.array_initializer2(init, 0, parse);
            }
            return;
        }
        TypeKind::Vla { .. } => todo!(),
        TypeKind::Struct { members, .. } => {
            if self[0].equal("{") {
                self.struct_initializer1(init, parse);
                return;
            }
            // A struct can be initialized with another struct. E.g.
            // `struct T x = y;` where y is a variable of type `struct T`.
            // Handle that case first.
            let start = self.idx;
            let mut expr: Node = self.assign(parse);
            expr.add_type();
            if matches!(expr.ty.as_ref().unwrap().kind, TypeKind::Struct { .. }) {
                init.expr = Some(expr);
                return;
            }
            // Not struct-valued: rewind and parse a brace-less
            // member-by-member initializer list.
            self.idx = start;
            self.struct_initializer2(init, &members, parse);
            return;
        }
        TypeKind::Union { .. } => {
            self.union_initializer(init, parse);
            return;
        }
        _ => {
            if self[0].equal("{") {
                // An initializer for a scalar variable can be surrounded by
                // braces. E.g. `int x = {3};`.
                // BUG FIX: consume the "{" before recursing — the original
                // recursed with the cursor still on "{", looping forever.
                self.idx += 1;
                self.initializer2(init, parse);
                self.skip("}");
                return;
            }
        }
    }
    // Scalar: a plain assignment expression.
    init.expr = Some(self.assign(parse));
}
}
// Deep-copies a struct/union type so its member list and size can be
// adjusted (e.g. for a flexible array member) without touching the original.
fn copy_struct_type(ty: &Type) -> Type {
    copy_type(ty.clone())
}
impl TokenList {
    // Parses a full initializer for `ty` and returns it together with the
    // possibly adjusted variable type: an incomplete array gets its
    // deduced length, and a struct/union ending in a flexible array
    // member is resized to cover the initialized tail.
    fn initializer(&mut self, ty: &Type, parse: &mut Parse) -> (Initializer, Type) {
        let mut init = Initializer::new(ty.clone(), true);
        self.initializer2(&mut init, parse);
        let flexible = matches!(
            &ty.kind,
            TypeKind::Struct { is_flexible: true, .. }
                | TypeKind::Union { is_flexible: true, .. }
        );
        if flexible {
            let mut new_type = copy_struct_type(ty);
            match &mut new_type.kind {
                TypeKind::Struct { members, .. } | TypeKind::Union { members, .. } => {
                    // BUG FIX: only the LAST member can be a flexible array
                    // member. The original looped over every member, adding
                    // each member's size to `size` and inflating the object.
                    if let Some(m) = members.last_mut() {
                        m.ty = init.children[m.idx].ty.clone();
                        new_type.size += m.ty.size;
                    }
                }
                _ => unreachable!(),
            }
            return (init, new_type);
        }
        // The non-flexible case: the (possibly length-deduced) type the
        // initializer parsed against.
        let ty = init.ty.clone();
        (init, ty)
    }
}
impl Node {
    // Builds an lvalue expression for the object designated by `desg`:
    // the root variable, a member access on the parent expression, or
    // `*(parent + idx)` for an array element.
    fn init_desg_expr(desg: Rc<RefCell<InitDesg>>, tok: Token) -> Node {
        let desg = desg.borrow();
        match &desg.var {
            Some(v) => {
                // Root of the designator chain: the variable itself.
                return Node::new_var_node(v.clone(), tok);
            }
            None => match &desg.member {
                Some(m) => {
                    // Struct/union member of the parent designator.
                    let n = Node::init_desg_expr(desg.next.as_ref().unwrap().clone(), tok.clone());
                    return Node {
                        kind: NodeFields::Member {
                            member: m.clone(),
                            expr: Rc::new(RefCell::new(n)),
                        },
                        tok: tok,
                        ty: None,
                    };
                }
                None => {
                    // Array element: *(parent + idx).
                    let lhs =
                        Node::init_desg_expr(desg.next.as_ref().unwrap().clone(), tok.clone());
                    let rhs = Node::new_num(desg.idx as i64, tok.clone());
                    return Node::new_unary(
                        NodeUnaryKind::Deref,
                        Node::new_add(lhs, rhs, tok.clone()),
                        tok,
                    );
                }
            },
        }
    }
    // Lowers an Initializer tree into a comma-chained sequence of
    // assignment expressions, one per scalar leaf. `desg` is the access
    // path from the variable down to the current element.
    fn create_lvar_init(
        init: &Initializer,
        ty: &Type,
        desg: Rc<RefCell<InitDesg>>,
        tok: Token,
    ) -> Node {
        match &ty.kind {
            TypeKind::Array { base, len } => {
                // One sub-initialization per element, chained with commas.
                let mut node = Node {
                    kind: NodeFields::NullExpr,
                    ty: None,
                    tok: tok.clone(),
                };
                for i in 0..*len {
                    let desg2 = InitDesg {
                        next: Some(desg.clone()),
                        idx: i,
                        member: None,
                        var: None,
                    };
                    let desg2 = Rc::new(RefCell::new(desg2));
                    let rhs = Node::create_lvar_init(
                        &init.children[i as usize],
                        &base,
                        desg2,
                        tok.clone(),
                    );
                    node = Node::new_binary(NodeBinaryKind::Comma, node, rhs, tok.clone());
                }
                return node;
            }
            TypeKind::Struct {
                members,
                is_flexible,
                is_packed,
            } => {
                // A whole-struct assignment (init.expr set) is handled by
                // the scalar case at the bottom; otherwise assign member
                // by member.
                if init.expr.is_none() {
                    let mut node = Node {
                        kind: NodeFields::NullExpr,
                        ty: None,
                        tok: tok.clone(),
                    };
                    for m in members.iter() {
                        let desg2 = InitDesg {
                            next: Some(desg.clone()),
                            idx: 0,
                            member: Some(m.clone()),
                            var: None,
                        };
                        let desg2 = Rc::new(RefCell::new(desg2));
                        let rhs = Node::create_lvar_init(
                            &init.children[m.idx],
                            &m.ty,
                            desg2,
                            tok.clone(),
                        );
                        node = Node::new_binary(NodeBinaryKind::Comma, node, rhs, tok.clone());
                    }
                    return node;
                }
            }
            TypeKind::Union {
                members,
                is_flexible,
            } => {
                // Only the chosen member (or the first, by default) is
                // initialized.
                let mem = match &init.mem {
                    Some(m) => m,
                    None => &members[0],
                };
                let desg2 = InitDesg {
                    next: Some(desg),
                    idx: 0,
                    member: Some(mem.clone()),
                    var: None,
                };
                let desg2 = Rc::new(RefCell::new(desg2));
                return Node::create_lvar_init(&init.children[mem.idx], &mem.ty, desg2, tok);
            }
            _ => {}
        }
        // Scalar leaf: no explicit initializer means "keep the zeroed
        // memory"; otherwise emit `desg-expr = expr`.
        if init.expr.is_none() {
            return Node {
                kind: NodeFields::NullExpr,
                ty: None,
                tok: tok.clone(),
            };
        }
        let lhs = Node::init_desg_expr(desg, tok.clone());
        return Node::new_binary(
            NodeBinaryKind::Assign,
            lhs,
            init.expr.as_ref().unwrap().clone(),
            tok,
        );
    }
}
impl TokenList {
    // A variable definition with an initializer is shorthand for a
    // definition followed by assignments. For example,
    // `int x[2][2] = {{6,7},{8,9}}` is lowered into
    //
    //   x[0][0] = 6; x[0][1] = 7; x[1][0] = 8; x[1][1] = 9;
    fn lvar_initializer(&mut self, var: Rc<RefCell<Obj>>, parse: &mut Parse) -> Node {
        let tok = self[0].clone();
        // Parsing the initializer may also refine the variable's type
        // (e.g. deduce an omitted array length), so write it back.
        let declared_ty = var.borrow().ty.clone();
        let (init, refined_ty) = self.initializer(&declared_ty, parse);
        var.borrow_mut().ty = refined_ty;
        let root_desg = Rc::new(RefCell::new(InitDesg {
            next: None,
            idx: 0,
            member: None,
            var: Some(var.clone()),
        }));
        // The standard requires unspecified elements to be zero, so the
        // whole object is zeroed first and the explicit assignments run
        // on top of that.
        let zero = Node {
            kind: NodeFields::MemZero { var: var.clone() },
            ty: None,
            tok: tok.clone(),
        };
        let assigns = Node::create_lvar_init(&init, &var.borrow().ty, root_desg, tok.clone());
        Node::new_binary(NodeBinaryKind::Comma, zero, assigns, tok)
    }
}
// Reads a native-endian unsigned scalar of `sz` bytes (1/2/4/8) from the
// head of `buf`.
//
// The previous implementation transmuted `&[u8]` to `&[u16]`/`&[u32]`/
// `&[u64]`, which is undefined behavior: the byte slice need not be
// aligned for the wider type, and a fat-pointer transmute leaves the
// element count wrong. Copy into a fixed array instead.
fn read_buf(buf: &[u8], sz: i32) -> u64 {
    match sz {
        1 => buf[0] as u64,
        2 => {
            let mut b = [0u8; 2];
            b.copy_from_slice(&buf[..2]);
            u16::from_ne_bytes(b) as u64
        }
        4 => {
            let mut b = [0u8; 4];
            b.copy_from_slice(&buf[..4]);
            u32::from_ne_bytes(b) as u64
        }
        8 => {
            let mut b = [0u8; 8];
            b.copy_from_slice(&buf[..8]);
            u64::from_ne_bytes(b)
        }
        _ => unreachable!(),
    }
}
// Writes the low `sz` bytes (1/2/4/8) of `val` to the head of `buf` in
// native byte order.
//
// The previous implementation transmuted `&mut [u8]` to a wider-element
// slice, which is undefined behavior (alignment + wrong fat-pointer
// length). Serialize through to_ne_bytes instead.
fn write_buf(buf: &mut [u8], val: u64, sz: i32) {
    match sz {
        1 => buf[0] = val as u8,
        2 => buf[..2].copy_from_slice(&(val as u16).to_ne_bytes()),
        4 => buf[..4].copy_from_slice(&(val as u32).to_ne_bytes()),
        8 => buf[..8].copy_from_slice(&val.to_ne_bytes()),
        _ => unreachable!(),
    }
}
impl Relocation {
    // Serializes `init` (typed `ty`) into the flat byte image `buf`,
    // starting at byte `offset`. Returns the relocation produced for a
    // pointer-to-symbol initializer, or `self` (the dummy head) if none.
    //
    // NOTE(review): chibicc chains relocations in a linked list
    // (`cur->next = rel`); this port can currently record only one — the
    // TODO below still stands.
    fn write_gvar_data(
        self,
        init: &mut Initializer,
        ty: &Type,
        buf: &mut [u8],
        offset: usize,
    ) -> Relocation {
        match &ty.kind {
            TypeKind::Array { base, len } => {
                let sz = base.size;
                let mut rel = self;
                for i in 0..*len {
                    // BUG FIX: recurse into element i with the *element*
                    // type. The original recursed with (init, ty) — the
                    // array itself — which recursed forever.
                    rel = rel.write_gvar_data(
                        &mut init.children[i as usize],
                        base,
                        buf,
                        offset + (sz * i) as usize,
                    );
                }
                return rel;
            }
            TypeKind::Struct { members, .. } => {
                let mut rel = self;
                for mem in members.iter() {
                    if mem.is_bitfield {
                        let expr = &init.children[mem.idx].expr;
                        if expr.is_none() {
                            break;
                        }
                        // BUG FIX: read/modify/write at the *member's*
                        // byte offset; the original computed `off` but then
                        // used `offset`, clobbering the struct's first word.
                        let off = offset + (mem.offset as usize);
                        let oldval = read_buf(&buf[off..], mem.ty.size);
                        let newval = expr.clone().unwrap().eval() as u64;
                        let mask = (1u64 << mem.bit_width) - 1;
                        let combined = oldval | ((newval & mask) << mem.bit_offset);
                        write_buf(&mut buf[off..], combined, mem.ty.size);
                    } else {
                        // BUG FIX: recurse with the member's type, not the
                        // enclosing struct type.
                        rel = rel.write_gvar_data(
                            &mut init.children[mem.idx],
                            &mem.ty,
                            buf,
                            offset + (mem.offset as usize),
                        );
                    }
                }
                return rel;
            }
            TypeKind::Union { .. } => match &init.mem {
                None => return self,
                Some(mem) => {
                    return self.write_gvar_data(&mut init.children[mem.idx], &mem.ty, buf, offset);
                }
            },
            _ => {}
        }
        match &mut init.expr {
            None => return self,
            Some(expr) => match &ty.kind {
                TypeKind::Float => {
                    // BUG FIX: write the 4 value bytes at the *byte* offset.
                    // The old code transmuted the buffer to &mut [f32] and
                    // indexed by element — UB and a mis-scaled offset.
                    let v = (expr.eval_double() as f32).to_ne_bytes();
                    buf[offset..offset + 4].copy_from_slice(&v);
                    return self;
                }
                TypeKind::Double => {
                    // BUG FIX: same as Float, for 8 bytes.
                    let v = expr.eval_double().to_ne_bytes();
                    buf[offset..offset + 8].copy_from_slice(&v);
                    return self;
                }
                _ => {
                    let mut label = None;
                    let val = expr.eval2(&mut label);
                    match label {
                        None => {
                            // Pure constant: embed the bytes directly.
                            write_buf(&mut buf[offset..], val as u64, ty.size);
                            self
                        }
                        Some(label) => {
                            // Symbol + addend: emit a relocation instead.
                            // TODO: chain relocations (self.next = rel).
                            Relocation {
                                offset,
                                label,
                                addend: val,
                            }
                        }
                    }
                }
            },
        }
    }
}
impl TokenList {
// Initializers for global variables are evaluated at compile-time and
// embedded to .data section. This function serializes Initializer
// objects to a flat byte array. It is a compile error if an
// initializer list contains a non-constant expression.
fn gvar_initializer(&mut self, var: Rc<RefCell<Obj>>, parse: &mut Parse) {
    // Parsing may refine the variable's type (deduced array length,
    // flexible array member); write it back before sizing the image.
    let (mut init, ty) = self.initializer(&var.borrow().ty, parse);
    var.borrow_mut().ty = ty;
    let sz = var.borrow().ty.size;
    assert!(sz >= 0);
    // Zero-filled flat image of the variable.
    let mut buf = vec![0u8; sz as usize];
    // Dummy head relocation (chibicc keeps a linked list here; this port
    // currently records at most one relocation — see write_gvar_data).
    let rel = Relocation {
        offset: 0,
        label: "".into(),
        addend: 0,
    };
    let relocation = rel.write_gvar_data(&mut init, &var.borrow().ty, buf.as_mut_slice(), 0);
    var.borrow_mut().init_data = buf;
    // NOTE(review): when no relocation is produced this stores the dummy
    // head (empty label) rather than None — confirm the code emitter
    // treats an empty label as "no relocation".
    var.borrow_mut().rel = Some(relocation);
}
// asm-stmt = "asm" ("volatile" | "inline")* "(" string-literal ")"
//
// Parses a GNU inline-assembly statement; the qualifiers are accepted
// and ignored, and the operand must be a plain (char-array) string
// literal.
fn asm_stmt(&mut self) -> Node {
    let start_tok = self[0].clone();
    self.skip("asm");
    while ["volatile", "inline"]
        .into_iter()
        .any(|op| self[0].equal(op))
    {
        self.idx += 1;
    }
    self.skip("(");
    let asm_str = match &self[0].other {
        TokenFields::Str(ty, s) => match &ty.kind {
            // Only a narrow string literal (array of char) is accepted.
            TypeKind::Array { base, .. } if matches!(base.kind, TypeKind::Char) => s.clone(),
            _ => {
                self[0].error("expected string literal");
                unreachable!();
            }
        },
        _ => {
            self[0].error("expected string literal");
            unreachable!();
        }
    };
    // BUG FIX: consume the string-literal token. The original left the
    // cursor on it, so the following skip(")") always failed.
    self.idx += 1;
    self.skip(")");
    return Node {
        kind: NodeFields::Asm(asm_str),
        ty: None,
        tok: start_tok,
    };
}
// stmt = "return" expr? ";"
//      | "if" "(" expr ")" stmt ("else" stmt)?
//      | "switch" "(" expr ")" stmt
//      | "case" const-expr ("..." const-expr)? ":" stmt
//      | "default" ":" stmt
//      | "for" "(" expr-stmt expr? ";" expr? ")" stmt
//      | "while" "(" expr ")" stmt
//      | "do" stmt "while" "(" expr ")" ";"
//      | "asm" asm-stmt
//      | "goto" (ident | "*" expr) ";"
//      | "break" ";"
//      | "continue" ";"
//      | ident ":" stmt
//      | "{" compound-stmt
//      | expr-stmt
fn stmt(&mut self, parse: &mut Parse) -> Rc<RefCell<Node>> {
    fn rrc(n: Node) -> Rc<RefCell<Node>> {
        Rc::new(RefCell::new(n))
    }
    // Shared body for "break" and "continue": a goto to the given label.
    fn break_cond(tl: &mut TokenList, label: &Option<String>) -> Rc<RefCell<Node>> {
        match label {
            None => {
                tl[0].error("stray break or continue");
                unreachable!()
            }
            Some(brk_label) => {
                let mut node = Node::new_cond(NodeCondKind::Goto, tl[0].clone());
                match &mut node.kind {
                    NodeFields::Cond { unique_label, .. } => {
                        *unique_label = Some(brk_label.clone());
                    }
                    _ => unreachable!(),
                }
                tl.idx += 1;
                tl.skip(";");
                return rrc(node);
            }
        }
    }
    if self[0].equal("return") {
        let mut node = Node {
            kind: NodeFields::Return { expr: None },
            ty: None,
            tok: self[0].clone(),
        };
        self.idx += 1;
        if self.consume(";") {
            return rrc(node);
        }
        let mut expr: Node = self.expr(parse);
        self.skip(";");
        expr.add_type();
        let ty = parse.current_fn.as_ref().unwrap().borrow().ty.clone();
        let return_ty = match &ty.kind {
            TypeKind::Func { return_ty, .. } => *return_ty.clone(),
            _ => unreachable!(),
        };
        // BUG FIX: inspect the *return* type. The original matched on the
        // function type itself (always TypeKind::Func), so struct/union
        // returns were wrongly cast as well.
        let is_aggregate = matches!(
            return_ty.kind,
            TypeKind::Struct { .. } | TypeKind::Union { .. }
        );
        if !is_aggregate {
            expr = Node::new_cast(rrc(expr), return_ty);
        }
        node.kind = NodeFields::Return {
            expr: Some(rrc(expr)),
        };
        return rrc(node);
    }
    if self[0].equal("if") {
        let mut node = Node::new_cond(NodeCondKind::If, self[0].clone());
        self.idx += 1; // BUG FIX: consume "if"; skip("(") used to fail here
        match &mut node.kind {
            NodeFields::Cond { cond, then, els, .. } => {
                self.skip("(");
                *cond = Some(rrc(self.expr(parse)));
                self.skip(")");
                *then = Some(self.stmt(parse));
                if self[0].equal("else") {
                    self.idx += 1; // BUG FIX: consume "else" before its body
                    *els = Some(self.stmt(parse));
                }
                return rrc(node);
            }
            _ => unreachable!(),
        }
    }
    if self[0].equal("switch") {
        let node = rrc(Node::new_cond(NodeCondKind::Switch, self[0].clone()));
        self.idx += 1; // BUG FIX: consume "switch"
        self.skip("(");
        let cond_expr = rrc(self.expr(parse));
        self.skip(")");
        let sw = parse.current_switch.clone();
        parse.current_switch = Some(node.clone());
        let brk = parse.brk_label.clone();
        let new_brk = parse.new_unique_name();
        // BUG FIX: publish the break label; the original never assigned
        // parse.brk_label, so "break" inside the switch was "stray".
        parse.brk_label = Some(new_brk.clone());
        match &mut node.borrow_mut().kind {
            NodeFields::Cond { cond, brk_label, .. } => {
                *cond = Some(cond_expr);
                *brk_label = Some(new_brk);
            }
            _ => unreachable!(),
        }
        // BUG FIX: the switch body is a *statement*, not an expression —
        // and it must be parsed without holding the RefCell borrow of
        // `node`, because nested case/default statements re-borrow it
        // through parse.current_switch (the original would have panicked).
        let body = self.stmt(parse);
        match &mut node.borrow_mut().kind {
            NodeFields::Cond { then, .. } => {
                *then = Some(body);
            }
            _ => unreachable!(),
        }
        parse.current_switch = sw;
        parse.brk_label = brk;
        return node;
    }
    if self[0].equal("case") {
        match parse.current_switch.clone() {
            // clone to avoid borrow checker issue
            None => {
                self[0].error("stray case");
            }
            Some(current_switch) => {
                let node = Node::new_cond(NodeCondKind::Case, self[0].clone());
                self.idx += 1;
                let begin = self.const_expr(parse);
                let end = if self[0].equal("...") {
                    // [GNU] Case ranges, e.g. "case 1 ... 5:"
                    self.idx += 1;
                    let end = self.const_expr(parse);
                    if end < begin {
                        self[0].error("empty case range specified");
                    }
                    end
                } else {
                    begin
                };
                self.skip(":");
                let node = rrc(node);
                match &mut node.borrow_mut().kind {
                    NodeFields::Cond {
                        case_next,
                        case_begin,
                        case_end,
                        label: case_label,
                        expr: case_expr,
                        ..
                    } => {
                        *case_label = Some(parse.new_unique_name());
                        *case_expr = Some(self.stmt(parse));
                        *case_begin = Some(begin as i32);
                        *case_end = Some(end as i32);
                        // Link into the switch's case chain (mojibake
                        // `¤t_switch` fixed to `&current_switch`).
                        match &current_switch.borrow().kind {
                            NodeFields::Cond {
                                case_next: sw_next, ..
                            } => {
                                *case_next = sw_next.clone();
                            }
                            _ => unreachable!(),
                        }
                    }
                    _ => unreachable!(),
                }
                match &mut current_switch.borrow_mut().kind {
                    NodeFields::Cond { case_next, .. } => {
                        *case_next = Some(node.clone());
                        return node;
                    }
                    _ => unreachable!(),
                }
            }
        }
    }
    if self[0].equal("default") {
        match parse.current_switch.clone() {
            // clone to avoid borrow checker issue
            None => {
                self[0].error("stray default");
            }
            Some(current_switch) => {
                let node = Node::new_cond(NodeCondKind::Case, self[0].clone());
                self.idx += 1;
                self.skip(":");
                let node = rrc(node);
                match &mut node.borrow_mut().kind {
                    NodeFields::Cond {
                        label: case_label,
                        expr: case_expr,
                        ..
                    } => {
                        *case_label = Some(parse.new_unique_name());
                        *case_expr = Some(self.stmt(parse));
                    }
                    _ => unreachable!(),
                }
                match &mut current_switch.borrow_mut().kind {
                    NodeFields::Cond { case_default, .. } => {
                        *case_default = Some(node.clone());
                    }
                    _ => unreachable!(),
                }
                return node;
            }
        }
    }
    if self[0].equal("for") {
        let mut node = Node::new_cond(NodeCondKind::For, self[0].clone());
        self.idx += 1;
        self.skip("(");
        // The init clause may declare variables scoped to the loop.
        parse.enter_scope();
        let brk = parse.brk_label.clone();
        let cont = parse.cont_label.clone();
        match &mut node.kind {
            NodeFields::Cond {
                cond,
                then,
                init,
                inc,
                brk_label,
                cont_label,
                ..
            } => {
                *brk_label = Some(parse.new_unique_name());
                parse.brk_label = brk_label.clone();
                *cont_label = Some(parse.new_unique_name());
                // BUG FIX: was `parse.cont_label = brk_label.clone()`, so
                // "continue" jumped to the break label and exited the loop.
                parse.cont_label = cont_label.clone();
                if parse.is_typename(&self[0]) {
                    let basety = self.declspec(&mut None, parse);
                    *init = Some(either::Left(rrc(self.declaration(&basety, &None, parse))));
                } else {
                    *init = Some(either::Left(rrc(self.expr_stmt(parse))));
                }
                if !self[0].equal(";") {
                    *cond = Some(rrc(self.expr(parse)));
                }
                self.skip(";");
                if !self[0].equal(")") {
                    *inc = Some(rrc(self.expr(parse)));
                }
                self.skip(")");
                *then = Some(self.stmt(parse));
            }
            _ => unreachable!(),
        }
        parse.leave_scope();
        parse.brk_label = brk;
        parse.cont_label = cont;
        return rrc(node);
    }
    if self[0].equal("while") {
        // "while" is lowered as a For node without init/inc.
        let mut node = Node::new_cond(NodeCondKind::For, self[0].clone());
        match &mut node.kind {
            NodeFields::Cond {
                cond,
                then,
                brk_label,
                cont_label,
                ..
            } => {
                self.idx += 1;
                self.skip("(");
                *cond = Some(rrc(self.expr(parse)));
                self.skip(")");
                let brk = parse.brk_label.clone();
                let cont = parse.cont_label.clone();
                *brk_label = Some(parse.new_unique_name());
                *cont_label = Some(parse.new_unique_name());
                parse.brk_label = brk_label.clone();
                parse.cont_label = cont_label.clone();
                *then = Some(self.stmt(parse));
                parse.brk_label = brk;
                parse.cont_label = cont;
                return rrc(node);
            }
            _ => unreachable!(),
        }
    }
    if self[0].equal("do") {
        let mut node = Node::new_cond(NodeCondKind::Do, self[0].clone());
        match &mut node.kind {
            NodeFields::Cond {
                cond,
                then,
                brk_label,
                cont_label,
                ..
            } => {
                let brk = parse.brk_label.clone();
                // BUG FIX: the original saved brk_label here, so leaving
                // the do-loop restored the wrong continue label.
                let cont = parse.cont_label.clone();
                *brk_label = Some(parse.new_unique_name());
                *cont_label = Some(parse.new_unique_name());
                parse.brk_label = brk_label.clone();
                parse.cont_label = cont_label.clone();
                self.idx += 1;
                *then = Some(self.stmt(parse));
                parse.brk_label = brk;
                parse.cont_label = cont;
                self.skip("while");
                self.skip("(");
                *cond = Some(rrc(self.expr(parse)));
                self.skip(")");
                self.skip(";");
                return rrc(node);
            }
            _ => unreachable!(),
        }
    }
    if self[0].equal("asm") {
        return rrc(self.asm_stmt());
    }
    if self[0].equal("goto") {
        if self[1].equal("*") {
            // [GNU] `goto *ptr` jumps to the address specified by `ptr`.
            let mut node = Node::new_cond(NodeCondKind::GotoExpr, self[0].clone());
            match &mut node.kind {
                NodeFields::Cond { expr, .. } => {
                    self.idx += 2;
                    *expr = Some(rrc(self.expr(parse)));
                    self.skip(";");
                    return rrc(node);
                }
                _ => unreachable!(),
            }
        }
        let mut node = Node::new_cond(NodeCondKind::Goto, self[0].clone());
        match &mut node.kind {
            NodeFields::Cond {
                label, goto_next, ..
            } => {
                self.idx += 1; // consume "goto"; the cursor is now on the ident
                *label = Some(self[0].get_ident().into());
                *goto_next = parse.gotos.clone();
            }
            _ => unreachable!(),
        }
        let node = rrc(node);
        parse.gotos = Some(node.clone());
        // BUG FIX: consume only the label identifier (was `+= 2`, which
        // also ate the ";" and made the following skip(";") fail).
        self.idx += 1;
        self.skip(";");
        return node;
    }
    if self[0].equal("break") {
        // BUG FIX: return the node; the original discarded break_cond's
        // result and fell through to parse "break" again as an expression.
        return break_cond(self, &parse.brk_label);
    }
    if self[0].equal("continue") {
        return break_cond(self, &parse.cont_label);
    }
    if matches!(self[0].kind, TokenKind::Ident) && self[1].equal(":") {
        // Labeled statement: `ident ":" stmt`.
        let mut node = Node::new_cond(NodeCondKind::Label, self[0].clone());
        match &mut node.kind {
            NodeFields::Cond {
                unique_label,
                label,
                expr,
                goto_next,
                ..
            } => {
                *label = Some(String::from(self[0].loc.as_str()));
                *unique_label = Some(parse.new_unique_name());
                self.idx += 2;
                *expr = Some(self.stmt(parse));
                *goto_next = parse.labels.clone();
            }
            _ => unreachable!(),
        }
        let node = rrc(node);
        parse.labels = Some(node.clone());
        return node;
    }
    if self[0].equal("{") {
        self.idx += 1;
        return rrc(self.compound_stmt(parse));
    }
    return rrc(self.expr_stmt(parse));
}
// compound-stmt = (typedef | declaration | stmt)* "}"
//
// Parses statements up to and including the closing "}". The caller has
// already consumed the opening "{".
fn compound_stmt(&mut self, parse: &mut Parse) -> Node {
    let mut body = Vec::new();
    parse.enter_scope();
    while !self[0].equal("}") {
        // A typename starts a declaration — unless it is immediately
        // followed by ":", in which case it is a label handled by stmt().
        if parse.is_typename(&self[0]) && !self[1].equal(":") {
            let mut attr = Some(VarAttr {
                is_typedef: false,
                is_static: false,
                is_extern: false,
                is_inline: false,
                is_tls: false,
                align: 0,
            });
            let basety = self.declspec(&mut attr, parse);
            if attr.as_ref().unwrap().is_typedef {
                self.parse_typedef(&basety, parse);
                continue;
            }
            if self.is_function(parse) {
                // Nested function declaration/definition.
                self.function(basety, attr.as_ref().unwrap(), parse);
                continue;
            }
            if attr.as_ref().unwrap().is_extern {
                self.global_variable(&basety, attr.as_ref().unwrap(), parse);
                continue;
            }
            let decl = Rc::new(RefCell::new(self.declaration(&basety, &attr, parse)));
            body.push(decl);
        } else {
            let stmt = self.stmt(parse);
            body.push(stmt);
        }
    }
    parse.leave_scope();
    let tok = self[0].clone();
    // BUG FIX: consume the closing "}" (chibicc's `*rest = tok->next`).
    // Without this, a nested block's "}" terminated the *enclosing*
    // block's loop as well.
    self.idx += 1;
    return Node {
        kind: NodeFields::Block { body },
        ty: None,
        tok,
    };
}
// expr-stmt = expr? ";"
//
// An empty statement (";") becomes an empty block.
fn expr_stmt(&mut self, parse: &mut Parse) -> Node {
    // BUG FIX (diagnostics): capture the statement's first token before
    // consuming anything; the original attached the token *after* the
    // expression/";" to the node, pointing errors at the wrong place.
    let start_tok = self[0].clone();
    if self[0].equal(";") {
        self.idx += 1;
        return Node {
            kind: NodeFields::Block { body: Vec::new() },
            ty: None,
            tok: start_tok,
        };
    }
    let node = Node::new_expr_stmt(self.expr(parse), start_tok);
    self.skip(";");
    return node;
}
// expr = assign ("," expr)?
fn expr(&mut self, parse: &mut Parse) -> Node {
    let node = self.assign(parse);
    if self[0].equal(",") {
        // BUG FIX (diagnostics): grab the comma token *before* consuming
        // it. The original evaluated `self[0].clone()` after parsing the
        // right-hand side, attaching whatever token followed it.
        let tok = self[0].clone();
        self.idx += 1;
        return Node::new_binary(NodeBinaryKind::Comma, node, self.expr(parse), tok);
    }
    return node;
}
}
impl Node {
fn eval(&mut self) -> i64 {
return self.eval2(&mut None);
}
fn eval2(&mut self, label: &mut Option<String>) -> i64 {
self.add_type();
if is_flonum(self.ty.as_ref().unwrap()) {
return self.eval_double() as i64;
}
match &mut self.kind {
NodeFields::NullExpr => todo!(),
NodeFields::Binary { kind, lhs, rhs } => {
let mut lhsm = lhs.borrow_mut();
let mut rhsm = rhs.borrow_mut();
match &kind {
NodeBinaryKind::Add => {
return lhsm.eval2(label) + rhsm.eval();
}
NodeBinaryKind::Sub => {
return lhsm.eval2(label) - rhsm.eval();
}
NodeBinaryKind::Mul => {
return lhsm.eval() * rhsm.eval();
}
NodeBinaryKind::Div => {
if self.ty.as_ref().unwrap().is_unsigned {
return (lhsm.eval() as u64 / rhsm.eval() as u64) as i64;
}
return lhsm.eval() / rhsm.eval();
}
NodeBinaryKind::Mod => {
if self.ty.as_ref().unwrap().is_unsigned {
return (lhsm.eval() as u64 % rhsm.eval() as u64) as i64;
}
return lhsm.eval() % rhsm.eval();
}
NodeBinaryKind::BitAnd => {
return lhsm.eval() & rhsm.eval();
}
NodeBinaryKind::BitOr => {
return lhsm.eval() | rhsm.eval();
}
NodeBinaryKind::BitXor => {
return lhsm.eval() ^ rhsm.eval();
}
NodeBinaryKind::Shl => {
return lhsm.eval() << rhsm.eval();
}
NodeBinaryKind::Shr => {
return lhsm.eval() >> rhsm.eval();
}
NodeBinaryKind::Eq => return if lhsm.eval() == rhsm.eval() { 1 } else { 0 },
NodeBinaryKind::Ne => {
return if lhsm.eval() != rhsm.eval() { 1 } else { 0 };
}
NodeBinaryKind::Lt => {
if self.ty.as_ref().unwrap().is_unsigned {
return if (lhsm.eval() as u64) < rhsm.eval() as u64 {
1
} else {
0
};
}
return if lhsm.eval().lt(&rhsm.eval()) { 1 } else { 0 };
}
NodeBinaryKind::Le => {
if self.ty.as_ref().unwrap().is_unsigned {
return if (lhsm.eval() as u64 <= rhsm.eval() as u64) {
1
} else {
0
};
}
return if lhsm.eval() <= rhsm.eval() { 1 } else { 0 };
}
NodeBinaryKind::Assign => {}
NodeBinaryKind::Comma => {
return rhsm.eval2(label);
}
NodeBinaryKind::LogAnd => return (lhsm.eval() != 0 && rhsm.eval() != 0) as i64,
NodeBinaryKind::LogOr => {
return (lhsm.eval() != 0 || rhsm.eval() != 0) as i64;
}
}
}
NodeFields::Member { member, expr } => {
if label.is_none() {
self.tok.error("not a compile-time constant");
}
if !matches!(self.ty.as_ref().unwrap().kind, TypeKind::Array { .. }) {
self.tok.error("invalid initializer");
}
return expr.borrow_mut().eval_rval(label) + member.offset as i64;
}
NodeFields::Unary { kind, expr } => {
let mut exprm = expr.borrow_mut();
match kind {
NodeUnaryKind::Neg => {
return -exprm.eval();
}
NodeUnaryKind::Addr => {
return exprm.eval_rval(label);
}
NodeUnaryKind::Deref => {}
NodeUnaryKind::Not => {
return if exprm.eval() == 0 { 1 } else { 0 };
}
NodeUnaryKind::BitNot => {
return !exprm.eval();
}
}
}
NodeFields::Return { expr } => {}
NodeFields::Cond {
kind,
cond,
then,
els,
init,
inc,
brk_label,
cont_label,
unique_label,
case_next,
case_default,
case_begin,
case_end,
label,
expr,
goto_next,
} => match kind {
NodeCondKind::Cond => {
return if cond.as_ref().unwrap().borrow_mut().eval() != 0 {
then.as_ref().unwrap().borrow_mut().eval2(label)
} else {
els.as_ref().unwrap().borrow_mut().eval2(label)
}
}
NodeCondKind::LabelVal => {
*label = unique_label.clone();
return 0;
}
_ => {}
},
NodeFields::Var(var) => {
if label.is_none() {
self.tok.error("not a compile-time constant");
}
if !matches!(self.ty.as_ref().unwrap().kind, TypeKind::Array { .. }) {
self.tok.error("invalid initializer");
}
*label = Some(var.borrow().name.clone());
return 0;
}
NodeFields::Int(v) => return *v,
_ => {}
}
self.tok.error("not a compile-time constant");
unreachable!();
}
fn eval_rval(&self, label: &mut Option<String>) -> i64 {
match &self.kind {
NodeFields::Var(var) => {
if var.borrow().is_local {
self.tok.error("not a compile-time constant");
}
*label = Some(var.borrow().name.clone());
return 0;
}
NodeFields::Unary { kind, expr } => match kind {
NodeUnaryKind::Deref => {
return expr.borrow_mut().eval2(label);
}
_ => {}
},
NodeFields::Member { member, expr } => {
return expr.borrow_mut().eval_rval(label) + member.offset as i64;
}
_ => {}
}
self.tok.error("invalid initializer");
unreachable!();
}
fn is_const_expr(&mut self) -> bool {
self.add_type();
match &self.kind {
NodeFields::Binary { kind, lhs, rhs } => match kind {
NodeBinaryKind::Add
| NodeBinaryKind::Sub
| NodeBinaryKind::Mul
| NodeBinaryKind::Div
| NodeBinaryKind::Mod
| NodeBinaryKind::BitAnd
| NodeBinaryKind::BitOr
| NodeBinaryKind::BitXor
| NodeBinaryKind::Shl
| NodeBinaryKind::Shr
| NodeBinaryKind::Eq
| NodeBinaryKind::Ne
| NodeBinaryKind::Lt
| NodeBinaryKind::Le
| NodeBinaryKind::LogAnd
| NodeBinaryKind::LogOr => {
return lhs.borrow_mut().is_const_expr() && rhs.borrow_mut().is_const_expr();
}
NodeBinaryKind::Assign => {}
NodeBinaryKind::Comma => {
return rhs.borrow_mut().is_const_expr();
}
},
NodeFields::Unary { kind, expr } => match kind {
NodeUnaryKind::Neg | NodeUnaryKind::Not | NodeUnaryKind::BitNot => {
return expr.borrow_mut().is_const_expr();
}
_ => {}
},
NodeFields::Cond {
kind,
cond,
then,
els,
init,
inc,
brk_label,
cont_label,
unique_label,
case_next,
case_default,
case_begin,
case_end,
label,
expr,
goto_next,
} => match kind {
NodeCondKind::Cond => {
if !cond.as_ref().unwrap().borrow_mut().is_const_expr() {
return false;
}
let expr = if cond.as_ref().unwrap().borrow_mut().eval() != 0 {
then.as_ref().unwrap()
} else {
els.as_ref().unwrap()
};
return expr.borrow_mut().is_const_expr();
}
_ => {}
},
NodeFields::Int(_) => return true,
NodeFields::Float(_) => return true,
NodeFields::Cast(expr) => {
return expr.borrow_mut().is_const_expr();
}
_ => {}
}
return false;
}
}
impl TokenList {
    // Parses a conditional expression and folds it to an integer constant
    // at parse time.
    fn const_expr(&mut self, parse: &mut Parse) -> i64 {
        self.conditional(parse).eval()
    }
}
impl Node {
    // Evaluate a constant expression as a double. Integer-typed operands are
    // folded with the integer evaluator and then converted, honoring
    // unsignedness; errors out if the node is not a compile-time constant.
    fn eval_double(&mut self) -> f64 {
        self.add_type();
        if is_integer(self.ty.as_ref().unwrap()) {
            if self.ty.as_ref().unwrap().is_unsigned {
                // Reinterpret the signed i64 result as unsigned bits before
                // converting so large unsigned values round-trip correctly.
                return (self.eval() as u64) as f64;
            }
            return self.eval() as f64;
        }
        match &self.kind {
            NodeFields::Binary { kind, lhs, rhs } => {
                // NOTE: both children are borrowed mutably at once; this is
                // sound as long as lhs and rhs never alias the same Rc.
                let mut lhsm = lhs.borrow_mut();
                let mut rhsm = rhs.borrow_mut();
                match kind {
                    NodeBinaryKind::Add => {
                        return lhsm.eval_double() + rhsm.eval_double();
                    }
                    NodeBinaryKind::Sub => {
                        return lhsm.eval_double() - rhsm.eval_double();
                    }
                    NodeBinaryKind::Mul => {
                        return lhsm.eval_double() * rhsm.eval_double();
                    }
                    NodeBinaryKind::Div => {
                        return lhsm.eval_double() / rhsm.eval_double();
                    }
                    NodeBinaryKind::Comma => {
                        // A comma expression yields its right operand.
                        return rhsm.eval_double();
                    }
                    _ => {}
                }
            }
            NodeFields::Unary { kind, expr } => match kind {
                NodeUnaryKind::Neg => {
                    return -expr.borrow_mut().eval_double();
                }
                _ => {}
            },
            NodeFields::Cast(expr) => {
                // Casting from a float type stays in the float domain; casts
                // from integer types fold via the integer evaluator.
                if is_flonum(expr.borrow().ty.as_ref().unwrap()) {
                    return expr.borrow_mut().eval_double();
                }
                return expr.borrow_mut().eval() as f64;
            }
            NodeFields::Float(v) => {
                return *v;
            }
            _ => {}
        }
        // Anything that falls through is not a valid constant expression.
        self.tok.error("not a compile-time constant");
        unreachable!()
    }
    // Convert op= operators to expressions containing an assignment.
    //
    // In general, `A op= C` is converted to `tmp = &A, *tmp = *tmp op B`.
    // However, if a given expression is of form `A.x op= C`, the input is
    // converted to `tmp = &A, (*tmp).x = (*tmp).x op C` to handle assignment
    // to bitfields.
    //
    // `binary` is expected to be a Binary node `A op C`; anything else is a
    // caller bug (unreachable).
    fn to_assign(mut binary: Node, parse: &mut Parse) -> Node {
        // Shorthand: wrap a Node for shared AST storage.
        fn rrc(n: Node) -> Rc<RefCell<Node>> {
            return Rc::new(RefCell::new(n));
        }
        match &mut binary.kind {
            NodeFields::Binary {
                kind: binary_kind,
                lhs,
                rhs,
            } => {
                lhs.borrow_mut().add_type();
                rhs.borrow_mut().add_type();
                let tok = binary.tok.clone();
                // Convert `A.x op= C` to `tmp = &A, (*tmp).x = (*tmp).x op C`.
                match &lhs.borrow().kind {
                    NodeFields::Member { member, expr } => {
                        // tmp: an unnamed local holding a pointer to the struct A.
                        let var = parse
                            .new_lvar("".into(), pointer_to(expr.borrow().ty.clone().unwrap()));
                        // expr1: tmp = &A
                        let expr1 = Node::new_binary(
                            NodeBinaryKind::Assign,
                            Node::new_var_node(var.clone(), tok.clone()),
                            Node::new_unary1(NodeUnaryKind::Addr, expr.clone(), tok.clone()),
                            tok.clone(),
                        );
                        // Builds a fresh `(*tmp).x` member-access node.
                        fn deref_tmp_x(
                            member: &Member,
                            var: Rc<RefCell<Obj>>,
                            tok: &Token,
                        ) -> Node {
                            Node {
                                kind: NodeFields::Member {
                                    member: member.clone(),
                                    expr: Rc::new(RefCell::new(Node::new_unary(
                                        NodeUnaryKind::Deref,
                                        Node::new_var_node(var, tok.clone()),
                                        tok.clone(),
                                    ))),
                                },
                                ty: None,
                                tok: tok.clone(),
                            }
                        }
                        let expr2 = deref_tmp_x(member, var.clone(), &tok);
                        let expr3 = deref_tmp_x(member, var, &tok);
                        // TODO: avoid node clone?
                        // expr4: (*tmp).x = (*tmp).x op C
                        let expr4 = Node::new_binary(
                            NodeBinaryKind::Assign,
                            expr2,
                            Node::new_binary1(binary_kind.clone(), expr3, rhs.clone(), tok.clone()),
                            tok.clone(),
                        );
                        return Node::new_binary(NodeBinaryKind::Comma, expr1, expr4, tok);
                    }
                    _ => {}
                }
                // If A is an atomic type, Convert `A op= B` to
                //
                // ({
                //   T1 *addr = &A; T2 val = (B); T1 old = *addr; T1 new;
                //   do {
                //     new = old op val
                //   } while (!atomic_compare_exchange_strong(addr, &old, new))
                //   new;
                // })
                if lhs.borrow().ty.as_ref().unwrap().is_atomic {
                    let mut body = Vec::new();
                    // Four unnamed temporaries: addr, val, old, new (see the
                    // expansion sketch above).
                    let addr =
                        parse.new_lvar("".into(), pointer_to(lhs.borrow().ty.clone().unwrap()));
                    let val = parse.new_lvar("".into(), rhs.borrow().ty.clone().unwrap());
                    let old = parse.new_lvar("".into(), lhs.borrow().ty.clone().unwrap());
                    let new = parse.new_lvar("".into(), lhs.borrow().ty.clone().unwrap());
                    // addr = &A
                    body.push(Node::new_expr_stmt(
                        Node::new_binary(
                            NodeBinaryKind::Assign,
                            Node::new_var_node(addr.clone(), tok.clone()),
                            Node::new_unary1(NodeUnaryKind::Addr, lhs.clone(), tok.clone()),
                            tok.clone(),
                        ),
                        tok.clone(),
                    ));
                    // val = (B)
                    body.push(Node::new_expr_stmt(
                        Node::new_binary1(
                            NodeBinaryKind::Assign,
                            Node::new_var_node(val.clone(), tok.clone()),
                            rhs.clone(),
                            tok.clone(),
                        ),
                        tok.clone(),
                    ));
                    // old = *addr
                    body.push(Node::new_expr_stmt(
                        Node::new_binary(
                            NodeBinaryKind::Assign,
                            Node::new_var_node(old.clone(), tok.clone()),
                            Node::new_unary(
                                NodeUnaryKind::Deref,
                                Node::new_var_node(addr.clone(), tok.clone()),
                                tok.clone(),
                            ),
                            tok.clone(),
                        ),
                        tok.clone(),
                    ));
                    // do { new = old op val } while (!CAS(addr, &old, new))
                    let mut loopn = Node::new_cond(NodeCondKind::Do, tok.clone());
                    match &mut loopn.kind {
                        NodeFields::Cond {
                            kind,
                            cond,
                            then,
                            els,
                            init,
                            inc,
                            brk_label,
                            cont_label,
                            unique_label,
                            case_next,
                            case_default,
                            case_begin,
                            case_end,
                            label,
                            expr,
                            goto_next,
                        } => {
                            *brk_label = Some(parse.new_unique_name());
                            *cont_label = Some(parse.new_unique_name());
                            // Loop body: new = old op val
                            let loop_body = Node::new_binary(
                                NodeBinaryKind::Assign,
                                Node::new_var_node(new.clone(), tok.clone()),
                                Node::new_binary(
                                    binary_kind.clone(),
                                    Node::new_var_node(old.clone(), tok.clone()),
                                    Node::new_var_node(val.clone(), tok.clone()),
                                    tok.clone(),
                                ),
                                tok.clone(),
                            );
                            let loop_then = Node {
                                kind: NodeFields::Block {
                                    body: vec![rrc(loop_body)],
                                },
                                ty: None,
                                tok: tok.clone(),
                            };
                            *then = Some(rrc(loop_then));
                            // Loop condition: !atomic_compare_exchange_strong(addr, &old, new)
                            let cas = Node {
                                kind: NodeFields::Cas {
                                    addr: rrc(Node::new_var_node(addr, tok.clone())),
                                    old: rrc(Node::new_var_node(old, tok.clone())),
                                    new: rrc(Node::new_var_node(new.clone(), tok.clone())),
                                },
                                ty: None,
                                tok: tok.clone(),
                            };
                            let loop_cond = Node::new_unary(NodeUnaryKind::Not, cas, tok.clone());
                            *cond = Some(rrc(loop_cond));
                        }
                        _ => unreachable!(),
                    }
                    body.push(loopn);
                    // Final `new;` makes the statement expression yield the
                    // updated value.
                    body.push(Node::new_expr_stmt(
                        Node::new_var_node(new, tok.clone()),
                        tok.clone(),
                    ));
                    let node = Node {
                        kind: NodeFields::Stmt {
                            kind: NodeStmtKind::StmtExpr,
                            expr: None,
                            body: body.into_iter().map(|n| rrc(n)).collect(),
                        },
                        ty: None,
                        tok: tok.clone(),
                    };
                    return node;
                }
                // convert `A op= B` to `tmp = &A, *tmp = *tmp op B`
                let var = parse.new_lvar("".into(), pointer_to(lhs.borrow().ty.clone().unwrap()));
                // expr1: tmp = &A
                let expr1 = Node::new_binary(
                    NodeBinaryKind::Assign,
                    Node::new_var_node(var.clone(), tok.clone()),
                    Node::new_unary1(NodeUnaryKind::Addr, lhs.clone(), tok.clone()),
                    tok.clone(),
                );
                // expr2: *tmp = *tmp op B
                let expr2 = Node::new_binary(
                    NodeBinaryKind::Assign,
                    Node::new_unary(
                        NodeUnaryKind::Deref,
                        Node::new_var_node(var.clone(), tok.clone()),
                        tok.clone(),
                    ),
                    Node::new_binary1(
                        binary_kind.clone(),
                        Node::new_unary(
                            NodeUnaryKind::Deref,
                            Node::new_var_node(var.clone(), tok.clone()),
                            tok.clone(),
                        ),
                        rhs.clone(),
                        tok.clone(),
                    ),
                    tok.clone(),
                );
                return Node::new_binary(NodeBinaryKind::Comma, expr1, expr2, tok);
            }
            _ => unreachable!(),
        }
    }
}
impl TokenList {
// assign = conditional (assign-op assign)?
// assign-op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^="
// | "<<=" | ">>="
fn assign(&mut self, parse: &mut Parse) -> Node {
let node = self.conditional(parse);
let tok = self[0].clone();
fn default(
tl: &mut TokenList,
kind: NodeBinaryKind,
parse: &mut Parse,
node: Node,
tok: Token,
) -> Node {
tl.idx += 1;
let rhs = tl.assign(parse);
let b = Node::new_binary(kind, node, rhs, tok);
return Node::to_assign(b, parse);
}
if self[0].equal("=") {
self.idx += 1;
return Node::new_binary(NodeBinaryKind::Assign, node, self.assign(parse), tok);
}
if self[0].equal("+") {
self.idx += 1;
return Node::to_assign(Node::new_add(node, self.assign(parse), tok), parse);
}
if self[0].equal("-") {
self.idx += 1;
return Node::to_assign(Node::new_sub(node, self.assign(parse), tok), parse);
}
if self[0].equal("*=") {
return default(self, NodeBinaryKind::Mul, parse, node, tok);
}
if self[0].equal("/=") {
return default(self, NodeBinaryKind::Div, parse, node, tok);
}
if self[0].equal("%=") {
return default(self, NodeBinaryKind::Mod, parse, node, tok);
}
if self[0].equal("&=") {
return default(self, NodeBinaryKind::BitAnd, parse, node, tok);
}
if self[0].equal("|=") {
return default(self, NodeBinaryKind::BitOr, parse, node, tok);
}
if self[0].equal("^=") {
return default(self, NodeBinaryKind::BitXor, parse, node, tok);
}
if self[0].equal("<<=") {
return default(self, NodeBinaryKind::Shl, parse, node, tok);
}
if self[0].equal(">>=") {
return default(self, NodeBinaryKind::Shr, parse, node, tok);
}
return node;
}
// conditional = logor ("?" expr? ":" conditional)?
fn conditional(&mut self, parse: &mut Parse) -> Node {
let mut conditional = self.logor(parse);
if !self[0].equal("?") {
return conditional;
}
if self[1].equal(":") {
// [GNU] Compile `a ?: b` as `tmp = a, tmp ? tmp : b`.
let tok = self[0].clone();
conditional.add_type();
let var = parse.new_lvar("".into(), conditional.ty.clone().unwrap());
let lhs = Node::new_binary(
NodeBinaryKind::Assign,
Node::new_var_node(var.clone(), tok.clone()),
conditional,
tok.clone(),
);
let mut rhs = Node::new_cond(NodeCondKind::Cond, tok.clone());
match &mut rhs.kind {
NodeFields::Cond {
cond, then, els, ..
} => {
*cond = Some(Rc::new(RefCell::new(Node::new_var_node(
var.clone(),
tok.clone(),
))));
*then = Some(Rc::new(RefCell::new(Node::new_var_node(
var.clone(),
tok.clone(),
))));
self.idx += 2;
*els = Some(Rc::new(RefCell::new(self.conditional(parse))));
}
_ => unreachable!(),
}
return Node::new_binary(NodeBinaryKind::Comma, lhs, rhs, tok);
}
let mut node = Node::new_cond(NodeCondKind::Cond, self[0].clone());
match &mut node.kind {
NodeFields::Cond {
kind,
cond,
then,
els,
init,
inc,
brk_label,
cont_label,
unique_label,
case_next,
case_default,
case_begin,
case_end,
label,
expr,
goto_next,
} => {
*cond = Some(Rc::new(RefCell::new(conditional)));
self.idx += 1;
*then = Some(Rc::new(RefCell::new(self.expr(parse))));
self.skip(":");
*els = Some(Rc::new(RefCell::new(self.conditional(parse))));
}
_ => unreachable!(),
}
return node;
}
// logor = logand ("||" logand)*
fn logor(&mut self, parse: &mut Parse) -> Node {
let mut node = self.logand(parse);
while self[0].equal("||") {
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::LogOr, node, self.logand(parse), start);
}
return node;
}
// logand = bitor ("&&" bitor)*
fn logand(&mut self, parse: &mut Parse) -> Node {
let mut node = self.bitor(parse);
while self[0].equal("&&") {
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::LogAnd, node, self.bitor(parse), start);
}
return node;
}
// bitor = bitxor ("|" bitxor)*
fn bitor(&mut self, parse: &mut Parse) -> Node {
let mut node = self.bitxor(parse);
while self[0].equal("|") {
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::BitOr, node, self.bitxor(parse), start);
}
return node;
}
    // bitxor = bitand ("^" bitand)*
    // (The grammar comment previously said "|", but the code matches "^".)
    fn bitxor(&mut self, parse: &mut Parse) -> Node {
        let mut node = self.bitand(parse);
        while self[0].equal("^") {
            let start = self[0].clone();
            self.idx += 1;
            node = Node::new_binary(NodeBinaryKind::BitXor, node, self.bitand(parse), start);
        }
        return node;
    }
// bitand = equality ("&" equality)*
fn bitand(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.equality(parse);
while self[0].equal("&") {
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::BitAnd, node, self.equality(parse), start);
}
return node;
}
// equality = relational ("==" relational | "!=" relational)*
fn equality(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.relational(parse);
loop {
let kind = if self[0].equal("==") {
NodeBinaryKind::Eq
} else if self[0].equal("!=") {
NodeBinaryKind::Ne
} else {
break;
};
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(kind, node, self.relational(parse), start);
}
return node;
}
// relational = shift ("<" shift | "<=" shift | ">" shift | ">=" shift)*
fn relational(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.shift(parse);
loop {
let start = self[0].clone();
if self[0].equal("<") {
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::Lt, node, self.shift(parse), start);
} else if self[0].equal("<") {
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::Le, node, self.shift(parse), start);
} else if self[0].equal(">") {
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::Lt, self.shift(parse), node, start);
} else if self[0].equal(">=") {
self.idx += 1;
node = Node::new_binary(NodeBinaryKind::Le, self.shift(parse), node, start);
} else {
break;
}
}
return node;
}
// shift = add ("<<" add | ">>" add)*
fn shift(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.add(parse);
loop {
let kind: NodeBinaryKind = if self[0].equal("<<") {
NodeBinaryKind::Shl
} else if self[0].equal(">>") {
NodeBinaryKind::Shr
} else {
break;
};
let start = self[0].clone();
self.idx += 1;
node = Node::new_binary(kind, node, self.add(parse), start);
}
return node;
}
}
impl Node {
    // In C, the `+` is overloaded to perform pointer arithmetic.
    // If p is a pointer, p+n adds not n but sizeof(*p)*n to the value of p,
    // so that p+n points to the location n elements (not bytes) ahead of p.
    // In other words, we need to scale an integer value before adding to a
    // pointer value. This function takes care of the scaling.
    fn new_add(mut lhs: Node, mut rhs: Node, tok: Token) -> Node {
        lhs.add_type();
        rhs.add_type();
        // num + num: no scaling needed.
        if is_numeric(lhs.ty.as_ref().unwrap()) && is_numeric(rhs.ty.as_ref().unwrap()) {
            return Node::new_binary(NodeBinaryKind::Add, lhs, rhs, tok);
        }
        // Canonicalize `num + ptr` to `ptr + num`.
        match &rhs.ty.as_ref().unwrap().kind {
            TypeKind::Array { base, .. } | TypeKind::Ptr { base } | TypeKind::Vla { base, .. } => {
                let tmp = lhs;
                lhs = rhs;
                rhs = tmp;
            }
            _ => {}
        }
        // After canonicalization the right operand must be an integer;
        // ptr + ptr (and ptr + float) are invalid.
        if !is_integer(rhs.ty.as_ref().unwrap()) {
            tok.error("invalid operands");
        }
        // VLA + num
        match &lhs.ty.as_ref().unwrap().kind {
            TypeKind::Vla {
                base,
                vla_len,
                vla_size,
            } => {
                // The element size is only known at runtime, so multiply by
                // the vla_size variable rather than a compile-time constant.
                // Do we want to VLA size of base instead?
                let rhs = Node::new_binary(
                    NodeBinaryKind::Mul,
                    rhs,
                    Node::new_var_node(vla_size.as_ref().unwrap().clone(), tok.clone()),
                    tok.clone(),
                );
                return Node::new_binary(NodeBinaryKind::Add, lhs, rhs, tok);
            }
            TypeKind::Array { base, .. } | TypeKind::Ptr { base } => {
                // ptr + num: scale the index by sizeof(*ptr).
                let rhs = Node::new_binary(
                    NodeBinaryKind::Mul,
                    rhs,
                    Node::new_long(base.size as i64, tok.clone()),
                    tok.clone(),
                );
                return Node::new_binary(NodeBinaryKind::Add, lhs, rhs, tok);
            }
            _ => unreachable!(),
        }
    }
    // Like '+', '-' is overloaded for the pointer type.
    // Handles num-num, ptr-num (index scaled by element size), and ptr-ptr
    // (byte difference divided back down to an element count).
    fn new_sub(mut lhs: Node, mut rhs: Node, tok: Token) -> Node {
        lhs.add_type();
        rhs.add_type();
        // num - num
        if is_numeric(lhs.ty.as_ref().unwrap()) && is_numeric(rhs.ty.as_ref().unwrap()) {
            return Node::new_binary(NodeBinaryKind::Sub, lhs, rhs, tok);
        }
        match lhs.ty.as_ref().unwrap().kind.clone() {
            TypeKind::Vla { base, .. }
            | TypeKind::Array { base, .. }
            | TypeKind::Ptr { base, .. } => {
                // VLA element type: the element size is a runtime value, so
                // multiply by the vla_size variable instead of a constant.
                match &base.kind {
                    TypeKind::Vla {
                        base,
                        vla_len,
                        vla_size,
                    } => {
                        let mut rhs = Node::new_binary(
                            NodeBinaryKind::Mul,
                            rhs,
                            Node::new_var_node(vla_size.clone().unwrap(), tok.clone()),
                            tok.clone(),
                        );
                        rhs.add_type();
                        let ty = lhs.ty.clone();
                        let mut node = Node::new_binary(NodeBinaryKind::Sub, lhs, rhs, tok);
                        // The result keeps the pointer type of the left operand.
                        node.ty = ty;
                        return node;
                    }
                    _ => {}
                }
                // ptr - num
                if is_integer(rhs.ty.as_ref().unwrap()) {
                    let mut rhs = Node::new_binary(
                        NodeBinaryKind::Mul,
                        rhs,
                        Node::new_long(base.size as i64, tok.clone()),
                        tok.clone(),
                    );
                    rhs.add_type();
                    let ty = lhs.ty.clone();
                    let mut node = Node::new_binary(NodeBinaryKind::Sub, lhs, rhs, tok);
                    node.ty = ty;
                    return node;
                }
                // ptr - ptr, which returns how many elements are between the two.
                match &rhs.ty.as_ref().unwrap().kind {
                    TypeKind::Vla { .. } | TypeKind::Array { .. } | TypeKind::Ptr { .. } => {
                        // NOTE(review): the intermediate Sub node keeps the
                        // pointer type here (chibicc types it as `long`) —
                        // confirm add_type on the Div handles that as intended.
                        let ty = lhs.ty.clone();
                        let mut node = Node::new_binary(NodeBinaryKind::Sub, lhs, rhs, tok.clone());
                        node.ty = ty;
                        return Node::new_binary(
                            NodeBinaryKind::Div,
                            node,
                            Node::new_num(base.size as i64, tok.clone()),
                            tok,
                        );
                    }
                    _ => {}
                }
            }
            _ => unreachable!(),
        }
        // e.g. num - ptr falls through to here.
        tok.error("invalid operands");
        unreachable!()
    }
}
impl TokenList {
// add = mul ("+" mul | "-" mul)*
fn add(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.mul(parse);
loop {
let start = self[0].clone();
if self[0].equal("+") {
self.idx += 1;
node = Node::new_add(node, self.mul(parse), start);
} else if self[0].equal("-") {
self.idx -= 1;
node = Node::new_sub(node, self.mul(parse), start);
} else {
break;
}
}
return node;
}
// mul = cast ("*" cast | "/" cast | "%" cast)*
fn mul(&mut self, parse: &mut Parse) -> Node {
let mut node: Node = self.cast(parse);
loop {
let start = self[0].clone();
let kind = if self[0].equal("*") {
NodeBinaryKind::Mul
} else if self[0].equal("/") {
NodeBinaryKind::Div
} else if self[0].equal("%") {
NodeBinaryKind::Mod
} else {
break;
};
self.idx += 1;
node = Node::new_binary(kind, node, self.cast(parse), start);
}
return node;
}
// cast = "(" type-name ")" cast | unary
fn cast(&mut self, parse: &mut Parse) -> Node {
if self[0].equal("(") && parse.is_typename(&self[1]) {
let start_idx = self.idx;
let start_tok = self[0].clone();
let ty = self.typename(parse);
self.skip(")");
// compound literal
if self[0].equal("{") {
self.idx = start_idx;
return self.unary(parse);
}
// type cast
let mut node = Node::new_cast(Rc::new(RefCell::new(self.cast(parse))), ty);
node.tok = start_tok;
return node;
}
return self.unary(parse);
}
// unary = ("+" | "-" | "*" | "&" | "!" | "~") cast
// | ("++" | "--") unary
// | "&&" ident
// | postfix
fn unary(&mut self, parse: &mut Parse) -> Node {
if self[0].equal("+") {
self.idx += 1;
return self.cast(parse);
}
if self[0].equal("-") {
let tok = self[0].clone();
self.idx += 1;
return Node::new_unary(NodeUnaryKind::Neg, self.cast(parse), tok);
}
if self[0].equal("&") {
let tok = self[0].clone();
self.idx += 1;
let mut expr = self.cast(parse);
expr.add_type();
match &expr.kind {
NodeFields::Member { .. } => tok.error("cannot take address of bitfield"),
_ => {}
}
return Node::new_unary(NodeUnaryKind::Addr, expr, tok);
}
if self[0].equal("*") {
// [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
// in the C spec, but dereferencing a function shouldn't do
// anything. If foo is just a function, `*foo`, `**foo`, or
// `******foo` are all equivalent to just `foo`.
let tok = self[0].clone();
self.idx += 1;
let mut node = self.cast(parse);
match &node.ty.as_ref().unwrap().kind {
TypeKind::Func { .. } => return node,
_ => {
return Node::new_unary(NodeUnaryKind::Deref, node, tok);
}
}
}
if self[0].equal("!") {
let tok = self[0].clone();
self.idx += 1;
return Node::new_unary(NodeUnaryKind::Not, self.cast(parse), tok);
}
if self[0].equal("~") {
let tok = self[0].clone();
self.idx += 1;
return Node::new_unary(NodeUnaryKind::BitNot, self.cast(parse), tok);
}
if self[0].equal("++") {
let tok = self[0].clone();
self.idx += 1;
return Node::to_assign(
Node::new_add(self.unary(parse), Node::new_num(1, tok.clone()), tok),
parse,
);
}
if self[0].equal("--") {
let tok = self[0].clone();
self.idx += 1;
return Node::to_assign(
Node::new_sub(self.unary(parse), Node::new_num(1, tok.clone()), tok),
parse,
);
}
// [GNU] labels-as-values
if self[0].equal("&&") {
let mut node = Node::new_cond(NodeCondKind::LabelVal, self[0].clone());
match &mut node.kind {
NodeFields::Cond {
kind,
cond,
then,
els,
init,
inc,
brk_label,
cont_label,
unique_label,
case_next,
case_default,
case_begin,
case_end,
label,
expr,
goto_next,
} => {
*label = Some(self[1].get_ident().into());
*goto_next = parse.gotos.clone();
}
_ => unreachable!(),
}
let ret = node.clone();
let node = Rc::new(RefCell::new(node));
parse.gotos = Some(node.clone());
self.idx += 2;
return ret; // TODO: parse.gotos
}
return self.postfix(parse);
}
// struct-members = (declspec declarator ("," declarator)* ";")*
fn struct_members(&mut self, ty: &mut Type, parse: &mut Parse) {
match &mut ty.kind {
TypeKind::Struct {
is_flexible,
members,
..
}
| TypeKind::Union {
is_flexible,
members,
} => {
let mut cur = Vec::new();
let mut idx = 0;
while !self[0].equal("}") {
let mut attr = Some(VarAttr {
is_typedef: false,
is_static: false,
is_extern: false,
is_inline: false,
is_tls: false,
align: 0,
});
let basety: Type = self.declspec(&mut attr, parse);
let mut first = true;
match &basety.kind {
TypeKind::Struct { members, .. } | TypeKind::Union { members, .. } => {
// Anonymous struct member
if self.consume(";") {
let mut mem = Member::new(basety.clone());
mem.idx = idx;
idx += 1;
mem.align = if attr.as_ref().unwrap().align != 0 {
attr.as_ref().unwrap().align
} else {
mem.ty.align
};
cur.push(mem);
continue;
}
}
_ => {}
}
// Regular struct members
while !self.consume(";") {
if !first {
self.skip(",");
}
first = false;
let ty = self.declarator(basety.clone(), parse);
let mut mem = Member::new(ty);
mem.name = mem.ty.decl.as_ref().unwrap().name.clone().map(|bt| *bt);
mem.idx = idx;
idx += 1;
mem.align = if attr.as_ref().unwrap().align != 0 {
attr.as_ref().unwrap().align
} else {
mem.ty.align
};
if self.consume(":") {
mem.is_bitfield = true;
mem.bit_width = self.const_expr(parse) as usize;
}
cur.push(mem);
}
}
// If the last element is an array of incomplete type, it's
// called a "flexible array member". It should behave as if
// it were a zero-sized array.
if let Some(mem) = cur.last() {
match &mem.ty.kind {
TypeKind::Array { base, len } => {
if *len < 0 {
*is_flexible = true;
}
}
_ => {}
}
}
self.skip(";");
*members = cur;
}
_ => unreachable!(),
}
}
// attribute = ("__attribute__" "(" "(" (("packed" | ("aligned" "(" const-expr ")" ) ("," "packed" | "aligned")*)? ")" ")")*
fn attribute_list(&mut self, ty: &mut Type, parse: &mut Parse) {
while self.consume("__attribute__") {
self.skip("(");
self.skip("(");
let mut first = true;
while !self.consume(")") {
if !first {
self.skip(",")
}
first = false;
if self.consume("packed") {
match &mut ty.kind {
TypeKind::Struct { is_packed, .. } => {
*is_packed = true;
}
_ => unreachable!(),
}
continue;
}
if self.consume("aligned") {
self.skip("(");
ty.align = self.const_expr(parse) as i32;
self.skip(")");
continue;
}
}
self.skip(")");
}
}
    // struct-union-decl = attribute? ident? ("{" struct-members)?
    //
    // Handles tag lookup/registration: `struct T` without a body refers to
    // (or forward-declares) a tagged type; a body defines/redefines it.
    fn struct_union_decl(&mut self, parse: &mut Parse) -> Type {
        let mut ty = struct_type();
        self.attribute_list(&mut ty, parse);
        // Read a tag
        let tag = if matches!(self[0].kind, TokenKind::Ident) {
            let tag = self[0].clone();
            self.idx += 1;
            Some(tag)
        } else {
            None
        };
        match &tag {
            Some(tag) => {
                if !self[0].equal("{") {
                    // Tag without a body: reference to an existing type, or a
                    // forward declaration of an unknown one.
                    let ty2 = parse.find_tag(&tag);
                    match ty2 {
                        Some(ty2) => return ty2.clone(),
                        None => {
                            // size == -1 marks the type as incomplete.
                            ty.size = -1;
                            parse.push_tag_scope(&tag, ty.clone());
                        }
                    }
                }
            }
            None => {}
        }
        self.skip("{");
        // Construct a struct object.
        self.struct_members(&mut ty, parse);
        self.attribute_list(&mut ty, parse);
        if let Some(tag) = &tag {
            // If this is a redefinition, overwrite a previous type.
            // Otherwise, register the struct type.
            let ty2 = parse
                .scope
                .front_mut()
                .unwrap()
                .tags
                .get_mut(tag.loc.as_str());
            if let Some(ty2) = ty2 {
                *ty2 = ty;
                return ty2.clone();
            }
            parse.push_tag_scope(&tag, ty.clone());
        }
        return ty;
    }
    // struct-declr = struct-union-decl
    //
    // Parses a struct declaration and assigns member offsets, including
    // bitfield packing, then computes the overall size and alignment.
    fn struct_decl(&mut self, parse: &mut Parse) -> Type {
        let mut ty = self.struct_union_decl(parse);
        assert!(matches!(ty.kind, TypeKind::Struct { .. }));
        if ty.size < 0 {
            // Incomplete type (forward declaration): nothing to lay out.
            return ty;
        }
        // Assign offsets within the struct to members
        // `bits` is the running offset in *bits*, so bitfields and regular
        // members share one accounting scheme.
        let mut bits = 0;
        match &mut ty.kind {
            TypeKind::Struct {
                members,
                is_flexible,
                is_packed,
            } => {
                for m in members.iter_mut() {
                    if m.is_bitfield && m.bit_width == 0 {
                        // Zero-width anonymous bitfield has a special meaning.
                        // It affects only alignment
                        bits = align_to(bits, m.ty.size * 8);
                    } else if m.is_bitfield {
                        let sz = m.ty.size;
                        // If the field would straddle a storage-unit boundary,
                        // move it to the start of the next unit.
                        if (bits / (sz * 8)) != (bits + m.bit_width as i32 - 1) / (sz * 8) {
                            bits = align_to(bits, sz * 8);
                        }
                        m.offset = align_down(bits / 8, sz) as usize;
                        m.bit_offset = (bits % (sz * 8)) as usize;
                        bits += m.bit_width as i32;
                    } else {
                        // Regular member: align (unless packed) and advance.
                        if !*is_packed {
                            bits = align_to(bits, m.align * 8);
                        }
                        m.offset = (bits / 8) as usize;
                        bits += m.ty.size * 8;
                    }
                    // Struct alignment is the max member alignment, unless packed.
                    if !*is_packed && ty.align < m.align {
                        ty.align = m.align;
                    }
                }
            }
            _ => unreachable!(),
        }
        // Round the total bit count up to the struct alignment, in bytes.
        ty.size = align_to(bits, ty.align * 8) / 8;
        return ty;
    }
fn union_decl(&mut self, parse: &mut Parse) -> Type {
let mut ty = self.struct_union_decl(parse);
let (members, is_flexible) = if let TypeKind::Struct {
members,
is_flexible,
is_packed,
} = &ty.kind
{
(members.clone(), *is_flexible)
} else {
unreachable!()
};
if ty.size < 0 {
return ty;
}
// If union, we don't have to assign offsets because they are already initialized to zero.
// We need to compute the alignment and the size though.
for m in members.iter() {
if ty.align < m.align {
ty.align = m.align;
}
if ty.size < m.ty.size {
ty.size = m.ty.size;
}
}
ty.kind = TypeKind::Union {
members: members,
is_flexible: is_flexible,
};
ty.size = align_to(ty.size, ty.align);
return ty;
}
}
impl Type {
    // Find a struct/union member by name. For an anonymous struct/union
    // member, a hit anywhere inside it returns the anonymous member itself
    // (callers then recurse into it, e.g. struct_ref).
    fn get_struct_member(&self, tok: &Token) -> Option<&Member> {
        let members = match &self.kind {
            TypeKind::Struct { members, .. } | TypeKind::Union { members, .. } => members,
            _ => unreachable!(),
        };
        members.iter().find(|m| match &m.name {
            // Anonymous struct member: search it recursively.
            None => m.ty.get_struct_member(tok).is_some(),
            // Regular struct member: compare the token spelling.
            Some(name) => name.loc.as_bytes() == tok.loc.as_bytes(),
        })
    }
}
impl Node {
    // Create a node representing a struct member access, such as foo.bar
    // where foo is a struct and bar is a member name.
    //
    // C has a feature called "anonymous struct" which allows a struct to
    // have another unnamed struct as a member like this:
    //
    //   struct { struct { int a; }; int b; } x;
    //
    // The members of an anonymous struct belong to the outer struct's
    // member namespace. Therefore, in the above example, you can access
    // member "a" of the anonymous struct as "x.a".
    //
    // This function takes care of anonymous structs.
    fn struct_ref(mut node: Node, tok: Token) -> Node {
        node.add_type();
        match &node.ty.as_ref().unwrap().kind {
            TypeKind::Struct {
                members,
                is_flexible,
                ..
            }
            | TypeKind::Union {
                members,
                is_flexible,
            } => {
                let mut ty = node.ty.clone().unwrap();
                // Walk down through (possibly nested) anonymous members,
                // building one Member node per level, until the named member
                // itself is reached.
                loop {
                    let mem = ty.get_struct_member(&tok);
                    match mem {
                        None => tok.error("no such member"),
                        Some(mem) => {
                            node = Node {
                                kind: NodeFields::Member {
                                    member: mem.clone(),
                                    expr: Rc::new(RefCell::new(node)),
                                },
                                tok: tok.clone(),
                                ty: None,
                            };
                            // A named member terminates the walk; an anonymous
                            // one (name == None) means descend into its type.
                            if mem.name.is_some() {
                                break;
                            }
                            ty = mem.ty.clone();
                        }
                    }
                }
            }
            _ => node.tok.error("not a struct nor a union"),
        }
        return node;
    }
// Convert A++ to `(typeof A)((A += 1) - 1)`
fn new_inc_dec(mut node: Node, tok: Token, addend: i32, parse: &mut Parse) -> Node {
node.add_type();
let ty = node.ty.clone();
let add = Node::new_add(node, Node::new_num(addend as i64, tok.clone()), tok.clone());
let ass = Node::to_assign(add, parse);
let add2 = Node::new_add(ass, Node::new_num(-addend as i64, tok.clone()), tok.clone());
return Node::new_cast(Rc::new(RefCell::new(add2)), ty.unwrap());
}
}
impl TokenList {
// postfix = "(" type-name ")" "{" initializer-list "}"
// = ident "(" func-args ")" postfix-tail*
// | primary postfix-tail*
//
// postfix-tail = "[" expr "]"
// | "(" func-args ")"
// | "." ident
// | "->" ident
// | "++"
// | "--"
fn postfix(&mut self, parse: &mut Parse) -> Node {
if self[0].equal("(") && parse.is_typename(&self[0]) {
// Compound literal
let start = self[0].clone();
self.idx += 1;
let ty = self.typename(parse);
self.skip(")");
if parse.scope.len() == 1 {
let var = parse.new_anon_gvar(ty);
self.gvar_initializer(var.clone(), parse);
return Node::new_var_node(var, start);
}
let var = parse.new_lvar("".into(), ty);
let tok = self[0].clone();
let lhs = self.lvar_initializer(var.clone(), parse);
let rhs = Node::new_var_node(var, tok);
return Node::new_binary(NodeBinaryKind::Comma, lhs, rhs, start);
}
let mut node = self.primary(parse);
loop {
if self[0].equal("(") {
self.idx += 1;
node = self.funcall(node, parse);
continue;
}
if self[0].equal("[") {
// x[y] is short for *(x + y)
let start = self[0].clone();
self.idx += 1;
let idx = self.expr(parse);
self.skip("]");
node = Node::new_unary(
NodeUnaryKind::Deref,
Node::new_add(node, idx, start.clone()),
start,
);
}
if self[0].equal(".") {
node = Node::struct_ref(node, self[1].clone());
self.idx += 2;
continue;
}
if self[0].equal("->") {
// x->y is short for (*x).y
node = Node::new_unary(NodeUnaryKind::Deref, node, self[0].clone());
node = Node::struct_ref(node, self[1].clone());
self.idx += 2;
continue;
}
if self[0].equal("++") {
node = Node::new_inc_dec(node, self[0].clone(), 1, parse);
self.idx += 1;
continue;
}
if self[0].equal("--") {
node = Node::new_inc_dec(node, self[0].clone(), -1, parse);
self.idx += 1;
continue;
}
}
return node;
}
    // funcall = (assign ("," assign)*)? ")"
    //
    // The "(" has already been consumed; `f` is the callee expression, which
    // must be a function or a pointer to one.
    fn funcall(&mut self, mut f: Node, parse: &mut Parse) -> Node {
        f.add_type();
        // Extract the function type, looking through one level of pointer.
        let (ty, return_ty, params, is_variadic) = match &f.ty.as_ref().unwrap().kind {
            TypeKind::Func {
                return_ty,
                params,
                is_variadic,
                next,
            } => (f.ty.clone().unwrap(), return_ty, params, is_variadic),
            TypeKind::Ptr { base } => {
                if let TypeKind::Func {
                    return_ty,
                    params,
                    is_variadic,
                    next,
                } = &base.kind
                {
                    (*base.clone(), return_ty, params, is_variadic)
                } else {
                    f.tok.error("not a function");
                    unreachable!()
                }
            }
            _ => {
                f.tok.error("not a function");
                unreachable!()
            }
        };
        let mut param_idx = 0;
        let mut args: Vec<Rc<RefCell<Node>>> = Vec::new();
        while !self[0].equal(")") {
            if args.len() > 0 {
                self.skip(",")
            }
            let mut arg = self.assign(parse);
            arg.add_type();
            if param_idx >= params.len() && !*is_variadic {
                self[0].error("too many arguments");
            }
            if param_idx < params.len() {
                let param = &params[param_idx];
                match &param.kind {
                    // Struct/union arguments are passed as-is; everything
                    // else is implicitly cast to the parameter type.
                    TypeKind::Struct { .. } | TypeKind::Union { .. } => {}
                    _ => {
                        arg = Node::new_cast(Rc::new(RefCell::new(arg)), param.clone());
                    }
                }
                param_idx += 1;
            } else if matches!(arg.ty.as_ref().unwrap().kind, TypeKind::Float) {
                // Default argument promotion: float varargs become double.
                arg = Node::new_cast(Rc::new(RefCell::new(arg)), TY_DOUBLE);
            }
            args.push(Rc::new(RefCell::new(arg)));
        }
        if param_idx != params.len() {
            self[0].error("too few arguments");
        }
        self.skip(")");
        // If a function returns a struct, it is a caller's responsibility
        // to allocate a space for the return value.
        let ret_buffer = match &return_ty.kind {
            TypeKind::Struct { .. } | TypeKind::Union { .. } => {
                Some(parse.new_lvar("".into(), *return_ty.clone()))
            }
            _ => None,
        };
        let node = Node {
            kind: NodeFields::FnCall {
                func_ty: ty,
                args,
                pass_by_stack: false,
                ret_buffer: ret_buffer,
                expr: None,
            },
            ty: Some(*return_ty.clone()),
            tok: self[0].clone(),
        };
        return node;
    }
    // generic-selection = "(" assign "," generic-assoc ("," generic-assoc)* ")"
    //
    // generic-assoc = type-name ":" assign
    //               | "default" ":" assign
    fn generic_selection(&mut self, parse: &mut Parse) -> Node {
        let start = self[0].clone();
        self.skip("(");
        let mut ctrl = self.assign(parse);
        ctrl.add_type();
        let mut t1 = ctrl.ty.clone().unwrap();
        // The controlling expression's type undergoes function-to-pointer
        // and array-to-pointer decay before association matching.
        if let TypeKind::Func { .. } = &t1.kind {
            t1 = pointer_to(t1);
        } else if let TypeKind::Array { base, .. } = &t1.kind {
            t1 = pointer_to(*base.clone());
        }
        let mut ret = None;
        while !self.consume(")") {
            self.skip(",");
            if self[0].equal("default") {
                self.idx += 1;
                self.skip(":");
                let node = self.assign(parse);
                // "default" only applies if no association has matched yet.
                if ret.is_none() {
                    ret = Some(node)
                }
                continue;
            }
            let t2 = self.typename(parse);
            self.skip(":");
            let node = self.assign(parse);
            // A compatible type association wins over "default".
            if is_compatible(&t1, &t2) {
                ret = Some(node)
            }
        }
        if ret.is_none() {
            start.error(
                "controlling expression type not compatible with any generic association type",
            );
        }
        return ret.unwrap();
    }
// primary = "(" "{" stmt+ "}" ")"
// | "(" expr ")"
// | "sizeof" "(" type-name ")"
// | "sizeof" unary
// | "_Alignof" "(" type-name ")"
// | "_Alignof" unary
// | "_Generic" generic-selection
// | "__builtin_types_compatible_p" "(" type-name, type-name, ")"
// | "__builtin_reg_class" "(" type-name ")"
// | ident
// | str
// | num
fn primary(&mut self, parse: &mut Parse) -> Node {
let start = self[0].clone();
if self[0].equal("(") && self[1].equal("{") {
// This is a GNU statement expression
self.idx += 2;
let mut node = self.compound_stmt(parse);
let body = if let NodeFields::Block { body } = &node.kind {
body.clone()
} else {
unreachable!()
};
node.kind = NodeFields::Stmt {
kind: NodeStmtKind::StmtExpr,
expr: None,
body: body,
};
node.tok = start;
self.skip(")");
return node;
}
if self[0].equal("(") {
self.idx += 1;
let node = self.expr(parse);
self.skip(")");
return node;
}
if self[0].equal("sizeof") && self[1].equal("(") && parse.is_typename(&self[2]) {
self.idx += 2;
let ty = self.typename(parse);
self.skip(")");
match &ty.kind {
TypeKind::Vla {
base,
vla_len,
vla_size,
} => {
if let Some(vla_size) = &vla_size {
return Node::new_var_node(vla_size.clone(), self[0].clone());
}
// TODO: how do you set vla_size
let (lhs, ty) = compute_vla_size(ty, &self[0], parse);
if let TypeKind::Vla {
base,
vla_len,
vla_size,
} = &ty.kind
{
let rhs = Node::new_var_node(vla_size.clone().unwrap(), self[0].clone());
return Node::new_binary(NodeBinaryKind::Comma, lhs, rhs, self[0].clone());
} else {
unreachable!()
}
}
_ => {
return Node::new_ulong(ty.size as i64, start);
}
}
}
if self[0].equal("sizeof") {
self.idx += 1;
let mut node = self.unary(parse);
node.add_type();
if let TypeKind::Vla {
base,
vla_len,
vla_size,
} = &node.ty.as_ref().unwrap().kind
{
return Node::new_var_node(vla_size.clone().unwrap(), self[0].clone());
}
return Node::new_ulong(node.ty.as_ref().unwrap().size as i64, start);
}
if self[0].equal("_Alignof") && self[1].equal("(") && parse.is_typename(&self[2]) {
self.idx += 2;
let ty = self.typename(parse);
self.skip(")");
return Node::new_ulong(ty.align as i64, self[0].clone());
}
if self[0].equal("_Alignof") {
self.idx += 1;
let mut node = self.unary(parse);
node.add_type();
return Node::new_ulong(node.ty.as_ref().unwrap().align as i64, self[0].clone());
}
if self[0].equal("__builtin_reg_class") {
self.idx += 1;
self.skip("(");
let ty = self.typename(parse);
self.skip(")");
if is_integer(&ty) || matches!(ty.kind, TypeKind::Ptr { .. }) {
return Node::new_num(0, start);
} else if is_flonum(&ty) {
return Node::new_num(1, start);
}
return Node::new_num(2, start);
}
if self[0].equal("__builtin_compare_and_swap") {
self.idx += 1;
let addr = Rc::new(RefCell::new(self.assign(parse)));
let old = Rc::new(RefCell::new(self.assign(parse)));
let new = Rc::new(RefCell::new(self.assign(parse)));
return Node {
kind: NodeFields::Cas { addr, old, new },
tok: start,
ty: None,
};
}
if self[0].equal("__builtin_atomic_exchange") {
self.skip("(");
let lhs = Rc::new(RefCell::new(self.assign(parse)));
let rhs = Rc::new(RefCell::new(self.assign(parse)));
self.skip(")");
return Node {
kind: NodeFields::Exch { lhs, rhs },
ty: None,
tok: start,
};
}
if self[0].kind == TokenKind::Ident {
// Variable or enum constant
let sc = parse.find_var(&self[0]);
// For "static inline" function
if let Some(sc) = &sc {
if let Some(var) = &sc.borrow().var {
if var.borrow().is_function {
if let Some(current_fn) = &mut parse.current_fn {
if let Some(ObjFunction { refs, .. }) =
&mut current_fn.borrow_mut().func
{
refs.push(var.borrow().name.clone());
} else {
unreachable!()
}
} else {
if let Some(ObjFunction { is_root, .. }) = &mut var.borrow_mut().func {
*is_root = true;
} else {
unreachable!()
}
}
}
}
}
if let Some(sc) = sc {
if let Some(var) = &sc.borrow().var {
return Node::new_var_node(var.clone(), self[0].clone());
}
if sc.borrow().enum_ty.is_some() {
return Node::new_num(
sc.borrow().enum_val.clone().unwrap() as i64,
self[0].clone(),
);
}
}
if self[1].equal("(") {
self[0].error("implicit declaration of a function");
}
self[0].error("undefined variable");
}
if self[0].kind == TokenKind::Str {
if let TokenFields::Str(ty, s) = &self[0].other {
let var = parse.new_string_literal(s, ty.clone());
self.idx += 1;
return Node::new_var_node(var, start);
} else {
unreachable!()
}
}
if self[0].kind == TokenKind::Num {
let node = match &self[0].other {
TokenFields::Int(ty, val) => {
let mut n = Node::new_num(val.clone(), self[0].clone());
n.ty = Some(ty.clone());
n
}
TokenFields::Float(ty, val) => {
let mut n = Node::new_fnum(val.clone(), self[0].clone());
n.ty = Some(ty.clone());
n
}
_ => panic!(),
};
self.idx += 1;
return node;
}
self[0].error("expected an expression");
unreachable!()
}
// typedef = declarator ("," declarator)* ";"
//
// Registers each declared name in the current scope as a type alias
// for `basety`.
fn parse_typedef(&mut self, basety: &Type, parse: &mut Parse) {
    let mut need_comma = false;
    while !self.consume(";") {
        if need_comma {
            self.skip(",");
        }
        need_comma = true;
        let ty = self.declarator(basety.clone(), parse);
        let decl = ty.decl.as_ref().unwrap();
        if decl.name.is_none() {
            decl.name_pos.error("typedef name omitted");
        }
        let name_tok = decl.name.clone().unwrap();
        let sc = parse.push_scope(name_tok.get_ident().into());
        sc.borrow_mut().type_def = Some(ty);
    }
}
}
// Registers every function parameter as a named local variable in the
// current scope. Unnamed parameters (e.g. "int f(int)") are an error
// in a function definition.
fn create_param_lvars(params: &[Type], parse: &mut Parse) {
    for param in params {
        let decl = param.decl.as_ref().unwrap();
        if decl.name.is_none() {
            decl.name_pos.error("parameter name omitted");
        }
        let name_tok = decl.name.as_ref().unwrap();
        parse.new_lvar(name_tok.get_ident().into(), param.clone());
    }
}
// This function matches gotos or labels-as-values with labels.
//
// We cannot resolve gotos as we parse a function because gotos
// can refer to a label that appears later in the function.
// So, we need to do this after we parse the entire function.
//
// NOTE(review): this function looks broken in several independent ways
// and needs a rewrite before gotos can work:
//   1. `yo` is cloned from `parse.gotos`, so gotos are matched against
//      other *gotos*; the labels to match against should come from
//      `parse.labels` (which is only cleared at the end, never read).
//   2. Neither `while let` loop ever advances `xo`/`yo` to a next node,
//      so the loops cannot make progress over a list.
//   3. `xo` and `yo` start as clones of the same Rc head node, so
//      `x.borrow_mut()` followed by `y.borrow()` aliases the same
//      RefCell and panics (BorrowError) at runtime.
//   4. The "use of undeclared label" error is raised unconditionally:
//      the `if let` only destructures the node; it never checks that
//      `unique_label` is still unset after the matching pass.
fn resolve_goto_labels(parse: &mut Parse) {
    let mut xo = parse.gotos.clone();
    let mut yo = parse.gotos.clone();
    while let Some(x) = &xo {
        while let Some(y) = &yo {
            // Intended behavior: once the textual label names match,
            // copy the label's unique (mangled) name onto the goto.
            if let NodeFields::Cond {
                label: xlabel,
                unique_label: xunique_label,
                ..
            } = &mut x.borrow_mut().kind
            {
                if let NodeFields::Cond {
                    label: ylabel,
                    unique_label: yunique_label,
                    ..
                } = &y.borrow().kind
                {
                    if xlabel == ylabel {
                        *xunique_label = yunique_label.clone();
                        break;
                    }
                } else {
                    unreachable!()
                }
            } else {
                unreachable!()
            }
        }
        // Intended behavior: error only when `unique_label` is still None
        // here (see NOTE item 4 above).
        if let NodeFields::Cond { unique_label, .. } = &x.borrow().kind {
            // TODO: why x.tok.next?
            x.borrow().tok.error("use of undeclared label");
        }
    }
    parse.gotos = None;
    parse.labels = None;
}
// Looks up `name` in the outermost (file) scope and returns the object
// only if it resolves to a function; anything else yields None.
fn find_func(name: &str, parse: &Parse) -> Option<Rc<RefCell<Obj>>> {
    let file_scope = parse.scope.back().unwrap();
    let entry = file_scope.vars.get(name)?;
    let var = entry.borrow().var.clone()?;
    if var.borrow().is_function {
        Some(var)
    } else {
        None
    }
}
// Transitively marks `var` (a function) and every function it
// references as live; functions left non-live (unreferenced
// static-inline ones) can be dropped by later stages.
fn mark_live(var: Rc<RefCell<Obj>>, parse: &Parse) {
    // BUG FIX: the old code held `var.borrow_mut()` across the recursive
    // mark_live() calls, so any call cycle (e.g. a directly recursive C
    // function whose refs include itself) re-borrowed the same RefCell
    // and panicked with BorrowMutError. Snapshot the refs and drop the
    // borrow before recursing; the is_live flag still guarantees
    // termination.
    let refs = {
        let mut obj = var.borrow_mut();
        match obj.func.as_mut() {
            Some(of) if !of.is_live => {
                of.is_live = true;
                of.refs.clone()
            }
            // Not a function, or already marked: nothing to do.
            _ => return,
        }
    };
    for name in refs.iter() {
        if let Some(callee) = find_func(name, parse) {
            mark_live(callee, parse);
        }
    }
}
impl TokenList {
    // function = declarator ("{" compound-stmt | ";")
    //
    // Parses a function definition or declaration whose declaration
    // specifiers (`basety`, `attr`) have already been consumed, and
    // registers/updates the corresponding global Obj.
    fn function(&mut self, basety: Type, attr: &VarAttr, parse: &mut Parse) {
        let ty = self.declarator(basety, parse);
        if ty.decl.as_ref().unwrap().name.is_none() {
            ty.decl
                .as_ref()
                .unwrap()
                .name_pos
                .error("function name omitted");
        }
        let name_str = ty.decl.as_ref().unwrap().name.as_ref().unwrap().get_ident();
        let fo = find_func(name_str, parse);
        let f = match fo {
            Some(func) => {
                {
                    let mut f = func.borrow_mut();
                    // Redeclaration of a known name: it must still be a
                    // function, must not be defined twice, and must not
                    // gain `static` after a non-static declaration.
                    if !f.is_function {
                        self[0].error("redeclared as a different kind of symbol");
                    }
                    if f.is_definition && self[0].equal("{") {
                        self[0].error(format!("redefinition of {}", name_str).as_str());
                    }
                    if !f.is_static && attr.is_static {
                        self[0].error("static declaraion follows a non-static declaration");
                    }
                    f.is_definition = f.is_definition || self[0].equal("{");
                }
                func
            }
            None => {
                let f = parse.new_gvar(name_str.into(), ty.clone());
                {
                    let mut f = f.borrow_mut();
                    f.is_function = true;
                    f.is_definition = self[0].equal("{");
                    // inline without extern implies internal linkage.
                    f.is_static = attr.is_static || (attr.is_inline && !attr.is_extern);
                }
                f
            }
        };
        // BUG FIX: a plain declaration ("...;") must stop here for BOTH a
        // first declaration and a redeclaration. This early return used to
        // live only inside the `None` arm, so a repeated prototype of an
        // already-declared function fell through to `skip("{")` and
        // errored out.
        if self.consume(";") {
            return;
        }
        parse.current_fn = Some(f.clone());
        parse.locals.clear();
        parse.enter_scope();
        let (fnparams, va_area, alloca_bottom, body, locals) = match &ty.kind {
            TypeKind::Func {
                return_ty,
                params,
                is_variadic,
                ..
            } => {
                create_param_lvars(params, parse);
                // A buffer for a struct/union return value is passed
                // as the hidden first parameter.
                if (matches!(return_ty.kind, TypeKind::Struct { .. })
                    || matches!(return_ty.kind, TypeKind::Union { .. }))
                    && return_ty.size > 16
                {
                    parse.new_lvar("".into(), pointer_to(*return_ty.clone()));
                }
                // Snapshot the parameter objects before body locals are
                // added to parse.locals.
                let mut fnparams = Vec::new();
                for l in parse.locals.iter() {
                    fnparams.push(l.clone());
                }
                // Register-save area for variadic functions (136 bytes).
                let va_area = if *is_variadic {
                    Some(parse.new_lvar("__va_area__".into(), array_of(TY_CHAR, 136)))
                } else {
                    None
                };
                // NOTE(review): chibicc names this "__alloca_size__";
                // confirm the missing trailing underscores here cannot
                // collide with a user-declared identifier.
                let alloca_bottom = parse.new_lvar("__alloca_size".into(), pointer_to(TY_CHAR));
                self.skip("{");
                // [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
                // automatically defined as a local variable containing the
                // current function name.
                let sc: Rc<RefCell<VarScope>> = parse.push_scope("__func__".into());
                sc.borrow_mut().var = Some(parse.new_string_literal(
                    f.borrow().name.clone().as_bytes(),
                    array_of(TY_CHAR, (f.borrow().name.len() + 1) as i32),
                ));
                // [GNU] __FUNCTION__ is yet another name of __func__.
                let sc: Rc<RefCell<VarScope>> = parse.push_scope("__FUNCTION__".into());
                sc.borrow_mut().var = Some(parse.new_string_literal(
                    f.borrow().name.clone().as_bytes(),
                    array_of(TY_CHAR, (f.borrow().name.len() + 1) as i32),
                ));
                let body = self.compound_stmt(parse);
                let locals: Vec<Rc<RefCell<Obj>>> =
                    parse.locals.iter().map(|x| x.clone()).collect();
                parse.leave_scope();
                resolve_goto_labels(parse);
                (fnparams, va_area, alloca_bottom, body, locals)
            }
            _ => unreachable!(),
        };
        let func_fields = ObjFunction {
            is_inline: attr.is_inline,
            params: fnparams,
            body: Rc::new(RefCell::new(body)),
            locals,
            va_area,
            alloca_bottom,
            stack_size: 0,
            is_live: false,
            // Non-root (droppable) only for static inline functions.
            is_root: !(f.borrow().is_static && attr.is_inline),
            refs: Vec::new(),
        };
        f.borrow_mut().func = Some(func_fields);
    }
    // global-variable = declarator ("=" gvar-initializer)?
    //                   ("," declarator ("=" gvar-initializer)?)* ";"
    fn global_variable(&mut self, basety: &Type, attr: &VarAttr, parse: &mut Parse) {
        let mut first = true;
        while !self.consume(";") {
            if !first {
                self.skip(",");
            }
            first = false;
            let ty = self.declarator(basety.clone(), parse);
            if ty.decl.as_ref().unwrap().name.is_none() {
                ty.decl
                    .as_ref()
                    .unwrap()
                    .name_pos
                    .error("variable name omitted");
            }
            let var = parse.new_gvar(
                ty.decl
                    .as_ref()
                    .unwrap()
                    .name
                    .as_ref()
                    .unwrap()
                    .get_ident()
                    .into(),
                ty,
            );
            {
                let mut v = var.borrow_mut();
                // An `extern` declaration is not a definition.
                v.is_definition = !attr.is_extern;
                v.is_static = attr.is_static;
                v.is_tls = attr.is_tls;
                if attr.align != 0 {
                    v.align = attr.align
                }
            }
            if self[0].equal("=") {
                self.idx += 1;
                self.gvar_initializer(var, parse);
            } else if !attr.is_extern && !attr.is_tls {
                // No initializer: a tentative definition; scan_globals()
                // drops it if a real definition appears elsewhere.
                var.borrow_mut().is_tentative = true;
            }
        }
    }
    // Lookahead: returns true if the upcoming tokens are the start of a
    // function definition or declaration. The token index is restored,
    // so this consumes nothing.
    fn is_function(&mut self, parse: &mut Parse) -> bool {
        if self[0].equal(";") {
            return false;
        }
        // Parse a throwaway declarator against a dummy base type and
        // check whether the result is a function type.
        let dummy = Type {
            kind: TypeKind::Void,
            size: 0,
            align: 0,
            is_unsigned: false,
            is_atomic: false,
            origin: None,
            decl: None,
        };
        let start_idx = self.idx;
        let ty = self.declarator(dummy, parse);
        self.idx = start_idx;
        matches!(ty.kind, TypeKind::Func { .. })
    }
}
// Remove redundant tentative definitions
fn scan_globals(parse: &mut Parse) {
let mut new_globals = VecDeque::new();
for var in parse.globals.iter() {
if !var.borrow().is_tentative {
new_globals.push_back(var.clone());
continue;
}
// Find another definition of the same identifier.
let mut found = false;
for var2 in parse.globals.iter() {
if !var2.borrow().is_tentative
&& var2.borrow().is_definition
&& var.borrow().name == var2.borrow().name
{
found = true;
break;
}
}
// If there's another definition, the tentative definition is
// redundant.
if !found {
new_globals.push_back(var.clone());
}
}
parse.globals = new_globals;
}
// Predeclares functions the compiler itself knows about.
// Currently only `alloca`, typed as "void *alloca(int)".
fn declare_builtin_functions(parse: &mut Parse) {
    let mut ty = func_type(pointer_to(TY_VOID));
    if let TypeKind::Func { params, .. } = &mut ty.kind {
        *params = vec![copy_type(TY_INT)];
    }
    // Declared but never defined here (no `mut` binding needed; mutation
    // goes through the RefCell).
    let alloca = parse.new_gvar("alloca".into(), ty);
    alloca.borrow_mut().is_definition = false;
    parse.builtin_alloca = Some(alloca);
}
impl Parse {
    // program = (typedef | function-definition | global-variable)*
    //
    // Consumes the whole token stream and returns the list of global
    // objects (functions and variables) for code generation.
    pub fn parse(mut self, mut tl: TokenList) -> VecDeque<Rc<RefCell<Obj>>> {
        declare_builtin_functions(&mut self);
        self.globals.clear();
        while tl.len() > 0 {
            let mut attr = Some(VarAttr {
                is_typedef: false,
                is_static: false,
                is_extern: false,
                is_inline: false,
                is_tls: false,
                align: 0,
            });
            let basety = tl.declspec(&mut attr, &mut self);
            // Typedef
            if attr.as_ref().unwrap().is_typedef {
                tl.parse_typedef(&basety, &mut self);
                continue;
            }
            // Function
            if tl.is_function(&mut self) {
                tl.function(basety.clone(), attr.as_ref().unwrap(), &mut self);
                // BUG FIX: without this `continue`, control fell through to
                // global_variable(), which re-parsed the tokens after every
                // function as a variable declarator.
                continue;
            }
            // Global variable
            tl.global_variable(&basety, attr.as_ref().unwrap(), &mut self);
        }
        // Mark every function reachable from a root function as live.
        for var in self.globals.iter() {
            // BUG FIX: compute is_root first so the shared borrow is
            // dropped before mark_live(); the old
            // `if let ... = &var.borrow().func` kept `var` borrowed across
            // the call, and mark_live() calls var.borrow_mut() on the same
            // object, which panicked at runtime for every root function.
            let is_root = matches!(
                &var.borrow().func,
                Some(ObjFunction { is_root: true, .. })
            );
            if is_root {
                mark_live(var.clone(), &self);
            }
        }
        // Remove redundant tentative definitions
        scan_globals(&mut self);
        self.globals
    }
}