1use logos::Logos;
2
3use num::BigUint;
4
5#[derive(Debug, PartialEq, Clone)]
6pub enum LiteralKind {
7 Unsized,
8 Signed(BigUint),
9 Unsigned(BigUint),
10}
11
12fn parse_int(slice: &str, radix: u32) -> (BigUint, LiteralKind) {
13 let lower = slice.to_ascii_lowercase().replace(['_'], "");
14
15 let (cleaned, kind) = if lower.contains("u") {
16 let split = lower.split("u").collect::<Vec<_>>();
17 let kind = LiteralKind::Unsigned(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
18 (split[0], kind)
19 } else if lower.contains("i") {
20 let split = lower.split("i").collect::<Vec<_>>();
21 let kind = LiteralKind::Signed(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
22 (split[0], kind)
23 } else {
24 (lower.as_str(), LiteralKind::Unsized)
25 };
26
27 (
28 BigUint::parse_bytes(cleaned.as_bytes(), radix).unwrap(),
29 kind,
30 )
31}
32
/// Every token kind produced by the lexer.
///
/// Lexing rules are attached to the variants through `logos` `#[token]` and
/// `#[regex]` attributes. Whitespace and `//` line comments are consumed via
/// `logos::skip` and never surface as tokens; their variants exist only to
/// carry the skip callback.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum TokenKind {
    // An identifier: an XID-start character or `_`, then XID-continue
    // characters, optionally ending in `?` (\u{3F}), `!` (\u{21}), `?!`,
    // or `⁈` (\u{2048}).
    #[regex(r#"(?x:
        [\p{XID_Start}_]
        \p{XID_Continue}*
        (\u{3F} | \u{21} | (\u{3F}\u{21}) | \u{2048})? # ? ! ?! ⁈
    )"#, |lex| lex.slice().to_string())]
    Identifier(String),

    // Integer literals. `_` may appear as a digit separator, and an optional
    // `u<N>`/`i<N>` suffix gives the literal an explicit size — the value and
    // suffix are decoded by `parse_int`. The `0x`/`0b` prefix is stripped
    // before parsing via the `[2..]` slice.
    #[regex(r"[0-9][0-9_]*([uUiI][0-9]+)?", |lex| {
        parse_int(lex.slice(), 10)
    })]
    Integer((BigUint, LiteralKind)),
    #[regex(r"0x[0-9A-Fa-f][0-9_A-Fa-f]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 16)
    })]
    HexInteger((BigUint, LiteralKind)),
    #[regex(r"0b[0-1][0-1_]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 2)
    })]
    BinInteger((BigUint, LiteralKind)),

    // Boolean literals.
    #[token("true")]
    True,
    #[token("false")]
    False,

    // Logic-level literals.
    #[token("LOW")]
    Low,
    #[token("HIGH")]
    High,
    #[token("HIGHIMP")]
    HighImp,

    // Statement and declaration keywords.
    #[token("reg")]
    Reg,
    #[token("let")]
    Let,
    #[token("decl")]
    Decl,
    #[token("inst")]
    Instance,
    #[token("reset")]
    Reset,
    #[token("initial")]
    Initial,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("set")]
    Set,

    // Item-level keywords.
    #[token("pipeline")]
    Pipeline,
    #[token("stage")]
    Stage,
    #[token("entity")]
    Entity,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("for")]
    For,
    #[token("fn")]
    Function,
    #[token("enum")]
    Enum,
    #[token("struct")]
    Struct,
    #[token("port")]
    Port,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("as")]
    As,
    #[token("assert")]
    Assert,
    #[token("mut")]
    Mut,
    #[token("inv")]
    Inv,
    #[token("where")]
    Where,

    #[token("gen")]
    Gen,

    #[token("extern")]
    Extern,

    // Operators. Multi-character operators (e.g. `>>>`) are listed alongside
    // their prefixes; logos resolves them by longest match.
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Asterisk,
    #[token("/")]
    Slash,
    #[token("%")]
    Percentage,
    #[token("==")]
    Equals,
    #[token("!=")]
    NotEquals,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token(">>>")]
    ArithmeticRightShift,
    #[token(">>")]
    RightShift,
    #[token("<<")]
    LeftShift,
    #[token("||")]
    LogicalOr,
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("&")]
    Ampersand,
    #[token("|")]
    BitwiseOr,
    #[token("!")]
    Not,
    #[token("^")]
    BitwiseXor,
    #[token("~")]
    Tilde,
    #[token("`")]
    InfixOperatorSeparator,
    #[token("'")]
    SingleQuote,

    #[token("=")]
    Assignment,

    // Delimiters.
    #[token("(")]
    OpenParen,
    #[token(")")]
    CloseParen,

    #[token("{")]
    OpenBrace,
    #[token("}")]
    CloseBrace,

    #[token("[")]
    OpenBracket,
    #[token("]")]
    CloseBracket,

    // Punctuation.
    #[token("=>")]
    FatArrow,
    #[token("->")]
    SlimArrow,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token(";")]
    Semi,
    // NOTE(review): the token below should be U+037E GREEK QUESTION MARK,
    // which renders identically to the ASCII `;` of `Semi` above. logos
    // rejects duplicate token definitions, so if this fails to compile,
    // the codepoint was lost — restore `\u{37E}`.
    #[token(";")]
    GreekQuestionMark,
    #[token(":")]
    Colon,
    #[token("::")]
    PathSeparator,
    #[token("#")]
    Hash,
    #[token("$")]
    Dollar,

    // Doc comments; the payload is the text after the 3-byte `///`/`//!`
    // marker (leading whitespace preserved).
    #[regex("///[^\n]*", |lex| lex.slice()[3..].to_string())]
    OutsideDocumentation(String),
    #[regex("//![^\n]*", |lex| lex.slice()[3..].to_string())]
    InsideDocumentation(String),

    // Skipped input: never emitted as tokens.
    // NOTE(review): this class matches one whitespace character per callback;
    // a trailing `+` would skip whole runs at once — behavior-identical,
    // slightly fewer callback invocations.
    #[regex("[ \t\n\r]", logos::skip)]
    Whitespace,

    #[regex("//[^\n]*", logos::skip)]
    Comment,

    // Block comment delimiters are surfaced as tokens — presumably so a later
    // stage can match (possibly nested) `/* ... */` pairs itself; confirm
    // against the consumer.
    #[token("/*")]
    BlockCommentStart,
    #[token("*/")]
    BlockCommentEnd,

    // Not produced by logos; presumably appended by the lexer's caller to
    // mark end of input.
    Eof,
}
242
impl TokenKind {
    /// A static, human-readable name for this token kind, suitable for
    /// diagnostics. Keyword, operator and punctuation tokens map to their
    /// exact source spelling; payload-carrying and synthetic tokens map to a
    /// descriptive word instead.
    pub fn as_str(&self) -> &'static str {
        match self {
            TokenKind::Identifier(_) => "identifier",
            TokenKind::Integer(_) => "integer",
            TokenKind::HexInteger(_) => "hexadecimal integer",
            TokenKind::BinInteger(_) => "binary integer",
            TokenKind::True => "true",
            TokenKind::False => "false",
            TokenKind::Low => "LOW",
            TokenKind::High => "HIGH",
            TokenKind::HighImp => "HIGHIMP",

            TokenKind::Let => "let",
            TokenKind::Reg => "reg",
            TokenKind::Decl => "decl",
            TokenKind::Entity => "entity",
            TokenKind::Pipeline => "pipeline",
            TokenKind::Stage => "stage",
            TokenKind::Instance => "inst",
            TokenKind::Reset => "reset",
            TokenKind::Initial => "initial",
            TokenKind::If => "if",
            TokenKind::Else => "else",
            TokenKind::Match => "match",
            TokenKind::Impl => "impl",
            TokenKind::Trait => "trait",
            TokenKind::For => "for",
            TokenKind::Function => "fn",
            TokenKind::Enum => "enum",
            TokenKind::Struct => "struct",
            TokenKind::Port => "port",
            TokenKind::Mod => "mod",
            TokenKind::As => "as",
            TokenKind::Use => "use",
            TokenKind::Assert => "assert",
            TokenKind::Set => "set",
            TokenKind::Mut => "mut",
            TokenKind::Inv => "inv",
            TokenKind::Where => "where",

            TokenKind::Gen => "gen",

            TokenKind::Extern => "extern",

            TokenKind::Assignment => "=",
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percentage => "%",
            TokenKind::Equals => "==",
            TokenKind::NotEquals => "!=",
            TokenKind::Lt => "<",
            TokenKind::Gt => ">",
            TokenKind::Le => "<=",
            TokenKind::Ge => ">=",
            TokenKind::LeftShift => "<<",
            TokenKind::RightShift => ">>",
            TokenKind::ArithmeticRightShift => ">>>",
            TokenKind::LogicalOr => "||",
            TokenKind::LogicalAnd => "&&",
            TokenKind::LogicalXor => "^^",
            TokenKind::Ampersand => "&",
            TokenKind::BitwiseOr => "|",
            TokenKind::Not => "!",
            TokenKind::Tilde => "~",
            TokenKind::BitwiseXor => "^",
            TokenKind::InfixOperatorSeparator => "`",

            TokenKind::OpenParen => "(",
            TokenKind::CloseParen => ")",
            TokenKind::OpenBrace => "{",
            TokenKind::CloseBrace => "}",
            TokenKind::OpenBracket => "[",
            TokenKind::CloseBracket => "]",

            TokenKind::FatArrow => "=>",
            TokenKind::SlimArrow => "->",
            TokenKind::Semi => ";",
            // Deliberately not the raw character: U+037E is visually
            // indistinguishable from `;`, so the name is spelled out.
            TokenKind::GreekQuestionMark => "GreekQuestionMark(;)",
            TokenKind::Colon => ":",
            TokenKind::Comma => ",",
            TokenKind::Dot => ".",
            TokenKind::DotDot => "..",
            TokenKind::PathSeparator => "::",
            TokenKind::SingleQuote => "'",

            TokenKind::Hash => "#",
            TokenKind::Dollar => "$",

            TokenKind::Eof => "end of file",

            TokenKind::OutsideDocumentation(_) => "///",
            TokenKind::InsideDocumentation(_) => "//!",

            TokenKind::Whitespace => "whitespace",
            TokenKind::Comment => "comment",

            TokenKind::BlockCommentStart => "/*",
            TokenKind::BlockCommentEnd => "*/",
        }
    }

    /// True if this token is an [`TokenKind::Identifier`].
    pub fn is_identifier(&self) -> bool {
        matches!(self, TokenKind::Identifier(_))
    }
    /// True if this token is any integer literal (decimal, hex or binary).
    pub fn is_integer(&self) -> bool {
        matches!(
            self,
            TokenKind::Integer(_) | TokenKind::HexInteger(_) | TokenKind::BinInteger(_)
        )
    }

    /// The numeric value of an integer literal token, or `None` for any other
    /// token kind. The literal's size suffix ([`LiteralKind`]) is discarded.
    pub fn as_biguint(&self) -> Option<BigUint> {
        match self {
            TokenKind::Integer((i, _))
            | TokenKind::HexInteger((i, _))
            | TokenKind::BinInteger((i, _)) => Some(i.clone()),
            _ => None,
        }
    }
}
366
#[cfg(test)]
mod tests {
    use spade_common::num_ext::InfallibleToBigUint;

    use super::*;

    /// Lex `input` to exhaustion, panicking if any fragment fails to lex.
    fn tokens(input: &str) -> Vec<TokenKind> {
        TokenKind::lexer(input)
            .map(|tok| tok.expect("lexing should succeed"))
            .collect()
    }

    #[test]
    fn identifiers_work() {
        assert_eq!(
            tokens("abc123_"),
            vec![TokenKind::Identifier("abc123_".to_string())]
        );
    }

    #[test]
    fn integer_literals_work() {
        assert_eq!(
            tokens("123"),
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsized
            ))]
        );
    }

    #[test]
    fn sized_uint_integer_literals_work() {
        assert_eq!(
            tokens("123u3"),
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsigned(3u32.to_biguint())
            ))]
        );
    }

    #[test]
    fn sized_int_integer_literals_work() {
        assert_eq!(
            tokens("123i3"),
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Signed(3u32.to_biguint())
            ))]
        );
    }

    #[test]
    fn hex_array() {
        assert_eq!(
            tokens("[0x45]"),
            vec![
                TokenKind::OpenBracket,
                TokenKind::HexInteger((0x45_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::CloseBracket,
            ]
        );
    }

    #[test]
    fn invalid_hex_is_not_hex() {
        // The stray `g` means `0xg` is not a hex literal: `0` lexes as a
        // plain decimal integer and `xg` as a separate identifier.
        assert_eq!(
            tokens("0xg"),
            vec![
                TokenKind::Integer((0_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::Identifier("xg".to_string()),
            ]
        );
    }

    #[test]
    fn doc_comments_slice_correctly() {
        // The payload starts directly after the 3-byte marker, keeping any
        // leading whitespace.
        assert_eq!(
            tokens("//! Hello\n///G'day"),
            vec![
                TokenKind::InsideDocumentation(" Hello".to_string()),
                TokenKind::OutsideDocumentation("G'day".to_string()),
            ]
        );
    }
}