From 2a62f88ca03d5db406756525b5a6e24eae027eab Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Fri, 26 Jun 2020 21:56:34 -0500 Subject: Rename parse to tokenize, add Environment. --- src/lib/tokenize.rs | 296 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 src/lib/tokenize.rs (limited to 'src/lib/tokenize.rs') diff --git a/src/lib/tokenize.rs b/src/lib/tokenize.rs new file mode 100644 index 0000000..384b04f --- /dev/null +++ b/src/lib/tokenize.rs @@ -0,0 +1,296 @@ +use super::types::Type; +use super::types::Number; +use super::types::Op; +use super::types::SEXP; + +pub type MaybeToken = (Option>, usize); + +#[derive(PartialEq, Debug)] +pub enum Token { + LParen, + RParen, + Value(Type) +} + +pub struct TokenStream { + expr: String, + index: usize, + rules: Vec MaybeToken>, + on_err: String, +} + +impl TokenStream { + /// Creates a new TokenStream object with the provided string. + pub fn new(expr: String, rules: Vec MaybeToken>) -> TokenStream { + TokenStream { + expr, + index: 0, + rules, + on_err: "ERROR".to_string(), + } + } + + pub fn default(e: &str) -> TokenStream { + TokenStream { + expr: e.to_string(), + index: 0, + rules: vec!(is_paren, is_op, is_bool, is_var, is_string, is_number), + on_err: "ERROR".to_string(), + } + } + + pub fn peek(&self) -> Option> { + + let i = self.count_whitespace(); + if self.index + i == self.expr.len() { + return None + } + /* + let (token, _) = analyze(&self.expr[self.index + i..], + self.rules.as_slice(), + &self.on_err); + */ + let (token, _) = analyze2(&self.expr[self.index + i ..]); + token + } + + + fn count_whitespace(&self) -> usize { + let mut whitespace_count = 0; + for x in self.expr[self.index..].chars() { + if x.is_whitespace() { + whitespace_count += 1; + } else { + break + } + } + whitespace_count + } + + fn skip_whitespace(&mut self) { + if self.index < self.expr.len() { + self.index += self.count_whitespace(); + } + } +} + +impl Iterator for TokenStream { + type Item = Result; + + fn next(&mut self) -> Option { + if self.index == self.expr.len() { + return None + } + + self.skip_whitespace(); + /* + let (token, len) = analyze( + &self.expr[self.index..], + self.rules.as_ref(), &self.on_err); + */ + let (token, len) = analyze2(&self.expr[self.index ..]); + self.index += len; + token + } + + fn size_hint(&self) -> (usize, Option) { + if self.index == self.expr.len() { + (0, None) + } else { + (1, Some(self.expr.len() - self.index)) + } + } +} + +pub fn analyze(expr: &str, funs: &[fn(&str) -> MaybeToken], + on_err: &str) -> MaybeToken { + for &fun in funs.iter() { + let (token, len) = fun(expr); + if token.is_some() { + return (token, len) + } + } + + (Some(Err(on_err.to_string())), 0) +} + +fn analyze2(expr: &str) -> MaybeToken { + //is_var, is_number + let c = expr.chars().next().unwrap(); + /* Check for strings, ( and ) */ + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + return (Some(Ok(value)), expr_len) + } else if c == '(' { + return (Some(Ok(Token::LParen)), 1) + } else if c == ')' { + return (Some(Ok(Token::RParen)), 1) + } + + let word = &expr[0 .. get_word_end(expr)]; + if word == "true" { + (Some(Ok(Token::Value(Type::Bool(true)))), 4) + } else if word == "false" { + (Some(Ok(Token::Value(Type::Bool(false)))), 5) + } else if let Ok(op) = word.tokenize::() { + (Some(Ok(Token::Value(Type::Operator(op)))), word.len()) + } else if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn make_word(expr: &str) -> String { + let word = expr.split(|c: char| { + c.is_whitespace() + }).next().unwrap(); + let termination = |c: char| { c == ')' || c == '('}; + let word_length = word.find(termination).unwrap_or_else(|| word.len()); + word[0..word_length].to_string() +} + +pub fn get_word_end(expr: &str) -> usize { + let word = expr.split(|c: char| { c.is_whitespace() }).next().unwrap(); + let termination_predicate = |c: char| { c == ')' || c == '('}; + word.find(termination_predicate).unwrap_or_else(|| word.len()) +} + +pub fn is_paren(expr: &str) -> MaybeToken { + match expr.chars().next().unwrap() { + '(' => (Some(Ok(Token::LParen)), 1), + ')' => (Some(Ok(Token::RParen)), 1), + _ => (None, 0) + } +} + +pub fn is_op(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.tokenize::() { + Ok(op) => (Some(Ok(Token::Value(Type::Operator(op)))), word.len()), + _ => (None, 0) + } +} + + +pub fn is_bool(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.as_ref() { + "true" => (Some(Ok(Token::Value(Type::Bool(true)))), 4), + "false" => (Some(Ok(Token::Value(Type::Bool(false)))), 5), + _ => (None, 0) + } +} + +pub fn is_var(expr: &str) -> MaybeToken { + let word = make_word(expr); + let c = word.chars().next().unwrap(); + if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else { + (None, 0) + } +} + +pub fn is_string(expr: &str) -> MaybeToken { + let c = expr.chars().next().unwrap(); + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + (Some(Ok(value)), expr_len) + } else { + (None, 0) + } +} + +fn get_string_end(expr: &str) -> usize { + let mut previous = '"'; + let maybe_close = expr[1..].find(|current: char| { + if current == '"' && previous != '\\' { + true + } else { + previous = current; + false + } + }); + + match maybe_close { + Some(x) => x, + None => panic!("No string ending found!") + } +} + +pub fn is_number(expr: &str) -> MaybeToken { + let word = make_word(expr); + if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn is_int(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.tokenize::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Int(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn is_float(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.tokenize::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Float(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn tokenize(expr: &str) -> Result { + let mut tokenstream = TokenStream::default(expr); + match tokenstream.peek() { + Some(Ok(Token::LParen)) => { + let _ = tokenstream.next(); + descend(&mut tokenstream) + }, + Some(Ok(Token::RParen)) => Err("Malformed expression".to_string()), + Some(Ok(Token::Value(x))) => Ok(SEXP::Atom(x)), + Some(Err(f)) => Err(f), + None => Err("Empty expression".to_string()) + } +} + +pub fn descend(tokenstream: &mut TokenStream) -> Result { + let mut sexp = Vec::new(); + loop { + let token = match tokenstream.next() { + Some(Ok(x)) => x, + Some(Err(f)) => return Err(f), + None => panic!("Empty string".to_string()) + }; + + match token { + Token::LParen => { + let sexp_inner = match descend(tokenstream) { + Ok(x) => x, + Err(f) => return Err(f) + }; + sexp.push(sexp_inner); + continue; + }, + Token::RParen => { + break; + }, + Token::Value(atom) => { + sexp.push(SEXP::Atom(atom)); + continue; + } + } + } + + Ok(SEXP::Sexpr(sexp)) +} -- cgit v1.2.3