Diffstat (limited to 'src/lib/tokenize.rs')
-rw-r--r--  src/lib/tokenize.rs  296
1 file changed, 296 insertions(+), 0 deletions(-)
diff --git a/src/lib/tokenize.rs b/src/lib/tokenize.rs
new file mode 100644
index 0000000..384b04f
--- /dev/null
+++ b/src/lib/tokenize.rs
@@ -0,0 +1,296 @@
+use super::types::{Number, Op, Type, SEXP};
+
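+/// A lexer rule's result: the matched token (or a lexing error) plus the
+/// number of bytes consumed; `(None, 0)` means the rule did not match.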
+pub type MaybeToken = (Option<Result<Token, String>>, usize);
+
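+/// A single lexical token: a parenthesis or an atomic value.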
+#[derive(PartialEq, Debug)]
+pub enum Token {
+ LParen,
+ RParen,
+ Value(Type)
+}
+
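+/// An iterator over the tokens of `expr`, tracking a byte index into it.
+/// `rules` and `on_err` feed the table-driven `analyze` path; `peek` and
+/// `next` currently use the hand-rolled `analyze2` instead.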
+pub struct TokenStream {
+ expr: String,
+ index: usize,
+ rules: Vec<fn(&str) -> MaybeToken>,
+ on_err: String,
+}
+
+impl TokenStream {
+    /// Creates a new TokenStream over `expr` that lexes with the provided rule functions.
+ pub fn new(expr: String, rules: Vec<fn(&str) -> MaybeToken>) -> TokenStream {
+ TokenStream {
+ expr,
+ index: 0,
+ rules,
+ on_err: "ERROR".to_string(),
+ }
+ }
+
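+    /// Builds a stream over `e` with the standard rule set, tried in order.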
+ pub fn default(e: &str) -> TokenStream {
+ TokenStream {
+ expr: e.to_string(),
+ index: 0,
+            rules: vec![is_paren, is_op, is_bool, is_var, is_string, is_number],
+ on_err: "ERROR".to_string(),
+ }
+ }
+
+    /// Returns the next token without consuming it.
+    pub fn peek(&self) -> Option<Result<Token, String>> {
+        let i = self.count_whitespace();
+        if self.index + i == self.expr.len() {
+            return None
+        }
+        let (token, _) = analyze2(&self.expr[self.index + i ..]);
+        token
+    }
+
+    /// Counts the leading whitespace characters starting at `self.index`.
+    fn count_whitespace(&self) -> usize {
+ let mut whitespace_count = 0;
+ for x in self.expr[self.index..].chars() {
+ if x.is_whitespace() {
+ whitespace_count += 1;
+ } else {
+ break
+ }
+ }
+ whitespace_count
+ }
+
+ fn skip_whitespace(&mut self) {
+ if self.index < self.expr.len() {
+ self.index += self.count_whitespace();
+ }
+ }
+}
+
+impl Iterator for TokenStream {
+ type Item = Result<Token, String>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Skip whitespace before the end-of-input check so a trailing run of
+        // spaces yields `None` instead of handing an empty slice to the lexer.
+        self.skip_whitespace();
+        if self.index == self.expr.len() {
+            return None
+        }
+
+        let (token, len) = analyze2(&self.expr[self.index ..]);
+        self.index += len;
+        token
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        if self.index == self.expr.len() {
+            // Exhausted: exactly zero tokens remain.
+            (0, Some(0))
+        } else {
+            // The remainder could be all whitespace, so the lower bound is 0;
+            // each remaining byte yields at most one token.
+            (0, Some(self.expr.len() - self.index))
+        }
+    }
+}
+
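+/// Table-driven matcher: tries each rule in order and returns the first
+/// match, falling back to `on_err` as a token error. `analyze2` below is the
+/// hand-rolled equivalent used by the iterator.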
+pub fn analyze(expr: &str, funs: &[fn(&str) -> MaybeToken],
+ on_err: &str) -> MaybeToken {
+ for &fun in funs.iter() {
+ let (token, len) = fun(expr);
+ if token.is_some() {
+ return (token, len)
+ }
+ }
+
+ (Some(Err(on_err.to_string())), 0)
+}
+
+/// Single-pass lexer used by the iterator: dispatches on the first character
+/// (strings and parentheses), then classifies the next word as a bool,
+/// operator, symbol, integer, or float.
+fn analyze2(expr: &str) -> MaybeToken {
+    let c = expr.chars().next().unwrap();
+    /* Strings, ( and ) are recognized by their first character alone. */
+    if c == '"' {
+        let close = get_string_end(expr);
+        let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string()));
+        let expr_len = close + 2; // contents plus both quote characters
+        return (Some(Ok(value)), expr_len)
+    } else if c == '(' {
+        return (Some(Ok(Token::LParen)), 1)
+    } else if c == ')' {
+        return (Some(Ok(Token::RParen)), 1)
+    }
+
+    let word = &expr[0 .. get_word_end(expr)];
+    if word == "true" {
+        (Some(Ok(Token::Value(Type::Bool(true)))), 4)
+    } else if word == "false" {
+        (Some(Ok(Token::Value(Type::Bool(false)))), 5)
+    } else if let Ok(op) = word.parse::<Op>() {
+        (Some(Ok(Token::Value(Type::Operator(op)))), word.len())
+    } else if c.is_alphabetic() {
+        (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len())
+    } else if let (Some(x), len) = is_int(word) {
+        (Some(x), len)
+    } else {
+        match is_float(word) {
+            (Some(token), len) => (Some(token), len),
+            // Nothing matched: surface an error instead of silently ending
+            // the stream with `(None, 0)`.
+            _ => (Some(Err(format!("Unrecognized token: {}", word))), word.len())
+        }
+    }
+}
+
+/// Extracts the next word as an owned `String`; see `get_word_end`.
+pub fn make_word(expr: &str) -> String {
+    expr[..get_word_end(expr)].to_string()
+}
+
+/// Returns the byte length of the next word, delimited by whitespace or a
+/// parenthesis.
+pub fn get_word_end(expr: &str) -> usize {
+    let word = expr.split(char::is_whitespace).next().unwrap();
+    let termination_predicate = |c: char| c == ')' || c == '(';
+    word.find(termination_predicate).unwrap_or(word.len())
+}
+
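+/// Rule: a single `(` or `)`. Like every rule, it returns `(None, 0)` when
+/// it does not match.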
+pub fn is_paren(expr: &str) -> MaybeToken {
+ match expr.chars().next().unwrap() {
+ '(' => (Some(Ok(Token::LParen)), 1),
+ ')' => (Some(Ok(Token::RParen)), 1),
+ _ => (None, 0)
+ }
+}
+
+/// Rule: an operator, as recognized by `Op`'s `FromStr` implementation.
+pub fn is_op(expr: &str) -> MaybeToken {
+    let word = make_word(expr);
+    match word.parse::<Op>() {
+        Ok(op) => (Some(Ok(Token::Value(Type::Operator(op)))), word.len()),
+        _ => (None, 0)
+    }
+}
+
+/// Rule: the literals `true` and `false`.
+pub fn is_bool(expr: &str) -> MaybeToken {
+ let word = make_word(expr);
+ match word.as_ref() {
+ "true" => (Some(Ok(Token::Value(Type::Bool(true)))), 4),
+ "false" => (Some(Ok(Token::Value(Type::Bool(false)))), 5),
+ _ => (None, 0)
+ }
+}
+
+/// Rule: a symbol (variable name), any word starting with an alphabetic character.
+pub fn is_var(expr: &str) -> MaybeToken {
+    let word = make_word(expr);
+    // `map_or` avoids the panic `unwrap` would cause on an empty word.
+    if word.chars().next().map_or(false, char::is_alphabetic) {
+        let len = word.len();
+        (Some(Ok(Token::Value(Type::Symbol(word)))), len)
+    } else {
+        (None, 0)
+    }
+}
+
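+/// Rule: a double-quoted string literal; backslash-escaped quotes do not
+/// terminate it.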
+pub fn is_string(expr: &str) -> MaybeToken {
+ let c = expr.chars().next().unwrap();
+ if c == '"' {
+ let close = get_string_end(expr);
+ let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string()));
+ let expr_len = close + 2;
+ (Some(Ok(value)), expr_len)
+ } else {
+ (None, 0)
+ }
+}
+
+/// Returns the index, within `expr[1..]`, of the closing unescaped quote.
+fn get_string_end(expr: &str) -> usize {
+    let mut escaped = false;
+    let maybe_close = expr[1..].find(|current: char| {
+        if escaped {
+            // The previous character was a backslash: this one is escaped,
+            // so even a `"` does not close the string.
+            escaped = false;
+            false
+        } else if current == '\\' {
+            escaped = true;
+            false
+        } else {
+            current == '"'
+        }
+    });
+
+    maybe_close.expect("No string ending found!")
+}
+
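+/// Rule: a numeric literal. Integers are tried before floats, so "1" lexes
+/// as `Int(1)` rather than `Float(1.0)`.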
+pub fn is_number(expr: &str) -> MaybeToken {
+ let word = make_word(expr);
+ if let (Some(x), len) = is_int(&word) {
+ (Some(x), len)
+ } else {
+ is_float(&word)
+ }
+}
+
+/// Rule helper: parses the word as an `isize` integer literal.
+pub fn is_int(word: &str) -> MaybeToken {
+    match word.parse::<isize>() {
+        Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Int(x))))), word.len()),
+        _ => (None, 0)
+    }
+}
+
+/// Rule helper: parses the word as an `f32` float literal.
+pub fn is_float(word: &str) -> MaybeToken {
+    match word.parse::<f32>() {
+        Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Float(x))))), word.len()),
+        _ => (None, 0)
+    }
+}
+
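+/// Parses an expression string into an s-expression tree: a bare atom, or a
+/// parenthesized list handled by `descend`.
+///
+/// A minimal usage sketch (marked `ignore` since the exact variants of
+/// `Type` and `SEXP` live in `super::types` and are only assumed here):
+///
+/// ```ignore
+/// let list = tokenize("(+ 1 2)")?;  // SEXP::Sexpr of three atoms
+/// let atom = tokenize("42")?;       // SEXP::Atom of an integer
+/// ```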
+pub fn tokenize(expr: &str) -> Result<SEXP, String> {
+ let mut tokenstream = TokenStream::default(expr);
+ match tokenstream.peek() {
+ Some(Ok(Token::LParen)) => {
+ let _ = tokenstream.next();
+ descend(&mut tokenstream)
+ },
+ Some(Ok(Token::RParen)) => Err("Malformed expression".to_string()),
+ Some(Ok(Token::Value(x))) => Ok(SEXP::Atom(x)),
+ Some(Err(f)) => Err(f),
+ None => Err("Empty expression".to_string())
+ }
+}
+
+/// Consumes tokens up to and including the matching `RParen`, recursing on
+/// each nested `LParen`.
+pub fn descend(tokenstream: &mut TokenStream) -> Result<SEXP, String> {
+    let mut sexp = Vec::new();
+    loop {
+        let token = match tokenstream.next() {
+            Some(Ok(x)) => x,
+            Some(Err(f)) => return Err(f),
+            // Running out of tokens here means a parenthesis was never closed.
+            None => return Err("Unexpected end of expression".to_string())
+        };
+
+        match token {
+            Token::LParen => sexp.push(descend(tokenstream)?),
+            Token::RParen => break,
+            Token::Value(atom) => sexp.push(SEXP::Atom(atom))
+        }
+    }
+
+    Ok(SEXP::Sexpr(sexp))
+}
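+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A minimal smoke-test sketch. It sticks to parentheses so it depends
+    // only on `Token` (which derives PartialEq/Debug above), not on the
+    // exact shape of `Type` in super::types.
+    #[test]
+    fn lexes_parens_and_skips_whitespace() {
+        let mut ts = TokenStream::default("  ( )  ");
+        assert_eq!(ts.next(), Some(Ok(Token::LParen)));
+        assert_eq!(ts.next(), Some(Ok(Token::RParen)));
+        assert_eq!(ts.next(), None);
+    }
+}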