From 6c1c571586abcfcea4a351924350cd4eed97b429 Mon Sep 17 00:00:00 2001 From: Arnaud Castellanos Galea Date: Tue, 29 Jan 2019 15:13:15 -0500 Subject: [PATCH 1/2] Parse string escape sequences --- README.md | 2 +- parser/src/parser.rs | 37 +++++++++++++++++++++++++++++++++++- parser/src/scl.pest | 2 +- parser/src/tests/parser.rs | 4 ++++ parser/tests/valid.rs | 1 + parser/tests/valid/basic.scl | 2 ++ 6 files changed, 45 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 954578a..e0cb601 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ They are surrounded by double quotes: ``` str = "The dog says \"woof\"." ``` -As you can see, double quotes have to be escaped if used in basic strings. +As you can see, double quotes have to be escaped if used in basic strings. Valid escape secuences are quotes with `\"`, newlines with `\n` and two digit hexadecimal characters with `\x00`. ### Multi-line They are surrounded by three double quotes on each side. diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 24a756d..4f9107c 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -32,6 +32,41 @@ struct ParserState<'a> { path: Option<&'a Path>, } +fn escape_string (s: &str) -> String { + let mut result = String::with_capacity(s.len() - 2); + + let mut chars = s[1 .. s.len()-1].chars(); + + while let Some(c) = chars.next() { + if c == '\\' { + let c = chars.next().expect("Unfinished escape sequence"); + match c { + '\\' => result.push('\\'), + '"' => result.push('"'), + 'n' => result.push('\n'), + 'r' => result.push('\r'), + 'x' => { + let a = chars.next() + .expect("Unfinished escape sequence") + .to_digit(16) + .expect("Invalid hexadecimal digit"); + let b = chars.next() + .expect("Unfinished escape sequence") + .to_digit(16) + .expect("Invalid hexadecimal digit"); + // Fails when not ascii + result.push((a*16 + b) as u8 as char); + } + c => panic!("Invalid escape sequence: \\{}", c) + } + } else { + result.push(c); + } + } + + result +} + impl<'a> ParserState<'a> { // TODO: error on different cast/default type fn parse_env_var(&self, pair: Pair) -> Value { @@ -136,7 +171,7 @@ impl<'a> ParserState<'a> { "false" => Value::Boolean(false), _ => unreachable!(), }, - Rule::string => Value::String(pair.as_str().replace("\"", "").to_string()), + Rule::string => Value::String(escape_string(pair.as_str())), Rule::multiline_string => { let text = pair.as_str().replace("\"\"\"", ""); if text.starts_with('\n') { diff --git a/parser/src/scl.pest b/parser/src/scl.pest index c7ff3f5..512973f 100644 --- a/parser/src/scl.pest +++ b/parser/src/scl.pest @@ -6,7 +6,7 @@ comments = _{ "#" ~ (!line_end ~ any)* ~ line_end } /// LITERALS boolean = { "true" | "false" } -string = @{ "\"" ~ (!("\"") ~ any)* ~ "\"" } +string = @{ "\"" ~ ( "\\" ~ any | !("\"" | "\\") ~ any)* ~ "\"" } multiline_string = @{ "\""{3} ~ (!("\""{3}) ~ any)* ~ "\""{3} } // normal digits or a set of readable one (ie preceded by an underscore) diff --git a/parser/src/tests/parser.rs b/parser/src/tests/parser.rs index 7d63b1c..be5014c 100644 --- a/parser/src/tests/parser.rs +++ b/parser/src/tests/parser.rs @@ -33,6 +33,10 @@ fn parse_simple_key_value() { ("val = true", Value::Boolean(true)), ("val = false", Value::Boolean(false)), (r#"val = "a string""#, Value::String("a string".to_string())), + ( + r#"val = "a \n\r\"\x21 \\string""#, + Value::String("a \n\r\"\x21 \\string".to_string()), + ), ( r#"val = """a \n\r "'string""""#, Value::String(r#"a \n\r "'string"#.to_string()), diff --git a/parser/tests/valid.rs b/parser/tests/valid.rs index dd42cea..464956b 100644 --- a/parser/tests/valid.rs +++ b/parser/tests/valid.rs @@ -40,6 +40,7 @@ fn test_basic() { "ho" => V::Array(vec![V::Integer(1), V::Integer(2), V::Integer(3)]), "hey" => V::Array(vec![V::Integer(1), V::Integer(2)]), "max_upload_size" => V::Integer(10000000), + "escaped" => V::String("this \"is\" an\nescaped string\x21".to_string()), "db" => V::Dict(btreemap!( "url" => V::String("blabla".to_string()), "password" => V::String("****".to_string()), diff --git a/parser/tests/valid/basic.scl b/parser/tests/valid/basic.scl index 173927b..5c08a7c 100644 --- a/parser/tests/valid/basic.scl +++ b/parser/tests/valid/basic.scl @@ -13,6 +13,8 @@ hey = [ ] max_upload_size = 10MB +escaped = "this \"is\" an\nescaped string\x21" + # other = include "other.scl" From 5ba7cd8d642dbebbd224c0e6e6fb10cb88e1e8b1 Mon Sep 17 00:00:00 2001 From: Arnaud Castellanos Galea Date: Sat, 2 Feb 2019 12:25:22 -0500 Subject: [PATCH 2/2] Better error handling in escape_string --- README.md | 2 +- parser/src/parser.rs | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index e0cb601..44c7aa8 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ They are surrounded by double quotes: ``` str = "The dog says \"woof\"." ``` -As you can see, double quotes have to be escaped if used in basic strings. Valid escape secuences are quotes with `\"`, newlines with `\n` and two digit hexadecimal characters with `\x00`. +As you can see, double quotes have to be escaped if used in basic strings. Valid escape sequences are quotes with `\"`, newlines with `\n` and two digit hexadecimal characters with `\x00`. ### Multi-line They are surrounded by three double quotes on each side. diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 4f9107c..8911275 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -32,39 +32,51 @@ struct ParserState<'a> { path: Option<&'a Path>, } -fn escape_string (s: &str) -> String { +fn escape_string (s: &str) -> Result { + + fn next > (iter: &mut T) -> Result { + iter.next().ok_or_else(|| + Error::InvalidSyntax("Unfinished escape sequence".to_string()) + ) + } + + fn digit_from_char (ch: char) -> Result { + ch.to_digit(16).ok_or_else(|| + Error::InvalidSyntax("Invalid hexadecimal digit".to_string()) + ) + } + let mut result = String::with_capacity(s.len() - 2); let mut chars = s[1 .. s.len()-1].chars(); while let Some(c) = chars.next() { if c == '\\' { - let c = chars.next().expect("Unfinished escape sequence"); + let c = next(&mut chars)?; match c { '\\' => result.push('\\'), '"' => result.push('"'), 'n' => result.push('\n'), 'r' => result.push('\r'), 'x' => { - let a = chars.next() - .expect("Unfinished escape sequence") - .to_digit(16) - .expect("Invalid hexadecimal digit"); - let b = chars.next() - .expect("Unfinished escape sequence") - .to_digit(16) - .expect("Invalid hexadecimal digit"); - // Fails when not ascii - result.push((a*16 + b) as u8 as char); + let a = digit_from_char(next(&mut chars)?)?; + let b = digit_from_char(next(&mut chars)?)?; + let n = a*16 + b; + if n > 127 { + return Err(Error::InvalidSyntax("Not an ASCII value".to_string())); + } + result.push(n as u8 as char); } - c => panic!("Invalid escape sequence: \\{}", c) + c => return Err(Error::InvalidSyntax( + format!("Invalid escape sequence: \\{}", c) + )) } } else { result.push(c); } } - result + Ok(result) } impl<'a> ParserState<'a> { @@ -171,7 +183,7 @@ impl<'a> ParserState<'a> { "false" => Value::Boolean(false), _ => unreachable!(), }, - Rule::string => Value::String(escape_string(pair.as_str())), + Rule::string => Value::String(escape_string(pair.as_str()).unwrap()), Rule::multiline_string => { let text = pair.as_str().replace("\"\"\"", ""); if text.starts_with('\n') {