More tests, minor behavioral changes to provide better information.
parent f7bc41e539
commit 64a0e8bd92

3 changed files with 61 additions and 14 deletions
@@ -70,14 +70,14 @@ class Tokenizer(private val reader: CharacterReader):
             Left(Error.BackSlashNotAllowed(reader.currentSourcePosition()))
           case TokenDelimiter.DoubleQuote =>
             whiteSpaceOnly = false
-            nextInternal(
-              State.InDoubleQuote(reader.currentSourcePosition())
-            )
+            val st = State.InDoubleQuote(reader.currentSourcePosition())
+            states.push(st)
+            nextInternal(st)
           case TokenDelimiter.SingleQuote =>
             whiteSpaceOnly = false
-            nextInternal(
-              State.InSingleQuote(reader.currentSourcePosition())
-            )
+            val st = State.InSingleQuote(reader.currentSourcePosition())
+            states.push(st)
+            nextInternal(st)
           case '-' =>
             if whiteSpaceOnly then
               whiteSpaceOnly = false
@@ -87,13 +87,15 @@ class Tokenizer(private val reader: CharacterReader):
             else
               whiteSpaceOnly = false
               buffer.addOne(ch)
-              nextInternal(
-                State.InGeneric(reader.currentSourcePosition())
-              )
+              val st = State.InGeneric(reader.currentSourcePosition())
+              states.push(st)
+              nextInternal(st)
           case _ =>
             whiteSpaceOnly = false
             buffer.addOne(ch)
-            nextInternal(State.InGeneric(reader.currentSourcePosition()))
+            val st = State.InGeneric(reader.currentSourcePosition())
+            states.push(st)
+            nextInternal(st)
       case State.PotentialComment(startPos) =>
         reader.consume() match
           case None => Left(Error.PrematureEof(dumpStack()))
@@ -120,18 +122,20 @@ class Tokenizer(private val reader: CharacterReader):
             whiteSpaceOnly = false
             buffer.addOne('-')
             buffer.addOne(ch)
-            nextInternal(State.InGeneric(startPos))
+            val st = State.InGeneric(startPos)
+            states.push(st)
+            nextInternal(st)
       case State.InComment(startPos) =>
         reader.consume() match
           case None =>
             // Reaching EOF during a comment is perfectly fine.
-            Right(Token.Comment(buffer.mkString))
+            Right(Token.Comment(buffer.mkString.trim()))
          case Some(ch) =>
            ch match
              case '\n' =>
                // Newlines terminate a comment.
                whiteSpaceOnly = true
-               Right(Token.Comment(buffer.mkString))
+               Right(Token.Comment(buffer.mkString.trim()))
              case _ =>
                // Any non-newline character is considered part of a comment.
                buffer.addOne(ch)
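The behavioral change in the Tokenizer hunks above is that each descent into a quoted or generic state is now recorded on the states stack before nextInternal(st) recurses, so a premature EOF can report every construct that was still open via Error.PrematureEof(dumpStack()). Below is a minimal, self-contained sketch of that pattern; the enter/onEof driver and the plain Int position are illustrative assumptions, not the actual Tokenizer API.

// Sketch only: mirrors the push-before-recurse pattern from the diff, with a
// simplified Int position standing in for reader.currentSourcePosition().
import scala.collection.mutable

object StateStackSketch:
  enum State:
    case InDoubleQuote(pos: Int)
    case InSingleQuote(pos: Int)
    case InGeneric(pos: Int)

  enum Error:
    case PrematureEof(open: List[State])

  private val states = mutable.Stack.empty[State]

  // As in the commit: record the state before descending into it.
  def enter(st: State): Unit = states.push(st)

  // dumpStack() stand-in: innermost open construct (top of stack) first.
  private def dumpStack(): List[State] = states.toList

  // Hitting EOF with anything still open yields a PrematureEof carrying the
  // whole stack -- the "better information" the commit message refers to.
  def onEof(): Either[Error, Unit] =
    if states.isEmpty then Right(())
    else Left(Error.PrematureEof(dumpStack()))

@main def demo(): Unit =
  import StateStackSketch.*
  enter(State.InDoubleQuote(1))   // e.g. after consuming the input "\""
  println(onEof())                // Left(PrematureEof(List(InDoubleQuote(1))))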
@@ -66,6 +66,49 @@ class TokenizerTests extends munit.FunSuite:
     assertTokens("-\n", Right(Token.Generic("-")))
   }
 
+  test("should handle basic comments") {
+    assertTokens("-- Comment", Right(Token.Comment("Comment")))
+  }
+
+  test("should handle comments, if only whitespace precedes them") {
+    assertTokens("\t -- Comment", Right(Token.Comment("Comment")))
+  }
+
+  test("should handle successive lines of comments") {
+    assertTokens(
+      "--c1\n--c2\n--c3",
+      Right(Token.Comment("c1")),
+      Right(Token.Comment("c2")),
+      Right(Token.Comment("c3"))
+    )
+  }
+
+  test("should ignore comment contents") {
+    assertTokens("-- a\\12 3\t4", Right(Token.Comment("a\\12 3\t4")))
+  }
+
+  test("should throw an error if EOF is reached inside a string literal") {
+    assertTokens(
+      "\"",
+      Left(
+        Tokenizer.Error.PrematureEof(
+          List(Tokenizer.State.InDoubleQuote(pos(1, 1, 1)))
+        )
+      )
+    )
+  }
+
+  test("should throw an error if EOF is reached inside a character literal") {
+    assertTokens(
+      "\'",
+      Left(
+        Tokenizer.Error.PrematureEof(
+          List(Tokenizer.State.InSingleQuote(pos(1, 1, 1)))
+        )
+      )
+    )
+  }
+
   private def assertTokens(
       source: String,
       expectedOutput: Either[Tokenizer.Error, Token]*
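The comment tokens are now trimmed (buffer.mkString.trim() in the hunk above), which is why the new tests expect Token.Comment("Comment") for the input "-- Comment". One more case in the same style could pin down trailing whitespace as well; this is a sketch only, not part of the commit, and assumes a trailing newline behaves as in the existing "-\n" test.

  test("should trim trailing whitespace in comment contents") {
    assertTokens("--  padded  \n", Right(Token.Comment("padded")))
  }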
@@ -1 +1 @@
-sbt.version=1.9.8
+sbt.version=1.9.9