More tests, minor behavioral changes to provide better information.

This commit is contained in:
Pat Garrity 2024-02-24 10:40:46 -06:00
parent f7bc41e539
commit 64a0e8bd92
Signed by: pfm
GPG key ID: 5CA5D21BAB7F3A76
3 changed files with 61 additions and 14 deletions

View file

@ -70,14 +70,14 @@ class Tokenizer(private val reader: CharacterReader):
Left(Error.BackSlashNotAllowed(reader.currentSourcePosition()))
case TokenDelimiter.DoubleQuote =>
whiteSpaceOnly = false
nextInternal(
State.InDoubleQuote(reader.currentSourcePosition())
)
val st = State.InDoubleQuote(reader.currentSourcePosition())
states.push(st)
nextInternal(st)
case TokenDelimiter.SingleQuote =>
whiteSpaceOnly = false
nextInternal(
State.InSingleQuote(reader.currentSourcePosition())
)
val st = State.InSingleQuote(reader.currentSourcePosition())
states.push(st)
nextInternal(st)
case '-' =>
if whiteSpaceOnly then
whiteSpaceOnly = false
@ -87,13 +87,15 @@ class Tokenizer(private val reader: CharacterReader):
else
whiteSpaceOnly = false
buffer.addOne(ch)
nextInternal(
State.InGeneric(reader.currentSourcePosition())
)
val st = State.InGeneric(reader.currentSourcePosition())
states.push(st)
nextInternal(st)
case _ =>
whiteSpaceOnly = false
buffer.addOne(ch)
nextInternal(State.InGeneric(reader.currentSourcePosition()))
val st = State.InGeneric(reader.currentSourcePosition())
states.push(st)
nextInternal(st)
case State.PotentialComment(startPos) =>
reader.consume() match
case None => Left(Error.PrematureEof(dumpStack()))
@ -120,18 +122,20 @@ class Tokenizer(private val reader: CharacterReader):
whiteSpaceOnly = false
buffer.addOne('-')
buffer.addOne(ch)
nextInternal(State.InGeneric(startPos))
val st = State.InGeneric(startPos)
states.push(st)
nextInternal(st)
case State.InComment(startPos) =>
reader.consume() match
case None =>
// Reaching EOF during a comment is perfectly fine.
Right(Token.Comment(buffer.mkString))
Right(Token.Comment(buffer.mkString.trim()))
case Some(ch) =>
ch match
case '\n' =>
// Newlines terminate a comment.
whiteSpaceOnly = true
Right(Token.Comment(buffer.mkString))
Right(Token.Comment(buffer.mkString.trim()))
case _ =>
// Any non-newline character is considered part of a comment.
buffer.addOne(ch)

View file

@ -66,6 +66,49 @@ class TokenizerTests extends munit.FunSuite:
assertTokens("-\n", Right(Token.Generic("-")))
}
// A source made up of a single `--` comment is expected to produce exactly
// one Comment token whose text omits the marker and the space following it.
test("should handle basic comments") {
  val expected: Either[Tokenizer.Error, Token] = Right(Token.Comment("Comment"))
  assertTokens("-- Comment", expected)
}
// A tab and a space in front of the `--` marker must not prevent the line
// from being read as a comment; the expected token text is just "Comment".
test("should handle comments, if only whitespace precedes them") {
  val expected: Either[Tokenizer.Error, Token] = Right(Token.Comment("Comment"))
  assertTokens("\t -- Comment", expected)
}
// Three comment lines separated by newlines are expected to yield three
// Comment tokens, in source order, one per line.
test("should handle successive lines of comments") {
  val expected: Seq[Either[Tokenizer.Error, Token]] =
    List("c1", "c2", "c3").map(text => Right(Token.Comment(text)))
  assertTokens("--c1\n--c2\n--c3", expected*)
}
// Comment text containing a backslash, digits, a space and a tab is expected
// to come back unchanged as the text of a single Comment token.
test("should ignore comment contents") {
  val commentText = "a\\12 3\t4"
  assertTokens("-- a\\12 3\t4", Right(Token.Comment(commentText)))
}
// The source is a lone double-quote character, so the input ends right after
// the literal is opened. The expected result is a PrematureEof error carrying
// a one-element list with the InDoubleQuote state built from pos(1, 1, 1).
test("should throw an error if EOF is reached inside a string literal") {
  val openState = Tokenizer.State.InDoubleQuote(pos(1, 1, 1))
  val failure = Tokenizer.Error.PrematureEof(List(openState))
  assertTokens("\"", Left(failure))
}
// The source is a lone single-quote character, so the input ends right after
// the literal is opened. The expected result is a PrematureEof error carrying
// a one-element list with the InSingleQuote state built from pos(1, 1, 1).
test("should throw an error if EOF is reached inside a character literal") {
  val openState = Tokenizer.State.InSingleQuote(pos(1, 1, 1))
  val failure = Tokenizer.Error.PrematureEof(List(openState))
  assertTokens("\'", Left(failure))
}
private def assertTokens(
source: String,
expectedOutput: Either[Tokenizer.Error, Token]*

View file

@ -1 +1 @@
sbt.version=1.9.8
sbt.version=1.9.9