Add one big test case and improve code documentation.

This commit is contained in:
Pat Garrity 2024-02-24 23:35:19 -06:00
parent b27e16776e
commit 1bd0e383ed
Signed by: pfm
GPG key ID: 5CA5D21BAB7F3A76
3 changed files with 130 additions and 11 deletions

View file

@@ -5,17 +5,38 @@ import scala.annotation.tailrec
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.Stack
/** Transforms a stream of characters into a stream of tokens.
*
* @param reader
* The [[CharacterReader]].
*/
class Tokenizer(private val reader: CharacterReader):
import Tokenizer.*
private val buffer: ListBuffer[Char] = ListBuffer.empty
private val states: Stack[State] = Stack.empty
private val errors: ListBuffer[Error] = ListBuffer.empty
private var whiteSpaceOnly: Boolean = true
// Accumulates characters for tokens.
private val buffer: ListBuffer[Char] = ListBuffer.empty
// Stack of states. This is refreshed on each call for a new token.
private val states: Stack[State] = Stack.empty
// List of errors within the scope of the current token.
private val errors: ListBuffer[Error] = ListBuffer.empty
// Tracks whether ONLY white space has been seen since the last newline. Used
// to determine if comments are allowed. Comments are only allowed as the
// first non-whitespace character on a line.
private var whiteSpaceOnly: Boolean = true
/** Dump the current [[State]] stack.
*
* `toList` copies the stack's elements, so the result is a snapshot: later
* pushes or pops on `states` do not affect a previously returned list.
*
* @return
* List of [[State]], from top to bottom (most recently pushed first).
*/
private def dumpStack(): List[State] = states.toList
/** Consume the next available token.
*
* This function is **not** thread safe.
*
* @return
* The next available token, or an error if resolving a token fails.
@@ -267,6 +288,8 @@ class Tokenizer(private val reader: CharacterReader):
object Tokenizer:
/** Enumeration which defines the [[Tokenizer]] internal state.
*/
sealed trait State
object State:
@@ -276,17 +299,61 @@ object Tokenizer:
*/
case object Initial extends State
/** Used if the `-` character is seen as the first character on some line.
* In this case, the token may or may not be a comment.
*
* @param start
* The [[SourcePosition]] of the `-` character.
*/
case class PotentialComment(start: SourcePosition) extends State
case class InComment(start: SourcePosition) extends State
case class InDoubleQuote(start: SourcePosition) extends State
case class InSingleQuote(start: SourcePosition) extends State
case class InCharEscape(start: SourcePosition) extends State
case class InGeneric(start: SourcePosition) extends State
/** State for being within a comment. This state ends when a newline is hit.
*
* @param start
* The [[SourcePosition]] of the beginning of the comment.
*/
case class InComment(start: SourcePosition) extends State
/** State for being within double quotes -- a String Literal. This state
* ends when a closing double quote is hit. Newlines are not allowed within
* this state.
*
* @param start
* The [[SourcePosition]] of the beginning of the string.
*/
case class InDoubleQuote(start: SourcePosition) extends State
/** State for being within single quotes -- a Char Literal. This state ends
* when a closing single quote is hit. Newlines are not allowed within this
* state.
*
* @param start
* The [[SourcePosition]] of the beginning of the character.
*/
case class InSingleQuote(start: SourcePosition) extends State
/** State indicating that a character escape within a string or character
* literal was initiated.
*
* @param start
* The [[SourcePosition]] of the character escape.
*/
case class InCharEscape(start: SourcePosition) extends State
/** State that indicates some generic token is being parsed. These might be
* keywords, operators, or names.
*
* @param start
* The [[SourcePosition]] of the start of the token.
*/
case class InGeneric(start: SourcePosition) extends State
given CanEqual[State, State] = CanEqual.derived
end State
/** Enumeration which defines all possible [[Tokenizer]] errors.
*/
sealed trait Error
object Error:
@@ -294,8 +361,6 @@ object Tokenizer:
sealed trait Positional extends Error:
def sourcePosition: SourcePosition
case object NotImplemented extends Error
/** This error occurs when the end of file is reached if the tokenizer is
* still expecting more characters.
*

View file

@@ -0,0 +1,9 @@
namespace unit.test
--- Type class for type constructors which can be mapped over.
given F *
class Functor
--- Transform some wrapped data from one type to another.
given A, B
defn map: F A -> (A -> B) -> F B
end class

View file

@@ -302,6 +302,51 @@ class TokenizerTests extends munit.FunSuite:
)
}
test("should tokenize a valid file (case 3)") {
  // End-to-end check against the `tokenizer-3.ava` fixture: exercises
  // comments (only legal as the first non-whitespace on a line), `given`
  // clauses, punctuation (dot, comma, colon, parens), and the `->` operator.
  val source = loadFileToString("tokenizer-3.ava")
  // NOTE: dropped the leftover debug `println(source)` — tests should not
  // write to stdout on every run.
  assertTokens(
    source,
    Right(Token.Generic("namespace")),
    Right(Token.Generic("unit")),
    Right(Token.Dot),
    Right(Token.Generic("test")),
    Right(
      Token.Comment(
        "- Type class for type constructors which can be mapped over."
      )
    ),
    Right(Token.Generic("given")),
    Right(Token.Generic("F")),
    Right(Token.Generic("*")),
    Right(Token.Generic("class")),
    Right(Token.Generic("Functor")),
    Right(
      Token.Comment("- Transform some wrapped data from one type to another.")
    ),
    Right(Token.Generic("given")),
    Right(Token.Generic("A")),
    Right(Token.Comma),
    Right(Token.Generic("B")),
    Right(Token.Generic("defn")),
    Right(Token.Generic("map")),
    Right(Token.Colon),
    Right(Token.Generic("F")),
    Right(Token.Generic("A")),
    Right(Token.Generic("->")),
    Right(Token.OpenParen),
    Right(Token.Generic("A")),
    Right(Token.Generic("->")),
    Right(Token.Generic("B")),
    Right(Token.CloseParen),
    Right(Token.Generic("->")),
    Right(Token.Generic("F")),
    Right(Token.Generic("B")),
    Right(Token.Generic("end")),
    Right(Token.Generic("class"))
  )
}
private def assertTokens(
source: String,
expectedOutput: Either[Error, Token]*