Add one big test case and improve code documentation.
This commit is contained in:
parent
b27e16776e
commit
1bd0e383ed
3 changed files with 130 additions and 11 deletions
|
@ -5,17 +5,38 @@ import scala.annotation.tailrec
|
|||
import scala.collection.mutable.ListBuffer
|
||||
import scala.collection.mutable.Stack
|
||||
|
||||
/** Transforms a stream of characters into a stream of tokens.
|
||||
*
|
||||
* @param reader
|
||||
* The [[CharacterReader]].
|
||||
*/
|
||||
class Tokenizer(private val reader: CharacterReader):
|
||||
import Tokenizer.*
|
||||
|
||||
// Accumulates characters for tokens.
|
||||
private val buffer: ListBuffer[Char] = ListBuffer.empty
|
||||
|
||||
// Stack of states. This is refreshed on each call for a new token.
|
||||
private val states: Stack[State] = Stack.empty
|
||||
|
||||
// List of errors within the scope of the current token.
|
||||
private val errors: ListBuffer[Error] = ListBuffer.empty
|
||||
|
||||
// Tracks whether ONLY white space has been seen since the next newline. Used
|
||||
// To determine if comments are allowed. Comments are only allowed as the
|
||||
// first non-whitespace character on a line.
|
||||
private var whiteSpaceOnly: Boolean = true
|
||||
|
||||
/** Dump the current [[State]] stack.
|
||||
*
|
||||
* @return
|
||||
* List of [[State]], from top to bottom.
|
||||
*/
|
||||
private def dumpStack(): List[State] = states.toList
|
||||
|
||||
/** Consume the next available token.
|
||||
*
|
||||
* This function is **not** thread safe.
|
||||
*
|
||||
* @return
|
||||
* The next available token, or an error if resolving a token fails.
|
||||
|
@ -267,6 +288,8 @@ class Tokenizer(private val reader: CharacterReader):
|
|||
|
||||
object Tokenizer:
|
||||
|
||||
/** Enumeration which defines the [[Tokenizer]] internal state.
|
||||
*/
|
||||
sealed trait State
|
||||
|
||||
object State:
|
||||
|
@ -276,17 +299,61 @@ object Tokenizer:
|
|||
*/
|
||||
case object Initial extends State
|
||||
|
||||
/** Used if the `-` character is seen as the first character on some line.
|
||||
* In this case, the token may or may not be a comment.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the `-` character.
|
||||
*/
|
||||
case class PotentialComment(start: SourcePosition) extends State
|
||||
|
||||
/** State for being within a comment. This state ends when a newline is hit.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the beginning of the comment.
|
||||
*/
|
||||
case class InComment(start: SourcePosition) extends State
|
||||
|
||||
/** State for being within double quotes -- a String Literal. This state
|
||||
* ends when a closing double quote is hit. Newlines are not allowed within
|
||||
* this state.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the beginning of the string.
|
||||
*/
|
||||
case class InDoubleQuote(start: SourcePosition) extends State
|
||||
|
||||
/** State for being within single quotes -- a Char Literal. This state ends
|
||||
* when a closing single quote is hit. Newlines are not allowed within this
|
||||
* state.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the beginning of the character.
|
||||
*/
|
||||
case class InSingleQuote(start: SourcePosition) extends State
|
||||
|
||||
/** State indicating that a character escape within a string or character
|
||||
* literal was initiated.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the character escape.
|
||||
*/
|
||||
case class InCharEscape(start: SourcePosition) extends State
|
||||
|
||||
/** State that indicates some generic token is being parsed. These might be
|
||||
* keywords, operators, or names.
|
||||
*
|
||||
* @param start
|
||||
* The [[SourcePosition]] of the start of the token.
|
||||
*/
|
||||
case class InGeneric(start: SourcePosition) extends State
|
||||
|
||||
given CanEqual[State, State] = CanEqual.derived
|
||||
|
||||
end State
|
||||
|
||||
/** Enumeration which defines all possible [[Tokenizer]] errors.
|
||||
*/
|
||||
sealed trait Error
|
||||
|
||||
object Error:
|
||||
|
@ -294,8 +361,6 @@ object Tokenizer:
|
|||
sealed trait Positional extends Error:
|
||||
def sourcePosition: SourcePosition
|
||||
|
||||
case object NotImplemented extends Error
|
||||
|
||||
/** This error occurs when the end of file is reached if the tokenizer is
|
||||
* still expecting more characters.
|
||||
*
|
||||
|
|
9
modules/parser/src/test/resources/tokenizer-3.ava
Normal file
9
modules/parser/src/test/resources/tokenizer-3.ava
Normal file
|
@ -0,0 +1,9 @@
|
|||
namespace unit.test
|
||||
|
||||
--- Type class for type constructors which can be mapped over.
|
||||
given F *
|
||||
class Functor
|
||||
--- Transform some wrapped data from one type to another.
|
||||
given A, B
|
||||
defn map: F A -> (A -> B) -> F B
|
||||
end class
|
|
@ -302,6 +302,51 @@ class TokenizerTests extends munit.FunSuite:
|
|||
)
|
||||
}
|
||||
|
||||
test("should tokenize a valid file (case 3)") {
|
||||
val source = loadFileToString("tokenizer-3.ava")
|
||||
println(source)
|
||||
assertTokens(
|
||||
source,
|
||||
Right(Token.Generic("namespace")),
|
||||
Right(Token.Generic("unit")),
|
||||
Right(Token.Dot),
|
||||
Right(Token.Generic("test")),
|
||||
Right(
|
||||
Token.Comment(
|
||||
"- Type class for type constructors which can be mapped over."
|
||||
)
|
||||
),
|
||||
Right(Token.Generic("given")),
|
||||
Right(Token.Generic("F")),
|
||||
Right(Token.Generic("*")),
|
||||
Right(Token.Generic("class")),
|
||||
Right(Token.Generic("Functor")),
|
||||
Right(
|
||||
Token.Comment("- Transform some wrapped data from one type to another.")
|
||||
),
|
||||
Right(Token.Generic("given")),
|
||||
Right(Token.Generic("A")),
|
||||
Right(Token.Comma),
|
||||
Right(Token.Generic("B")),
|
||||
Right(Token.Generic("defn")),
|
||||
Right(Token.Generic("map")),
|
||||
Right(Token.Colon),
|
||||
Right(Token.Generic("F")),
|
||||
Right(Token.Generic("A")),
|
||||
Right(Token.Generic("->")),
|
||||
Right(Token.OpenParen),
|
||||
Right(Token.Generic("A")),
|
||||
Right(Token.Generic("->")),
|
||||
Right(Token.Generic("B")),
|
||||
Right(Token.CloseParen),
|
||||
Right(Token.Generic("->")),
|
||||
Right(Token.Generic("F")),
|
||||
Right(Token.Generic("B")),
|
||||
Right(Token.Generic("end")),
|
||||
Right(Token.Generic("class"))
|
||||
)
|
||||
}
|
||||
|
||||
private def assertTokens(
|
||||
source: String,
|
||||
expectedOutput: Either[Error, Token]*
|
||||
|
|
Loading…
Add table
Reference in a new issue