Working on building out tokenization and codifying things like keywords.
This commit is contained in:
parent
fdb150b12d
commit
bad991337d
8 changed files with 166 additions and 14 deletions
5
ava.ebnf
5
ava.ebnf
|
@ -102,11 +102,14 @@ k_true ::= 't', 'r', 'u', 'e';
|
|||
(* false is a Boolean literal *)
|
||||
k_false ::= 'f', 'a', 'l', 's', 'e';
|
||||
|
||||
(* defn is like fn, but for definitions without implementations *)
k_defn ::= 'd', 'e', 'f', 'n';
|
||||
|
||||
keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record
|
||||
| k_object | k_let | k_mut | k_export | k_import
|
||||
| k_namespace | k_infix | k_fn | k_end | k_match | k_case
|
||||
| k_lambda | k_if | k_then | k_else | k_do | k_return
|
||||
| k_given | k_private | k_true | k_false;
|
||||
| k_given | k_private | k_true | k_false | k_defn;
|
||||
|
||||
(* ============== *)
|
||||
(* Literal Values *)
|
||||
|
|
|
@ -28,7 +28,7 @@ lazy val parser = project
|
|||
.settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}")
|
||||
.settings(
|
||||
libraryDependencies ++= Seq(
|
||||
"co.fs2" %% "fs2-core" % "3.9.4",
|
||||
"co.fs2" %% "fs2-io" % "3.9.4"
|
||||
"co.fs2" %% "fs2-core" % "3.9.4" % Test,
|
||||
"co.fs2" %% "fs2-io" % "3.9.4" % Test
|
||||
)
|
||||
)
|
||||
|
|
|
@ -21,6 +21,7 @@ class CharacterReader(
|
|||
private val LookBackCapacity: Int = 16
|
||||
|
||||
// Internal buffers.
|
||||
private var currentChar: Char = 0
|
||||
private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0)
|
||||
private val buffer: Array[Char] = Array.fill(Capacity)(0)
|
||||
|
||||
|
@ -98,12 +99,14 @@ class CharacterReader(
|
|||
// Special case -- we have exhausted the buffer and still want to peek.
|
||||
// In this case, we consume exactly one character (if possible) and set
|
||||
// the peek state.
|
||||
peekedAhead = true
|
||||
lookAhead = input.read().toChar
|
||||
if lookAhead < 0 then
|
||||
val ch = input.read()
|
||||
if ch < 0 then
|
||||
eof = true
|
||||
None
|
||||
else Some(lookAhead)
|
||||
else
|
||||
peekedAhead = true
|
||||
lookAhead = ch.toChar
|
||||
Some(lookAhead)
|
||||
else Some(buffer(index))
|
||||
|
||||
def consume(): Option[Char] =
|
||||
|
@ -116,10 +119,10 @@ class CharacterReader(
|
|||
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
|
||||
// increment the index (to 1), and return the correct value.
|
||||
val selectedChar = buffer(0)
|
||||
lastChars.push(selectedChar)
|
||||
updateCurrentChar(selectedChar)
|
||||
index = index + 1
|
||||
Some(selectedChar)
|
||||
else if index == length then
|
||||
else if index >= length then
|
||||
// The buffer has been exhausted. Refill it.
|
||||
fillBuffer()
|
||||
if eof then None
|
||||
|
@ -127,15 +130,17 @@ class CharacterReader(
|
|||
// At this point, 'index' should be 0.
|
||||
val pos = index
|
||||
index = index + 1
|
||||
lastChars.push(buffer(pos))
|
||||
Some(buffer(pos))
|
||||
val ch = buffer(pos)
|
||||
updateCurrentChar(ch)
|
||||
Some(ch)
|
||||
else
|
||||
// Regular case -- the buffer is not exhausted and contains data. Get the
|
||||
// data at the current position and move our pointer.
|
||||
val pos = index
|
||||
index = index + 1
|
||||
lastChars.push(buffer(pos))
|
||||
Some(buffer(pos))
|
||||
val ch = buffer(pos)
|
||||
updateCurrentChar(ch)
|
||||
Some(ch)
|
||||
|
||||
/** @return
|
||||
* True if all data in the _buffer_ has been consumed, false otherwise.
|
||||
|
@ -150,10 +155,35 @@ class CharacterReader(
|
|||
def isEof(): Boolean =
|
||||
eof
|
||||
|
||||
def getLastChar(): Char = lastChars.newest()
|
||||
/** Look back one step, to the character that was current before the present
  * one.
  *
  * Throws an exception while the lookback ring is still uninitialized, i.e.
  * when 0 or 1 characters have been read in total.
  *
  * @return
  *   The previous character.
  */
def getPreviousChar(): Char =
  lastChars.newest()
|
||||
|
||||
/** Report the _current_ character, which starts out as 0.
  *
  * @return
  *   The current character, or 0 if nothing has been consumed.
  */
def getCurrentChar(): Char =
  currentChar
|
||||
|
||||
/** Snapshot the lookback ring as an array, newest character first.
  *
  * @return
  *   The lookback characters, ordered from newest to oldest.
  */
def getLookback(): Array[Char] =
  lastChars.newestToOldest()
|
||||
|
||||
/** Make `ch` the current character, moving the character it replaces into the
  * lookback ring.
  *
  * A current character of 0 is treated as "nothing stored yet" and is not
  * pushed into the ring.
  *
  * NOTE(review): because 0 doubles as the "unset" marker, a genuine NUL
  * character would not be recorded in the lookback ring -- confirm whether
  * that matters for real inputs.
  *
  * @param ch
  *   The character that becomes the current character.
  */
private def updateCurrentChar(ch: Char): Unit =
  val outgoing = currentChar

  // Bind the result to `_` so that whatever `push` returns is discarded
  // explicitly.
  val _ =
    if outgoing == 0 then ()
    else lastChars.push(outgoing)

  currentChar = ch
|
||||
|
||||
object CharacterReader:
|
||||
|
||||
/** Initialize a [[CharacterReader]] for the given file, using the UTF-8
|
||||
|
|
36
modules/parser/src/main/scala/ava/parser/Keyword.scala
Normal file
36
modules/parser/src/main/scala/ava/parser/Keyword.scala
Normal file
|
@ -0,0 +1,36 @@
|
|||
package ava.parser
|
||||
|
||||
/** A keyword, identified by the exact text that spells it.
  *
  * The hierarchy is sealed; every keyword is a case object in the companion.
  *
  * @param value
  *   The text of the keyword.
  */
sealed abstract class Keyword(val value: String)

/** All keywords, listed alphabetically by object name. */
object Keyword:

  case object Alias     extends Keyword("alias")
  case object Case      extends Keyword("case")
  case object Class     extends Keyword("class")
  case object Const     extends Keyword("const")
  case object Defn      extends Keyword("defn")
  case object Do        extends Keyword("do")
  case object Else      extends Keyword("else")
  case object End       extends Keyword("end")
  case object Enum      extends Keyword("enum")
  case object Export    extends Keyword("export")
  case object False     extends Keyword("false")
  case object Fn        extends Keyword("fn")
  case object Given     extends Keyword("given")
  case object If        extends Keyword("if")
  case object Import    extends Keyword("import")
  case object Infix     extends Keyword("infix")
  // The lambda keyword is spelled with the Greek letter itself.
  case object Lambda    extends Keyword("λ")
  case object Let       extends Keyword("let")
  case object Match     extends Keyword("match")
  case object Mut       extends Keyword("mut")
  case object Namespace extends Keyword("namespace")
  case object Object    extends Keyword("object")
  case object Private   extends Keyword("private")
  case object Record    extends Keyword("record")
  case object Return    extends Keyword("return")
  case object Then      extends Keyword("then")
  case object True      extends Keyword("true")
  case object Type      extends Keyword("type")

end Keyword
|
6
modules/parser/src/main/scala/ava/parser/Token.scala
Normal file
6
modules/parser/src/main/scala/ava/parser/Token.scala
Normal file
|
@ -0,0 +1,6 @@
|
|||
package ava.parser
|
||||
|
||||
/** A token: a piece of text paired with its classification.
  *
  * @param value
  *   The text of the token.
  * @param tokenType
  *   The [[TokenType]] assigned to the token.
  */
case class Token(value: String, tokenType: TokenType)
|
18
modules/parser/src/main/scala/ava/parser/TokenType.scala
Normal file
18
modules/parser/src/main/scala/ava/parser/TokenType.scala
Normal file
|
@ -0,0 +1,18 @@
|
|||
package ava.parser
|
||||
|
||||
/** Classification of a token. The hierarchy is sealed; every member is a case
  * object in the companion.
  */
sealed trait TokenType

/** The available token classifications. */
object TokenType:

  // Note: this is the classification "keyword"; it is a different definition
  // from the `Keyword` class declared in the same package.
  case object Keyword          extends TokenType
  case object ReservedOperator extends TokenType
  case object Name             extends TokenType
  case object OpenParen        extends TokenType
  case object CloseParen       extends TokenType
  case object Dot              extends TokenType
  case object Comma            extends TokenType
  case object DoubleQuote      extends TokenType
  case object Hole             extends TokenType
  case object Literal          extends TokenType

end TokenType
|
6
modules/parser/src/main/scala/ava/parser/Tokenizer.scala
Normal file
6
modules/parser/src/main/scala/ava/parser/Tokenizer.scala
Normal file
|
@ -0,0 +1,6 @@
|
|||
package ava.parser
|
||||
|
||||
/** Tokenizer on top of a [[CharacterReader]].
  *
  * Tokenization is not implemented yet: `next()` always returns `None`.
  *
  * @param reader
  *   The character reader this tokenizer wraps; closed by `close()`.
  */
class Tokenizer(private val reader: CharacterReader):

  /** @return
    *   Always `None` in the current implementation.
    */
  def next(): Option[String] =
    None

  /** Close the wrapped reader. */
  def close(): Unit =
    reader.close()
|
|
@ -2,6 +2,7 @@ package ava.parser
|
|||
|
||||
import cats.effect.IO
|
||||
import cats.effect.unsafe.IORuntime
|
||||
import java.io.ByteArrayInputStream
|
||||
import java.io.InputStream
|
||||
import scala.io.Source
|
||||
|
||||
|
@ -25,6 +26,58 @@ class CharacterReaderTests extends munit.FunSuite:
|
|||
.unsafeRunSync()
|
||||
|
||||
assertEquals(output, expected)
|
||||
assertEquals(reader.isEof(), true)
|
||||
assertEquals(reader.isBufferExhausted(), true)
|
||||
}
|
||||
|
||||
test("should properly handle an empty stream") {
  val stream = new ByteArrayInputStream(Array[Byte]())
  val reader = CharacterReader.forInputStream(stream)

  // Verify initial state. EOF is not reported yet because no read has been
  // attempted.
  // munit's assertEquals takes (obtained, expected); keep that order so
  // failure messages label the values correctly.
  assertEquals(stream.available(), 0)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), false)

  // Trigger recognition of EOF.
  // Case 1 Peek: index == length (initial state)
  assertEquals(reader.peek(), None)
  // Case 2 Peek: eof == true (triggered state)
  assertEquals(reader.peek(), None)
  // Case: eof == true (triggered state)
  assertEquals(reader.consume(), None)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)

  // Closing the reader must not change the reported state.
  reader.close()
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)
}
|
||||
|
||||
test("should receive the current and previous characters") {
  val stream = new ByteArrayInputStream(Array[Byte]('a', 'b'))
  val reader = CharacterReader.forInputStream(stream)

  // Verify initial state: the current character is 0 and there is no
  // previous character to report.
  // munit's assertEquals takes (obtained, expected); keep that order so
  // failure messages label the values correctly.
  assertEquals(reader.getCurrentChar().toInt, 0)
  interceptMessage[IllegalStateException](
    "This ring has not been initialized with any data."
  ) {
    reader.getPreviousChar()
  }

  // Perform this test without peeking, just consume characters.
  assertEquals(reader.consume(), Some('a'))
  assertEquals(reader.isBufferExhausted(), false)
  assertEquals(reader.isEof(), false)
  assertEquals(reader.getCurrentChar(), 'a')

  // Consuming 'b' makes it current and moves 'a' into the lookback.
  assertEquals(reader.consume(), Some('b'))
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.getCurrentChar(), 'b')
  assertEquals(reader.getPreviousChar(), 'a')
  assertEquals(reader.getLookback().toList, List('a'))

  // A further consume finds no more data and reports EOF.
  assertEquals(reader.consume(), None)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)
}
|
||||
|
||||
private def loadFileToString(name: String): String =
|
||||
|
|
Loading…
Add table
Reference in a new issue