diff --git a/ava.ebnf b/ava.ebnf index 4b2f7f5..380e363 100644 --- a/ava.ebnf +++ b/ava.ebnf @@ -102,11 +102,14 @@ k_true ::= 't', 'r', 'u', 'e'; (* false is a Boolean literal *) k_false ::= 'f', 'a', 'l', 's', 'e'; +(* defn is fn but for definitions without implementations *) +k_defn ::= 'd', 'e', 'f', 'n'; + keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record | k_object | k_let | k_mut | k_export | k_import | k_namespace | k_infix | k_fn | k_end | k_match | k_case | k_lambda | k_if | k_then | k_else | k_do | k_return - | k_given | k_private | k_true | k_false; + | k_given | k_private | k_true | k_false | k_defn; (* ============== *) (* Literal Values *) diff --git a/build.sbt b/build.sbt index 8c9636b..d5dd430 100644 --- a/build.sbt +++ b/build.sbt @@ -28,7 +28,7 @@ lazy val parser = project .settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}") .settings( libraryDependencies ++= Seq( - "co.fs2" %% "fs2-core" % "3.9.4", - "co.fs2" %% "fs2-io" % "3.9.4" + "co.fs2" %% "fs2-core" % "3.9.4" % Test, + "co.fs2" %% "fs2-io" % "3.9.4" % Test ) ) diff --git a/modules/parser/src/main/scala/ava/parser/CharacterReader.scala b/modules/parser/src/main/scala/ava/parser/CharacterReader.scala index 1c20aa8..7c93729 100644 --- a/modules/parser/src/main/scala/ava/parser/CharacterReader.scala +++ b/modules/parser/src/main/scala/ava/parser/CharacterReader.scala @@ -21,6 +21,7 @@ class CharacterReader( private val LookBackCapacity: Int = 16 // Internal buffers. + private var currentChar: Char = 0 private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0) private val buffer: Array[Char] = Array.fill(Capacity)(0) @@ -98,12 +99,14 @@ class CharacterReader( // Special case -- we have exhausted the buffer and still want to peek. // In this case, we consume exactly one character (if possible) and set // the peek state. 
- peekedAhead = true - lookAhead = input.read().toChar - if lookAhead < 0 then + val ch = input.read() + if ch < 0 then eof = true None - else Some(lookAhead) + else + peekedAhead = true + lookAhead = ch.toChar + Some(lookAhead) else Some(buffer(index)) def consume(): Option[Char] = @@ -116,10 +119,10 @@ class CharacterReader( // buffer(0) contains what we need, but it's ALSO in lookAhead. Take it, // increment the index (to 1), and return the correct value. val selectedChar = buffer(0) - lastChars.push(selectedChar) + updateCurrentChar(selectedChar) index = index + 1 Some(selectedChar) - else if index == length then + else if index >= length then // The buffer has been exhausted. Refill it. fillBuffer() if eof then None @@ -127,15 +130,17 @@ class CharacterReader( // At this point, 'index' should be 0. val pos = index index = index + 1 - lastChars.push(buffer(pos)) - Some(buffer(pos)) + val ch = buffer(pos) + updateCurrentChar(ch) + Some(ch) else // Regular case -- the buffer is not exhausted and contains data. Get the // data at the current position and move our pointer. val pos = index index = index + 1 - lastChars.push(buffer(pos)) - Some(buffer(pos)) + val ch = buffer(pos) + updateCurrentChar(ch) + Some(ch) /** @return * True if all data in the _buffer_ has been consumed, false otherwise. @@ -150,10 +155,35 @@ class CharacterReader( def isEof(): Boolean = eof - def getLastChar(): Char = lastChars.newest() + /** Get the _previous_ character. If 0 or 1 characters have been read in + * total, this will throw an exception due to being uninitialized. + * + * @return + * The previous character. + */ + def getPreviousChar(): Char = lastChars.newest() + /** Get the _current_ character. Initialized to 0. + * + * @return + * The current character, or 0 if nothing has been consumed. + */ + def getCurrentChar(): Char = currentChar + + /** Dump the lookback array from newest character to oldest character. 
+ * + * @return + * The lookback array, ordered from newest to oldest. + */ def getLookback(): Array[Char] = lastChars.newestToOldest() + private def updateCurrentChar(ch: Char): Unit = + val _ = + if currentChar != 0 then lastChars.push(currentChar) + else () + + currentChar = ch + object CharacterReader: /** Initialize a [[CharacterReader]] for the the given file, using the UTF-8 diff --git a/modules/parser/src/main/scala/ava/parser/Keyword.scala b/modules/parser/src/main/scala/ava/parser/Keyword.scala new file mode 100644 index 0000000..6e1edce --- /dev/null +++ b/modules/parser/src/main/scala/ava/parser/Keyword.scala @@ -0,0 +1,36 @@ +package ava.parser + +sealed abstract class Keyword(val value: String) + +object Keyword: + + case object Alias extends Keyword("alias") + case object Case extends Keyword("case") + case object Class extends Keyword("class") + case object Const extends Keyword("const") + case object Defn extends Keyword("defn") + case object Do extends Keyword("do") + case object Else extends Keyword("else") + case object End extends Keyword("end") + case object Enum extends Keyword("enum") + case object Export extends Keyword("export") + case object False extends Keyword("false") + case object Fn extends Keyword("fn") + case object Given extends Keyword("given") + case object If extends Keyword("if") + case object Import extends Keyword("import") + case object Infix extends Keyword("infix") + case object Lambda extends Keyword("λ") + case object Let extends Keyword("let") + case object Match extends Keyword("match") + case object Mut extends Keyword("mut") + case object Namespace extends Keyword("namespace") + case object Object extends Keyword("object") + case object Private extends Keyword("private") + case object Record extends Keyword("record") + case object Return extends Keyword("return") + case object Then extends Keyword("then") + case object True extends Keyword("true") + case object Type extends Keyword("type") + +end Keyword diff --git 
a/modules/parser/src/main/scala/ava/parser/Token.scala b/modules/parser/src/main/scala/ava/parser/Token.scala new file mode 100644 index 0000000..17b6a8b --- /dev/null +++ b/modules/parser/src/main/scala/ava/parser/Token.scala @@ -0,0 +1,6 @@ +package ava.parser + +case class Token( + value: String, + tokenType: TokenType +) diff --git a/modules/parser/src/main/scala/ava/parser/TokenType.scala b/modules/parser/src/main/scala/ava/parser/TokenType.scala new file mode 100644 index 0000000..c67dfd1 --- /dev/null +++ b/modules/parser/src/main/scala/ava/parser/TokenType.scala @@ -0,0 +1,18 @@ +package ava.parser + +sealed trait TokenType + +object TokenType: + + case object Keyword extends TokenType + case object ReservedOperator extends TokenType + case object Name extends TokenType + case object OpenParen extends TokenType + case object CloseParen extends TokenType + case object Dot extends TokenType + case object Comma extends TokenType + case object DoubleQuote extends TokenType + case object Hole extends TokenType + case object Literal extends TokenType + +end TokenType diff --git a/modules/parser/src/main/scala/ava/parser/Tokenizer.scala b/modules/parser/src/main/scala/ava/parser/Tokenizer.scala new file mode 100644 index 0000000..2449d85 --- /dev/null +++ b/modules/parser/src/main/scala/ava/parser/Tokenizer.scala @@ -0,0 +1,6 @@ +package ava.parser + +class Tokenizer(private val reader: CharacterReader): + def next(): Option[String] = None + + def close(): Unit = reader.close() diff --git a/modules/parser/src/test/scala/ava/parser/CharacterReaderTests.scala b/modules/parser/src/test/scala/ava/parser/CharacterReaderTests.scala index edfd93e..fa9c51b 100644 --- a/modules/parser/src/test/scala/ava/parser/CharacterReaderTests.scala +++ b/modules/parser/src/test/scala/ava/parser/CharacterReaderTests.scala @@ -2,6 +2,7 @@ package ava.parser import cats.effect.IO import cats.effect.unsafe.IORuntime +import java.io.ByteArrayInputStream import java.io.InputStream import 
scala.io.Source @@ -25,6 +26,58 @@ class CharacterReaderTests extends munit.FunSuite: .unsafeRunSync() assertEquals(output, expected) + assertEquals(reader.isEof(), true) + assertEquals(reader.isBufferExhausted(), true) + } + + test("should properly handle an empty stream") { + val stream = new ByteArrayInputStream(Array[Byte]()) + val reader = CharacterReader.forInputStream(stream) + + // Verify initial state. + assertEquals(0, stream.available()) + assertEquals(reader.isBufferExhausted(), true) + assertEquals(reader.isEof(), false) + + // Trigger recognition of EOF. + // Case 1 Peek: index == length (initial state) + assertEquals(reader.peek(), None) + // Case 2 Peek: eof == true (triggered state) + assertEquals(reader.peek(), None) + // Case: eof == true (triggered state) + assertEquals(reader.consume(), None) + assertEquals(reader.isBufferExhausted(), true) + assertEquals(reader.isEof(), true) + reader.close() + assertEquals(reader.isBufferExhausted(), true) + assertEquals(reader.isEof(), true) + } + + test("should receive the current and previous characters") { + val stream = new ByteArrayInputStream(Array[Byte]('a', 'b')) + val reader = CharacterReader.forInputStream(stream) + + // Verify initial state. + assertEquals(0, reader.getCurrentChar().toInt) + interceptMessage[IllegalStateException]( + "This ring has not been initialized with any data." + ) { + reader.getPreviousChar() + } + + // Perform this test without peeking, just consume characters. 
+ assertEquals(reader.consume(), Some('a')) + assertEquals(reader.isBufferExhausted(), false) + assertEquals(reader.isEof(), false) + assertEquals(reader.getCurrentChar(), 'a') + assertEquals(reader.consume(), Some('b')) + assertEquals(reader.isBufferExhausted(), true) + assertEquals(reader.getCurrentChar(), 'b') + assertEquals(reader.getPreviousChar(), 'a') + assertEquals(reader.getLookback().toList, List('a')) + assertEquals(reader.consume(), None) + assertEquals(reader.isBufferExhausted(), true) + assertEquals(reader.isEof(), true) } private def loadFileToString(name: String): String =