mdhender

super secret hq

A Toy Forth Interpreter

I think that it’s a good idea to start small and build using test cases. It’s tough to do that in a blog, though, because there’s a lot of overhead to carry around. So, I’ll start small and desk check frequently.

My goals for this interpreter:

  • ignore all errors
  • process a character buffer as input
  • accept strings and numbers as words
  • use “.” as the word to pop a word off the stack and display it
  • no ability to define new words
copy command line argument into buffer
while words to process in buffer
  read next word
  if word is "."
      display the top of the stack
      pop the top entry from the stack and discard it
  elsif word is an integer
      convert word to integer
      push integer onto the stack
  else
      convert word to text
      push text onto the stack

Here’s the code:

//
//  falseforth/main.c
//
//  Created by Michael Henderson on 6/12/13.
//  Copyright (c) 2013 Michael Henderson. All rights reserved.
//

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Tag that tells which kind of word a WORD holds.
typedef enum {
    wisError,   // main halts when it sees a word of this kind
    wisNumber,  // the word is an integer, stored in v.number
    wisText     // the word is text, stored in v.text
} WORDKIND;

// A scanned word: a kind tag plus the payload selected by that tag.
//
// Text words are allocated with extra bytes after the struct (see the
// malloc calls in NextWord), so v.text holds the whole NUL-terminated
// string even though it is declared with a single element.
typedef struct WORD {
    WORDKIND kind;      // selects which member of v is meaningful
    union {
        int  number;    // value of a wisNumber word
        char text[1];   // first byte of a wisText word's characters
    } v;
} WORD;

// Returns 1 when scanning the number pattern from s reaches endOfWord
// (the position just past the word), 0 otherwise.
int   IsNumber(const char *s, const char *endOfWord);
// Scans one word from input and returns it as a newly malloc'd WORD, or 0
// when input is null or holds nothing but whitespace. main reads *endOfWord
// after the call, expecting the position just past the scanned word.
WORD *NextWord(const char *input, const char **endOfWord);


// Entry point. Every command-line argument is treated as a buffer of
// whitespace-separated words. Each word is scanned with NextWord and then
// either "executed" (the word ".") or "pushed" (anything else); for now both
// actions are only traced to stdout because the stack is not implemented.
//
// Returns 0 after all arguments are processed; exits with status 2 if the
// scanner hands back a word of kind wisError.
int main(int argc, char *argv[]) {
    int idx;
    for (idx = 1; idx < argc; idx++) {
        const char *input = argv[idx];

        while (input && *input) {
            // start from a known value so that a scanner which reports no
            // position can never leave us holding an indeterminate pointer
            //
            const char *endOfWord = 0;
            WORD       *w = NextWord(input, &endOfWord);

            // no word left in this argument (for example only trailing
            // whitespace remained): that is end of input, not an error,
            // so move on to the next argument
            //
            if (!w) {
                break;
            }

            // halt on error
            //
            if (w->kind == wisError) {
                free(w);
                exit(2);
            }

            // handle the word by executing or pushing it
            //
            if (w->kind == wisText && strcmp(w->v.text, ".") == 0) {
                // TODO: StackDump()
                printf("Execute(%s)\n", w->v.text);
            } else if (w->kind == wisNumber) {
                // TODO: StackPush(w)
                printf("StackPush(%d)\n", w->v.number);
            } else {
                // TODO: StackPush(w)
                printf("StackPush(%s)\n", w->v.text);
            }

            // nothing keeps the word yet (there is no stack to own it),
            // so release it here instead of leaking one WORD per word
            //
            free(w);

            // advance input past the end of this word; stop on this
            // argument if the scanner made no forward progress, otherwise
            // the loop would never terminate
            //
            if (!endOfWord || endOfWord <= input) {
                break;
            }
            input = endOfWord;
        }
    }

    return 0;
}

// a number is [+-]? [0-9]+ ( '.' [0-9]+ )?
//
// Reports whether the characters in [s, endOfWord) form a number.
//
//   s          first character of the word
//   endOfWord  position just past the last character of the word
//
// Returns 1 when the whole word matches the pattern above, 0 otherwise.
// An empty word, a bare sign and a word with no digit before the '.'
// (such as ".5") are not numbers. The scan never reads at or beyond
// endOfWord.
//
int IsNumber(const char *s, const char *endOfWord) {
    if (!s || !endOfWord || s >= endOfWord) {
        return 0;
    }

    // optional sign
    //
    if (*s == '+' || *s == '-') {
        s++;
    }

    // integer part; the pattern requires at least one digit.
    // <ctype.h> functions need a value representable as unsigned char,
    // hence the casts (plain char may be negative).
    //
    const char *firstDigit = s;
    while (s < endOfWord && isdigit((unsigned char)*s)) {
        s++;
    }
    if (s == firstDigit) {
        return 0;
    }

    // optional fraction: a '.' only counts when a digit follows it
    //
    if (s + 1 < endOfWord && *s == '.' && isdigit((unsigned char)s[1])) {
        s++;
        while (s < endOfWord && isdigit((unsigned char)*s)) {
            s++;
        }
    }

    // anything left over means the word is not purely numeric
    //
    return s == endOfWord ? 1 : 0;
}

// Scans the next word from a NUL-terminated buffer.
//
//   input      buffer to scan; leading whitespace is skipped
//   endOfWord  if not null, receives the position just past the scanned
//              word, or the position where scanning stopped when no word
//              was found
//
// Returns a newly malloc'd WORD that the caller must free(), or 0 when
// input is null or contains only whitespace. Exits with status 2 if the
// allocation fails.
//
// Words come in two forms:
//   * quoted text, delimited by " or '; a doubled delimiter inside the
//     text stands for one literal delimiter. The stored text keeps its
//     delimiters, so a quoted "." is never mistaken for the word "."
//     and a missing closing quote simply ends the word at end of input.
//   * unquoted text, running up to the next whitespace or end of input.
//     If IsNumber accepts it the word becomes wisNumber, holding the
//     integer part clamped to the range of int; otherwise it is wisText.
//
WORD *NextWord(const char *input, const char **endOfWord) {
    WORD *w = 0;

    // until a word has been scanned, report that nothing was consumed
    //
    if (endOfWord) {
        *endOfWord = input;
    }
    if (!input) {
        return w;
    }

    // <ctype.h> functions need a value representable as unsigned char
    //
    while (isspace((unsigned char)*input)) {
        input++;
    }
    if (endOfWord) {
        *endOfWord = input;
    }

    if (!*input) {
        return w;
    }

    // first character in the word
    //
    const char *firstChar = input++;

    if (*firstChar == '"' || *firstChar == '\'') {
        // scan quoted text
        //
        char quote         = *firstChar;
        int  escapedQuotes = 0;

        while (*input) {
            if (*input == quote) {
                // end of quoted text?
                if (*(input + 1) != quote) {
                    break;
                }
                // escaped quote, advance past it
                input++;
                escapedQuotes++;
            }
            input++;
        }

        // input must point past end of word when we're done
        //
        int closed = (*input == quote);
        if (closed) {
            input++;
        }

        // create a new word; the raw length is an upper bound on the
        // stored length because collapsing doubled quotes only shrinks it
        //
        size_t len = (size_t)(input - firstChar);
        w = malloc(sizeof(*w) + len);
        if (!w) {
            perror("NewWord.QuotedText");
            exit(2);
        }
        w->kind = wisText;

        // copy over the data
        //
        if (escapedQuotes == 0) {
            memcpy(w->v.text, firstChar, len);
            w->v.text[len] = 0;
        } else {
            // copy delimiter, interior, delimiter; inside the interior a
            // quote character can only be the first half of a doubled
            // pair, so skipping it always leaves its partner to copy
            //
            const char *src      = firstChar + 1;
            const char *interior = closed ? input - 1 : input;
            char       *tgt      = w->v.text;

            *(tgt++) = quote;
            while (src < interior) {
                if (*src == quote) {
                    src++;
                }
                *(tgt++) = *(src++);
            }
            if (closed) {
                *(tgt++) = quote;
            }
            *tgt = 0;
        }
    } else {
        // scan unquoted text; stop at the terminator as well as at
        // whitespace so the scan cannot run off the end of the buffer
        //
        while (*input && !isspace((unsigned char)*input)) {
            input++;
        }

        // determine if we have a numeric or text word
        //
        WORDKIND kind = IsNumber(firstChar, input) ? wisNumber : wisText;

        // create a new word; only text needs room beyond the struct
        //
        size_t len = (size_t)(input - firstChar);
        w = malloc(sizeof(*w) + (kind == wisNumber ? 0 : len));
        if (!w) {
            perror("NewWord.Text");
            exit(2);
        }
        w->kind = kind;

        // copy over the data
        //
        if (w->kind == wisText) {
            memcpy(w->v.text, firstChar, len);
            w->v.text[len] = 0;
        } else {
            // strtol stops at the first character that is not part of an
            // integer ('.', whitespace or the terminator), so the word can
            // be parsed in place; unlike atoi it has defined behaviour on
            // overflow, and the result is clamped to fit an int
            //
            long value = strtol(firstChar, NULL, 10);
            if (value > INT_MAX) {
                value = INT_MAX;
            } else if (value < INT_MIN) {
                value = INT_MIN;
            }
            w->v.number = (int)value;
        }
    }

    if (endOfWord) {
        *endOfWord = input;
    }
    return w;
}

Comments