naive refcount gc sample

% ./fe bin.in
CMD -- add
CMD a=17
CMD b=23
CMD c(ab)
CMD dBc+
CMD d?
(+ (17 . 23))
CMD d*
=> 40
CMD -- mul
CMD a=1
CMD b=4
CMD r[ab]
CMD sSr
CMD a=4
CMD b=7
CMD r[ab]
CMD tSr
CMD c(st)
CMD dBc*
CMD d?
(* ((sum [1 .. 4]) . (sum [4 .. 7])))
CMD d*
=> 90
% cat bin.in
-- add
a=17
b=23
c(ab)
dBc+
d?
d*
-- mul
a=1
b=4
r[ab]
sSr
a=4
b=7
r[ab]
tSr
c(st)
dBc*
d?
d*

fe

{ echo n-; echo e--; cat "$@"; } | ./be

be.c

/* backend */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define N_VARS 26

// debug options

int opt_show_note = 1;
int opt_show_debug;
int opt_echo_cmd;

int g_mem;

#define err(fmt, x...) do { \
   printf("ERR " fmt "\n", ##x); } while (0)

#define note(fmt, x...) do { \
   if (opt_show_note) printf("NOTE " fmt "\n", ##x); } while (0)

#define debug(fmt, x...) do { \
   if (opt_show_debug) printf("   # " fmt "\n", ##x); \
   else putchar('\n'); } while (0)

#define echo_cmd(x) do { \
   printf("CMD %s\n", x); } while (0)

// end of debug options

// types

enum {
   NIL, VAL, CONS, // basic
   PROD, SUM, // reduce
   BIN,
   CAT, CYCLE, FIB, HEAD, MERGE, RANGE, YES, ZIP, // cons-like
   V_IF, C_IF, // ifelse
};

#define type_name(x) [x] = #x
const char *type_names[] = {
   type_name(NIL),
   type_name(VAL),
   type_name(CONS),
   type_name(PROD),
   type_name(SUM),
   type_name(BIN),
   type_name(CAT),
   type_name(CYCLE),
   type_name(FIB),
   type_name(HEAD),
   type_name(MERGE),
   type_name(RANGE),
   type_name(YES),
   type_name(ZIP),
   type_name(V_IF),
   type_name(C_IF),
};

struct obj {
   int type, count;
   union {
      int val;
      struct { struct obj *l, *r; }; // cons cycle merge zip
      struct { int n; struct obj *o; }; // cat head prod sum yes
      struct { int a, b; }; // fib, range
   };
};

int
type(const struct obj * const v)
{
   return v ? v->type : NIL;
}

int
type_class(const struct obj * const v)
{
   switch (type(v)) {
   case NIL: return NIL;
   case PROD:
   case SUM:
   case BIN:
   case V_IF:
   case VAL: return VAL;
   case CAT:
   case CYCLE:
   case FIB:
   case HEAD:
   case MERGE:
   case RANGE:
   case YES:
   case ZIP:
   case C_IF:
   case CONS: return CONS;
   }
}

// vars

struct obj *vars[N_VARS];
struct obj **gc_list;
int gc_len;

int
var(const int c)
{
   const int v = c - 'a';
   if (v < 0 || v >= N_VARS) {
      err("invalid var name.");
      return -1; }
   return v;
}

// binary arithmetic

enum { ADD, MUL, };

int bin_add(int a, int b) { return a + b; }
int bin_mul(int a, int b) { return a * b; }

struct binop {
   int (*op)(int, int);
   int sym;
} binop_table[] = {
   [ADD] = { bin_add, '+' },
   [MUL] = { bin_mul, '*' },
};
size_t binop_table_size = sizeof binop_table / sizeof *binop_table;

struct binop *
sym_to_op(const int sym)
{
   for (int i = 0; i < binop_table_size; i++)
      if (sym == binop_table[i].sym) return &binop_table[i];

   return NULL;
}

// show section

void
show_nil(void)
{
   putchar('$');
   debug("(nil)");
}

void show_cons_outer(const struct obj *v);

void
show_cons_inner(const struct obj * const v)
{
   switch (type(v)) {
   case NIL: putchar('$'); break;
   case VAL: printf("%d", v->val); break;
   case CONS: show_cons_outer(v);  break;
   case CAT: printf("(cat "); show_cons_inner(v->o); putchar(')'); break;
   case YES: putchar('['); show_cons_inner(v->o); printf(" ...]"); break;
   case RANGE: printf("[%d .. %d]", v->a, v->b); break;
   case FIB: printf("(%d %d ...)", v->a, v->b); break;
   case CYCLE: printf("(cycle "); show_cons_inner(v->l);
                   printf(" | cur="); show_cons_inner(v->r); putchar(')'); break;
   case SUM: printf("(sum "); show_cons_inner(v->o); putchar(')'); break;
   case PROD: printf("(prod "); show_cons_inner(v->o); putchar(')'); break;
   case HEAD: printf("(head %d ", v->n); show_cons_inner(v->o); putchar(')'); break;
   case V_IF: // FALLTHRU
   case C_IF: printf("(if "); show_cons_inner(v->l);
                   printf(" | "); show_cons_inner(v->r); putchar(')'); break;
   case BIN: printf("(%c ", v->n); show_cons_inner(v->o); putchar(')'); break;
   }
}

void
show_cons_outer(const struct obj *v)
{
   putchar('(');
   show_cons_inner(v->l);

   while (v = v->r) {
      putchar(' ');
      if (v->type != CONS) {
         printf(". ");
         show_cons_inner(v); break; }
      show_cons_inner(v->l); }

   putchar(')');
}

void
show_val(const struct obj * const v)
{
   show_cons_inner(v);
   debug("(%d %p)", v->count, v);
}

void
show_cons(const struct obj * const v)
{
   show_cons_inner(v);
   debug("(%d %p (%p %p))", v->count, v, v->l, v->r);
}

void
show_head(const struct obj * const v)
{
   show_cons_inner(v);
   debug("(%d %p (%p))", v->count, v, v->o);
}

void
show_obj(const struct obj * const v)
{
   switch (type(v)) {
   case NIL:  return show_nil();
   case VAL:  return show_val(v);
   case FIB:  return show_val(v);
   case RANGE: return show_val(v);
   case CAT:  return show_head(v);
   case YES:  return show_head(v);
   case BIN:  return show_head(v);
   case SUM:  return show_head(v);
   case PROD: return show_head(v);
   case HEAD: return show_head(v);
   case CONS: return show_cons(v);
   case CYCLE: return show_cons(v);
   case V_IF: return show_cons(v);
   case C_IF: return show_cons(v);
   default: err("unknown data type."); }
}

void
show_all_vars(void)
{
   for (int i = 0; i < N_VARS; i++)
      if (vars[i]) {
         printf("%c: ", i + 'a');
         show_obj(vars[i]); }
}

void
cmd_show(const char x, const char * const y)
{
   if (x == '?') return show_all_vars();
   const int n = var(x); if (n < 0) return;
   const struct obj * const v = vars[n];
   show_obj(v);
}

// end of show section

// new, discard, update section

struct obj *
new_obj(const struct obj * const o)
{
   const size_t size = sizeof *o;
   struct obj * const n = malloc(size);
   if (!n) {
      err("runtime error - out of memory.");
      return NULL; }
   g_mem++;

   *n = *o;
   return n;
}

struct obj *
new_cons(struct obj *l, struct obj *r)
{
   const struct obj t = { .type = CONS, .l = l, .r = r, };
   struct obj * const o = new_obj(&t); if (!o) return o;
   if (l) l->count++;
   if (r) r->count++;
   return o;
}

struct obj *
new_val(const int n)
{
   const struct obj t = { .type = VAL, .val = n, };
   return new_obj(&t);
}

struct obj *
new_val_lit(const char * s)
{
   int n = 0;
   for (; *s; s++) {
      int c = *s - '0';
      if (c < 0 || c > 9) break;
      n = n * 10 + c; }
   return new_val(n);
}

// move an item into gc_list
void
discard(struct obj * const o)
{
   note("%p will be free'ed", o);
   const size_t size = (gc_len + 1) * sizeof *o;
   struct obj ** const t = realloc(gc_list, size);
   if (!t) {
      err("runtime error - out of memory.");
      return; }
   gc_list = t;
   gc_list[gc_len++] = o;

   // XXX Debug
   for (int i = 0; i < gc_len; i++)
      if (gc_list[i])
         note("gc_list %3d %p [%s]", i, gc_list[i],
               type_names[gc_list[i]->type]);
}

void
unref(struct obj * const p)
{
   if (p && p->count && !--p->count) discard(p);
}

void
update_var(const int n, struct obj * const o)
{
   if (o) o->count++;
   struct obj * const p = vars[n];
   unref(p);
   vars[n] = o;
}

// end of new, discard, update section

// car, cdr, and eval section

struct obj * car(struct obj * const v);
struct obj * cdr(struct obj * const v);
struct obj *
cycle_car(const struct obj * const o)
{
   return car(o->r);
}

struct obj *
cycle_cdr(const struct obj * const o)
{
   struct obj * const p = o->r;
   struct obj * const r = cdr(p) ?: o->l;
   const struct obj t = { .type = CYCLE, .l = o->l, .r = r, };
   return new_obj(&t);
}

struct obj *
cat_car(const struct obj * const o)
{
   return car(car(o->o));
}

struct obj *
cat_cdr(const struct obj * const o)
{
   struct obj * const p = o->o;
   struct obj * const q = car(p);
   struct obj * const r = cdr(p);

   struct obj * const s =
      cdr(q) ? new_cons(cdr(q), r): r && car(r) ? r: NULL;

   if (!s) return NULL;

   const struct obj t = { .type = CAT, .o = s, };
   return new_obj(&t);
}

struct obj *
fib_car(const struct obj * const o)
{
   const struct obj t = { .type = VAL, .val = o->a, };
   return new_obj(&t);
}

struct obj *
fib_cdr(const struct obj * const o)
{
   const int a = o->b;
   const int b = o->a + o->b;
   const struct obj t = { .type = FIB, .a = a, .b = b, };
   return new_obj(&t);
}

#define range_car fib_car

struct obj *
range_cdr(const struct obj * const o)
{
   const int a = o->a + 1;
   const int b = o->b;
   if (a == b) return NULL;
   const struct obj t = { .type = RANGE, .a = a, .b = b, };
   return new_obj(&t);
}

struct obj *
head_cdr(const struct obj * const o)
{
   const int n = o->n;
   if (n - 1 <= 0) return NULL; // end

   struct obj * const r = cdr(o->o);
   if(type(r) == NIL) return NULL; // end of seq

   const struct obj t = { .type = HEAD, .n = n - 1, .o = r, };
   return new_obj(&t);
}

int eval(const struct obj *o);
int
cond_if(const struct obj *o)
{
   switch (type_class(o->l)) {
   case VAL:  return eval(o->l);
   case NIL:  return 0;
   case CONS: return 1; }

   err("notreached"); return 0;
}

struct obj * cxr(struct obj * const v, const int l);
struct obj *
cxr_cif(const struct obj *o, const int l)
{
   struct obj * const r = (cond_if(o) ? car : cdr)(o->r);
   return cxr(r, l);
}

struct obj *
cxr(struct obj * const v, const int l)
{
   struct obj * o;
   switch (type(v)) {
   case CONS:
      o = l ? v->l : v->r; break;
   case YES:
      o = l ? v->o : v;    break;
   case CAT:
      o = l ? cat_car(v) : cat_cdr(v); break;
   case FIB:
      o = l ? fib_car(v) : fib_cdr(v); break;
   case RANGE:
      o = l ? range_car(v) : range_cdr(v); break;
   case HEAD:
      o = l ? car(v->o) : head_cdr(v); break;
   case CYCLE:
      o = l ? cycle_car(v) : cycle_cdr(v); break;
   case C_IF:
      return cxr_cif(v, l);
   default:
      err("invalid data type."); return NULL; }
   return o;
}

struct obj * car(struct obj * const v) { return cxr(v, 1); }
struct obj * cdr(struct obj * const v) { return cxr(v, 0); }

int
eval_prod(const struct obj *o)
{
   int r = 1;
   for (struct obj *i = o->o; i; i = cdr(i)) {
      if (type_class(i) != CONS) {
         err("prod: not a list"); return -999; }
      if (type_class(car(i)) != VAL) {
         err("prod: not a list of val"); return -999; }
      r *= car(i)->val; }
   return r;
}

int
eval_sum(const struct obj *o)
{
   int r = 0;
   for (struct obj *i = o->o; i; i = cdr(i)) {
      if (type_class(i) != CONS) {
         err("sum: not a list"); return -999; }
      if (type_class(car(i)) != VAL) {
         err("sum: not a list of val"); return -999; }
      r += car(i)->val; }
   return r;
}

int eval(const struct obj *o);

int
eval_vif(const struct obj *o)
{
   struct obj * const r = (cond_if(o) ? car : cdr)(o->r);
   return eval(r);
}

int
eval_bin(const struct obj *o)
{
   int (*f)(int, int) = sym_to_op(o->n)->op;
   return f(eval(car(o->o)), eval(cdr(o->o)));
}

int
eval(const struct obj *o)
{
   switch (type(o)) {
   case VAL:
      return o->val;
   case PROD:
      return eval_prod(o);
   case SUM:
      return eval_sum(o);
   case BIN:
      return eval_bin(o);
   case V_IF:
      return eval_vif(o);
   default:
      err("unsupported type");
      return -999;
   }
}

// end of car, cdr, and eval section

// command section

void
cmd_cxr(const char x, const char * const y, const int a_or_d)
{
   const int n = var(x);  if (n < 0) return;
   const int m = var(*y); if (m < 0) return;
   const int l = a_or_d == 'a'; // car or cdr: true if car, false otherwise
   struct obj * const v = vars[m];
   if (type(v) == YES &&!l && n == m) return;
   struct obj * const o = cxr(v, l);
   update_var(n, o);
}

void
cmd_car(const char x, const char * const y)
{
   return cmd_cxr(x, y, 'a');
}

void
cmd_cdr(const char x, const char * const y)
{
   return cmd_cxr(x, y, 'd');
}

void
cmd_range(const char x, const char * const y)
{
   const int n = var(x);    if (n < 0) return;
   const int l = var(y[0]); if (l < 0) return;
   const int r = var(y[1]); if (r < 0) return;
   if (type_class(vars[l]) != VAL) { err("invalid range"); return; }
   if (type_class(vars[r]) != VAL) { err("invalid range"); return; }
   const int a = eval(vars[l]);
   const int b = eval(vars[r]);
   const struct obj t = { .type = RANGE, .a = a, .b = b, };
   struct obj * const o = new_obj(&t);
   if (!o) return;
   update_var(n, o);
}

void
cmd_fib(const char x, const char * const y)
{
   const int n = var(x);  if (n < 0) return;
   const struct obj t = { .type = FIB, .a = 0, .b = 1, };
   struct obj * const o = new_obj(&t); if (!o) return;
   update_var(n, o);
}

void
cmd_head(const char x, const char * const y)
{
   const int n = var(x);    if (n < 0) return;
   const int l = var(y[0]); if (l < 0) return;
   const int r = var(y[1]); if (r < 0) return;
   if (type(vars[l]) != VAL) {
      err("invalid data type."); return; }
   if (vars[l]->val < 1)
      return update_var(n, NULL);
   const struct obj t = { .type = HEAD, .n = vars[l]->val, .o = vars[r], };
   struct obj * const o = new_obj(&t); if (!o) return;

   if (t.o) t.o->count++;
   update_var(n, o);
}

void
cmd_cycle(const char x, const char * const y)
{
   const int n = var(x);  if (n < 0) return;
   const int v = var(*y); if (v < 0) return;
   if (type_class(vars[v]) != CONS) {
      err("invalid data type."); return; }
   const struct obj t = { .type = CYCLE, .l = vars[v], .r = vars[v], };
   struct obj * const o = new_obj(&t); if (!o) return;

   if (t.l) t.l->count++;
   update_var(n, o);
}

void
cmd_sum_yes(const char x, const char * const y, const int type)
{
   const int n = var(x);  if (n < 0) return;
   const int v = var(*y); if (v < 0) return;
   const struct obj t = { .type = type, .o = vars[v], };
   struct obj * const o = new_obj(&t); if (!o) return;
   if (t.o) t.o->count++;
   update_var(n, o);
}

void
cmd_yes(const char x, const char * const y)
{
   cmd_sum_yes(x, y, YES);
}

void
cmd_cat(const char x, const char * const y)
{
   cmd_sum_yes(x, y, CAT);
}

void
cmd_prod(const char x, const char * const y)
{
   cmd_sum_yes(x, y, PROD);
}

void
cmd_sum(const char x, const char * const y)
{
   cmd_sum_yes(x, y, SUM);
}

void
cmd_null(const char x, const char * const y)
{
   const int n = var(x); if (n < 0) return;
   update_var(n, NULL);
}

void
cmd_char(const char x, const char * const y)
{
   const int n = var(x); if (n < 0) return;
   if (*y == '\\') { err("backslash not supported."); return; }
   if (*y < ' ' || *y >= 0x7f) { err("char out of range."); return; }
   struct obj * const o = new_val(*y); if (!o) return;
   update_var(n, o);
}

void
cmd_setv(const char x, const char * const y)
{
   const int n = var(x); if (n < 0) return;

   struct obj *o = NULL;
   switch (*y) {
   case '0'...'9': o = new_val_lit(y); break;
   case 'a'...'z': o = vars[var(*y)];  break;
   default: err("unknown data type."); return; }

   update_var(n, o);
}

void
cmd_bin(const char x, const char * const y)
{
   const int n = var(x);    if (n < 0) return;
   const int l = var(y[0]); if (l < 0) return;
   const int r = y[1]; if (!sym_to_op(r)) return;
   struct obj * const c = vars[l]; // operands
   if (type(c) != CONS) { err("bin takes CONS."); return; }
   const int t0 = type_class(car(c)); // type of 1st operand
   const int t1 = type_class(cdr(c)); // type of 2nd operand
   if (t0 != VAL || t1 != VAL) { err("bin: operands must be VALs."); return; }
   const struct obj t = { .type = BIN, .n = r, .o = c, };
   struct obj * const o = new_obj(&t); if (!o) return;
   c->count++;
   update_var(n, o);
}

void
cmd_if(const char x, const char * const y)
{
   const int n = var(x);    if (n < 0) return;
   const int l = var(y[0]); if (l < 0) return;
   const int r = var(y[1]); if (r < 0) return;
   struct obj * const te = vars[r]; // then and else
   if (type(te) != CONS) { err("if takes VAL CONS."); return; }
   const int t = type_class(car(te)); // type of then
   const int e = type_class(cdr(te)); // type of else
   if (t != e) { err("if: then and else must be same type."); return; }
   struct obj * const o = new_cons(vars[l], vars[r]); if (!o) return;
   o->type = t == VAL ? V_IF : C_IF;
   update_var(n, o);
}

void
cmd_cons(const char x, const char * const y)
{
   const int n = var(x);    if (n < 0) return;
   const int l = var(y[0]); if (l < 0) return;
   const int r = var(y[1]); if (r < 0) return;
   struct obj * const o = new_cons(vars[l], vars[r]);
   if (!o) return;

   update_var(n, o);
}

// special commands

void
cmd_eval(const char x, const char * const y)
{
   const int n = var(x); if (n < 0) return;
   const struct obj *o = vars[n];
   int val = eval(o);
   printf("=> %d\n", val);
}

void
cmd_str(const char x, const char * const y)
{
   const int n = var(x); if (n < 0) return;
   for (struct obj *i = vars[n]; i; i = cdr(i)) {
      if (type_class(i) != CONS) {
         err("str: not a list"); return; }
      if (type_class(car(i)) != VAL) {
         err("str: not a list of val"); return; }
      int val = eval(car(i));
      if (val >= ' ' && val < 0x7f)
         putchar(val);
      else
         putchar('*'); }
   putchar('\n');
}

void
cmd_gc(const char x, const char * const y)
{
   note("freeing %d items. memory consumption = %d", gc_len, g_mem);

   // cannot use pointer as loop counter -- gc_list may move when discard.
   // also gc_len may grow when discard.
   for (int i = 0; i < gc_len; i++) {
      struct obj * const p = gc_list[i];
      if (!p) continue;
      note("freeing %p...", p);
      if (p->type == CONS) {
         unref(p->l);
         unref(p->r); }
      if (p->type == YES)
         unref(p->o);
      free(p); gc_list[i] = NULL; g_mem--; }
   note("%d items free'ed. memory consumption = %d", gc_len, g_mem);

   free(gc_list); gc_list = NULL;
   gc_len = 0;
}

void
cmd_opt(const char x, const char * const y)
{
   switch (x) {
   case '-': break; // -- comment
   case 'e': opt_echo_cmd   = !!*y; break;
   case 'n': opt_show_note  = !!*y; break;
   case 'd': opt_show_debug = !!*y; break;
   default: err("unknown option"); }
}

void
cmd_cmd_type(const char x)
{
   const int n = var(x); if (n < 0) return;
   const int t = type(vars[n]);
   const int c = type_class(vars[n]);
   printf("%c %s (%s)\n", x, type_names[t], type_names[c]);
}

void
cmd_cmd(const char x, const char * const y)
{
   switch (*y) {
   case 't': cmd_cmd_type(x); break;
   default: err("unknown subcommand"); }
}

// end of command section

void
sub(const char *s)
{
   const char * const t = &s[2];

   switch (s[1]) {
   case '?': return cmd_show(*s, t);
   case '!': return cmd_str (*s, t);
   case '|': return cmd_if  (*s, t);
   case '=': return cmd_setv(*s, t);
   case '\'':return cmd_char(*s, t);
   case '$': return cmd_null(*s, t);
   case '(': return cmd_cons(*s, t);
   case '[': return cmd_range(*s, t);
   case '^': return cmd_head(*s, t);
   case '"': return cmd_cat (*s, t);
   case '@': return cmd_yes (*s, t);
   case 'B': return cmd_bin (*s, t);
   case 'C': return cmd_cycle(*s, t);
   case 'F': return cmd_fib (*s, t);
   case 'P': return cmd_prod(*s, t);
   case 'S': return cmd_sum (*s, t);
   case '<': return cmd_car (*s, t);
   case '>': return cmd_cdr (*s, t);
   case '*': return cmd_eval(*s, t);
   case '+': return cmd_gc  (*s, t);
   case '-': return cmd_opt (*s, t);
   case ':': return cmd_cmd (*s, t);
   }
   err("unknown command.");
}

// this chomp is evil ... delete everything after the first newline.
void
chomp(char *s)
{
   for (; *s; s++)
      if (*s == '\n') {
         *s = '\0';
         return; }
}

void
prompt(const char *s)
{
   if (isatty(STDIN_FILENO)) fputs(s, stderr);
}

int
main(void)
{
   char buf[80];

   while (prompt("* "), fgets(buf, sizeof buf, stdin)) {
      chomp(buf);
      if (!*buf) continue;
      if (opt_echo_cmd) echo_cmd(buf);
      sub(buf); }

   prompt("EXIT\n");
   return 0;
}