#
#codebase.txt
# a mass work in dumping out thoughts on operational concepts, and stuff
#
# Author:  Adam Jason Pickett
# Created: 2009.04.16 21:54 EST (in thought, this started yrs. ago)
# Revised: 2010.10.23 18:05 EDT - spelling fixes, discontinuation notice
#
#09.05.10 - v.9 release
#
#No license/warranty are carried.
#
#Description:
# This is to fix the mistake of starting code projects without planning.
# Opening my mind to excel.
#
# *NOTE*: I've discontinued the title 'CIP', and have since advanced the
#interpreter to a simpler form.  The 'C' stuff here is deprecated.
# The functional interpr4.zip is available in the 'downloads'.
#
# This may help whomever is reading this.
#
#History:
#
# Over the years, I've written dozens of paper fragments w/ binary encodings,
#processor-type binary sequences written on off-time from the computer.
# I've thrown out all o'the lined paper (in rage tribulations regarding "proof"
#to my mother that I wasn't doing this out of ego - she isn't very bright).
# O, how I love the MC680x0 series (not so much the barely capable 68000).
#The disassembly of code resources, burning everything in, then writing my own
#HyperTalk-based disassembler.  For this I'm a geek, certainly.
#
# One other thing worth mentioning is how much more I work on building structure
#than culling lists.  Yes, a lot is lost in the search for magic.
#In the writing of this text, I've been in conflict with the symbols to use for
#the operation/instruction names, at first conforming the number of letters to
#three.  And, of course, bounding line lengths, obsessing on blah.
# All crammed into a box, reducing everything to four or eight per block.
# Calculating total lengths.  1st 256 characters end w/ "No license is carried.".
# Last but not least, each saving, manually updating the date/time text...
#Frequently.
#
# Tabs are set to 8x6px width Monaco-9 characters (48px).

/*
Logical Concepts
 AND and OR are interchangeable:
	a|b	==	!(!a&!b)
	a&b	==	!(!a|!b)

 Binary NOT is equal to ExORing with binary -1 (all one bits):
	!a	==	a^(-1)

 Binary representation of Exclusive OR: ~(a&b)&(a|b)


Arithmetic Concepts
 Negation: subtraction from zero; binary equivalent to: NOT, increment one.
	Auxiliary Carry is set to: term is non-zero.

 Overflow: resulting bits don't fit the correct integer result, esp. the sign bit.


The Operations in Classic Interpreter Plus form (a name for my ideal script)
 A bare instruction operation counts for a parsed (raw, text) operation.

Symbol	Name				Form
 simplified operation base, the basic eight; logical, arithmetical
NOT	binary NOT (unary)		~(a)
AND	binary AND			(a)&(b)
IOR	inclusive OR			(a)|(b)
EOR	exclusive OR			(a)^(b)
ADD	add				(a)+(b)
SUB	subtract			(a)-(b)
MUL	multiply			(a)*(b)		|	#times;
DIV	divide				(a)/(b)		|	#divide;

 arithmetic/basic operations
MOD	modulus				(a)%(b)
NEG	negate (unary)			-(a)		|	–(a)	| #ndash; | #minus;
 (high level in compilation)
INC	increment (unary)		++a		|	#8710;
DEC	decrement (unary)		--a

 sign/mask operations
ABS	absolute value (unary)		+(a)
EXTS	extract sign (unary)		*<(a)		signed high byte>>7
CPYS	copy sign			(a)-^(b)	int: ^= -=; real: move to sign bit

 placement operations
SHL	shift left			(a)<<(b)
SHR	shift right			(a)>>(b)
ROL	rotate left			(a)~<(b)
ROR	rotate right			(a)~>(b)

 comparative operations
ORD	ordered (real) (unary)		(a)=(a)
V	overflow (signed)		((a)-(b)<0)!=((a)<(b))
NE	not equal to			(a)!=(b)	|	#ne;	|	#8800;
EQ	equal to			(a)=(b)
LT	less than			(a)<(b)		|	#lt;
GT	greater than			(a)>(b)		|	#gt;
LE	lesser or equal to		(a)<=(b)	|	#le;	|	#8804;
GE	greater or equal to		(a)>=(b)	|	#ge;	|	#8805;

EQV	equivalent to			(a)==(b)	|	#8781;
NEV	not equivalent to		(a)~=(b)	|	#8813;

 boolean operations
SNZ	set not zero (unary)		!(a)		|	¬	|	#not;
BAND	boolean and			(a)&&(b)	|	#and;	|	#8743;
BOR	boolean inclusive or		(a)||(b)	|	#or;	|	#8744;

 exponent operations
SQRT	square root (unary)		√(a)		|	#radic;	|	#8730;
POW	power (exponentiation)		(a)**(b)
LOGB	base logarithm			(a)%%(b)

 memory operations
R	read				(a).b
W	write				->a
LR	load reference (unary)		@(a)		-- for cip, only a pointer is obtained

 program control
BR	branch				.>

BC	branch on condition (A!=0)	=>		|	? {then}:{else};
JR	jump any routine		.{name}
RT	return from routine		.<		-- value left in A is the result

TP	trap				[what makes an exception]

NOP	no operation (used for storing comments)

 stack operations
LI	load immediate			{value}
PH	push				,
PP	pop				[n/a]
LNK	link				(		-- pushes A onto data, F onto link
ULK	unlink				)		-- A->X, (-L)->F, (-S)->A

 high level
	swap				<>a		-- unimplemented
	attach condition (loop)		?>
	do while			<?

Interpreter
 Much of the virtual machine is based on the 650x instruction set for simplicity.

Registers
 A register: accumulator; used in all resulting values.
  Revised in effort to make it possible for the interpreter to function without a data stack;
  Previously "Current stack position is used for accumulator."
 X register is used for storing the only separate operand.
 S register represents the data stack pointer.
 L register represents the link stack pointer.
 F register represents the current link/data frame;
  F.insnoffset = the instruction pointer, and F.dataoffset = S.
  (the pointer for insnoffset's linked name can be cached)

 Null is recognized for any list type where the pointer is NULL.
*/

/* start of C code */

/*
 * Values for the [what] field of cipobject.type.
 * Bytes that are not represented in storage are implied to be zero.
 */
enum {
	w_integer	= 0,	/* 1-8 bytes */
	w_uinteger	= 1,	/* 1-8 bytes */
	w_single	= 2,	/* 1-4 bytes */
	w_double	= 3,	/* 1-8 bytes */
	w_string	= 4,	/* 1-4 bytes/character, unpacked (no surrogates) */
	w_list		= 5,	/* array of cipobject after cipoffset */
	w_pointer	= 6,	/* offset permutation -- used for functions, function-offset labels */
	w_link		= 7	/* link permutation */
};

/*
 * cipoffset (intended size: 8 bytes): an offset paired with a length,
 * so that accesses through the offset can be bounds-checked.
 */
typedef struct cipoffset {
 uint32		offset;
 uint32		length;		/* keeping it within bounds */
} cipoffset;

/*
 * ciplink (intended size: 8 bytes): a link/data frame position, made of
 * an instruction offset and a data offset.
 */
typedef struct ciplink {
 uint32		insnoffset;	/* instruction offset */
 uint32		dataoffset;
} ciplink;

/*
 * cipdata (intended size: 8 bytes): the value payload of a cipobject.
 * All members overlay the same storage; which one is meaningful is
 * presumably selected by the owning object's type field -- TODO confirm.
 */
union cipdata {
 uint8		raw[8];
 int64		integer;
 uint64		uinteger;
 float		real32;		/* 32 bit IEEE float */
 double		real64;		/* 64 bit IEEE float */
 cipoffset	offset;		/* sig/hash, global bit point to context */
 ciplink	link;		/* sig/hash, global bit point to context */
 cipoffset	*pointer;	/* for data, string, list */
};

typedef union cipdata cipdata;

/*
 * cipobject (intended size: 16 bytes): a value together with the identity
 * of its linking name (CRC32 signature plus hash) and a packed type byte.
 */
typedef struct cipobject {
 uint32		signature;	/* CRC32 of linking name */
 uint16		hash;		/* calc'd hash of linking name (uniqueness is important) */
 uint8		type;		/* [global.1] [constant.1] [size.2] [allocated.1] [what.3] */
 uint8		code;		/* charset or link op. (executed at UNX/ULX); 0=none */
 cipdata	data;
} cipobject;

/*
 * Character-set identifiers (used in .code for the string type).
 * The next byte up (16 bit+) makes the encoding category within a family
 * (e.g., 8895-1: 00 xx; 8895-2: 01 xx).
 * NOTE(review): "8895" presumably refers to the ISO/IEC 8859 family -- confirm.
 */
enum {
	cipc_iso_8895		= 0,	/* up to 8895-16 (0F xx) */
	cipc_macroman		= 1,	/* any number of fonts may be used here */
	cipc_windows_125x	= 2,	/* up to 1258 (08 xx) */

	cipc_shift_jis		= 32,

	cipc_iso_2022_jp	= 64,	/* Japanese */
	cipc_iso_2022_kr	= 65,	/* Korean */

	/* unicode transformation format */
	/* UTF-8 is used for storage and UTF-32 for raw; conversion is automatic */
	cipc_utf_7		= 0xFE,	/* high sizes are invalid */
	cipc_utf_xx		= 0xFF	/* depending on size, UTF-8, UTF-16, or UTF-32 */
};

/*
  Stacks are forward-moving (pre-increment for push, post-decrement for pull);
this way, each stack can be automatically reallocated without a hitch.
  A data stack entry is either a cipobject w/ the sig/hash init'd to zero
or 10 bytes each on systems that don't require alignment.
  A link stack entry carries the current function sig/hash and global bit of the current.
*/

/*
 * cipname: one entry of a name list -- the signature and hash of a name
 * together with a pointer to its text.
 */
typedef struct cipname {
 uint32		signature;
 uint16		hash;
 utf8		*string;
} cipname;

/*
 * cipregisters: the interpreter's register file.
 *  A - accumulator
 *  X - the separate operand
 *  F - current link/data frame
 *  L - link stack pointer
 * The author's size annotation is 52 bytes (three 16-byte objects plus L);
 * NOTE(review): sizeof may be larger where the compiler pads the struct.
 */
typedef struct cipregisters {
 cipobject	A, X, F;	/* S is F.dataoffset (i.e. the dataoffset of F's link data) */
 uint32		L;
} cipregisters;

/*
 * cipstate: complete interpreter state -- registers, the link and data
 * stacks with their counts, the master name lists, and the allocator
 * callback used to grow or free storage.
 */
typedef struct cipstate {
 cipregisters	registers;

 uint32		linkcount;
 cipobject	*linkstack;	/* where everything [level] goes */

 uint32		datacount;
 uint8		*datastack;	/* where everything [value] goes */

 cipname	**names[8];	/* master name list(s) (low 3 bits of CRC32 determines which) */

 void		*(*allocator)(void *p, uint32 z); /* automatic realloc function; zero=free */
} cipstate;

/*
 * Instruction opcodes, 0x00-0x3F, laid out in groups of four.
 * Each group's first member is pinned to the hex value given in its
 * heading comment so the numbering cannot drift if a member is edited.
 */
enum { /* instructions */
 /*
 mode bit 7: 0=low (zeros implied, unsigned)/single; 1=high/double
 type bit 6: 0=integer/always; 1=floating/conditionally
 */

 /* for unary operations, bit 7 determines whether register X or A is affected */

 /* only first 8 carry 4/8 permutation bytes */
 /* the rest start with 'F' under floating analogue */
/*0x00/000 - permutation */
 i_NOP = 0x00, /* no-op, skip (pre-compiled, invalid in execution); literal/comment */
 i_TBC, /* trap, branch; always/conditionally; [T/B][RA/CA] */
 i_LIA,
 i_LIX,
/*0x04/004 - load reference A/X (sig/hash/offset), read/write F-relative */
 i_LRA = 0x04,
 i_LRX,
 i_RFA,
 i_WFA,
/*0x08/010 - read/write word long/double; ?L?, ?D? */
 i_RWA = 0x08, /* X:=@A */
 i_RWX, /* A:=@X */
 i_WWA, /* X->@A */
 i_WWX, /* A->@X */
/*0x0C/014 - read/write short (8/16 bits); ?B?, ?H? */
 i_RSA = 0x0C,
 i_RSX,
 i_WSA,
 i_WSX,
/*0x10/020 - push/pop data */
 i_PHA = 0x10,
 i_PHX,
 i_PPA,
 i_PPX,
/*0x14/024 - transfer a:=b */
 i_TXA = 0x14,
 i_TAX,
 i_TXS,
 i_TSX,
/*0x18/030 - increment/decrement */
 i_INC = 0x18,
 i_INX,
 i_DEC,
 i_DEX,
/*0x1C/034 - program */
 i_RET = 0x1C, /* return; subroutine (RTS)/interrupt (RTI; traps return using this) */
 i_JRA, /* jump to A (expecting link-offset permutation); always/conditionally */
 i_LNK, /* link, link w/ X-op (LNX); pop/list (LL?) */
 i_UNK, /* unlink, unlink w/ X-op (UNX); pop/list (UL?) */
/*0x20/040*/
 i_SBA = 0x20,
 i_SBO,
 i_SNE,
 i_SEQ,
/*0x24/044*/
 i_SGE = 0x24,
 i_SLT,
 i_SGT,
 i_SLE,
/*0x28/050*/
 i_EQV = 0x28,
 i_NEV,
 i_EXS,
 i_CPS,
/*0x2C/054*/
 i_SNZ = 0x2C, /* SNX, SNA */
 i_SRT, /* SRX, SRA */
 i_ABS, /* ABX, ABA */
 i_NEG, /* NEX, NEA */
/*0x30/060*/
 i_NOT = 0x30, /* NOX, NOA */
 i_AND,
 i_IOR,
 i_EOR,
/*0x34/064*/
 i_ADD = 0x34,
 i_SUB,
 i_MUL,
 i_DIV,
/*0x38/070*/
 i_MOD = 0x38,
 i_RSV, /* 0x39 reserved: an enumerator name can be declared only once and i_CPS
           already names 0x2B; the slot is kept so the opcodes after it retain
           their values. Intended operation unknown -- TODO confirm. */
 i_POW,
 i_LGB,
/*0x3C/074*/
 i_SHL = 0x3C,
 i_SHR,
 i_ROL,
 i_ROR
};

/*
 Demonstration
  Expressions are evaluated left-to-right (simplicity/performance):
	5+2*3		; result: 21 (for algebraic evaluation, use parentheses)

 “5+(2*3)” compiles and executes as:
	LIA	5	; A: 5;				S:+0
	LNK		; A: 5;				S:..{5}
	LIA	2	; A: 2;				S:..{5}
	LIX	3	; A: 2;		X: 3;		S:..{5}
	MUL		; A: 6;		X: 3;		S:..{5}
	UNK		; A: 5;		X: 6;		S:+0
	ADD		; A: 11;	X: 6;		S:+0

 Block
  () and {} are encoded using the same link instructions.
  The only difference is that {} builds a list object after completion (bit 7).
  Reverse-engineering is used as reformatting here.

 Function Calls
  For a name placed before parentheses, a function call is implied: “foo(4)”.
  Manual function calls may be supported: “(4,.foo)”.
  They each compile as:
	“foo(4)”
	LRX	foo		| 05 [CRC32/hash of “foo”]
	LNX			| 1E	;following byte is linked
	JRA			| 1D
	LIA	4		| 02 00 00 00 04
	PHA			| 10
	UNX			| 1F	;JRA operation is called for pushed X
	“(4,.foo)”
	LNK			| 1E
	LIA	4		| 02 00 00 00 04
	PHA			| 10
	LRA	foo		| 04 [CRC32/hash of “foo”]
	JRA			| 1D
	UNK			| 1F
   Mid-parameter routine calls can be used any number of times; for example:
    “(4,.foo,.goo,"who",.fubar)”
   Note: since functions may modify their parameters, this method is a bit odd.

 Implied Multiplication
  For a number placed before (), multiplication is implied.
	as “a * -9(x/3)” is equivalent to “a*–9*(x/3)”
  They compile as:
	LRA	a		| 04 [CRC32/hash of “a”]
	RWA			| x08
	LIX	9		| 03 00 00 00 09
	NEX			| 2F
	MUL			| 36
	LNK			| 1E
	LRA	x		| 04 [CRC32/hash of “x”]
	RWA			| x08
	LIX	3		| 03 00 00 00 03
	DIV			| 37
	UNK			| 1F
	MUL			| 36

 Function Parameters
  The F register is not only used for the instruction pointer (or program counter),
   it also is used for function parameters and variables.
	inline function “sqrt(v) {√v}”
  May compile as (unoptimized method may use X, then transfer):
	RFA	v		| 06 00 00 00 00
	SRA			| ED
	RTS			| 1C
   Note: SRT is encoded with the floating-point type by default.

 Numerics
  In the case that different number types are used, the major type is used, followed
   by a conversion to the type called for by the instruction.
	in �a+b�, if either a or b are floating point, then both are made into a
	 float64, following a floating-point add operation.
	The result (in register A) is converted to a uint32.
  To maximize performance, keep all of the variables the same in type.

 Quotes
  Single quotes signify a symbol, while double quotes signify text.
	'this is an identifier if left alone'
	"this is strictly text; #ldquo;quotes within quotes!#rdquo;"

 Labels and Lists
  Just about anything can have a label attached to it; so instead of records, items
   in list can have labels (optional).
  The only difference between “{thing:8}” and “{8}” is that a label (sig/hash) is
   attached to the first item, named “thing”, recorded in the names list.

  Accessing items/fields is done with a dot: “mylist.thing” or “mylist.0”.
  Even labels with spaces or non-ASCII characters: “mylist:={'me too':3}; mylist.'me too'”.
*/

/*
 Accessing C structures and arrays as cip-objects
  Macros and Structure types, followed by pointers to structures can be used to access
   structures and arrays as cip-objects with a conversion call.
  The structures are set up as lists with the pointers to the fields, along with the
   allocated and constant bits set to one so that the script neither allows script
   modifications, nor is “free” called.

  struct struct_foos{
   double	fighter;
   float	little1;
   int		little2;
  } foos;

  struct struct_foos2{
   char		embed[64];
   bool		lean;
  } foos2 = {"i'm embeded!", true};

  cip_struct cip_foos2 = {
   cip_string(foos2,embed),	// {"embed", sizeof(foos2.embed[0]), &foos2.embed}
   cip_uint8(foos2,lean)	// {"lean", sizeof(foos2.lean), &foos2.lean}
  };

  cip_struct cip_foos = {
   cip_real(foos,fighter),	// {"fighter", sizeof(foos.fighter), &foos.fighter}
   cip_real(foos,little1),	// {"little1", sizeof(foos.little1), &foos.little1}
   cip_int(foos,little2),	// {"little2", sizeof(foos.little2), &foos.little2}
   cip_list(cip_foos2)
  };

  cip_makelist(mycipstate,cip_foos);	// result: (cipobject*)
*/

/*
 Comments
	## this is an end of line comment (must start with two ##)
	foo(x) {
	 [this is a block comment]	++x + 2
	}

 Entities
  Markup entities are supported so any Unicode symbol can be used.
	XML:		cip:
	&lt;		#lt;
	&#146;		#146;
	&#xFF;		#%FF;

  Note: because they can be placed anywhere, explicit #s need to be escaped (e.g., \#).

 Operators
  Any symbol can be used to represent an operator, just as all operators are set up in
   the default pre-compiled header!  Every operator is customizable, and can represent
   any code instruction, or function name.  It can be one or two characters.
	factorial(x) {1->r; x>0 <? {r*=x;--x}; r}
	#*operator 0	! .factorial	## for factorial, unary-following (unimplemented)

	#*operator 1	– *neg		## negate (unary-preceding)
	#*operator 2	+ *add		## add (dyadic)
*/