# #codebase.txt # a mass work in dumping out thoughts on operational concepts, and stuff # # Author: Adam Jason Pickett # Created: 2009.04.16 21:54 EST (in thought, this started yrs. ago) # Revised: 2010.10.23 18:05 EDT - spelling fixes, discontinuation notice # #09.05.10 - v.9 release # #No license/warranty are carried. # #Description: # This is to fix the mistake of starting code projects without planning. # Opening my mind to excel. # # *NOTE*: I've discontinued the title 'CIP', and have since advanced the #interpreter to a simpler form. The 'C' stuff here is deprecated. # The functional interpr4.zip is available in the 'downloads'. # # This may help whomever is reading this. # #History: # # Over the years, I've written dozens of paper fragments w/ binary encodings, #processor-type binary sequences written on off-time from the computer. # I've threw out all o'the lined paper (in rage tribulations regarding "proof" #to my mother that I wasn't doing this out of ego - she isn't very bright). # O, how I love the MC680x0 series (not so much the barely capable 68000). #The disassembly of code resources, burning everything in, then writing my own #HyperTalk-based disassembler. For this I'm a geek, certainly. # # One other thing worth mentioning is how much more I work on building structure #than culling lists. Yes, a lot is lost in the search for magic. #In the writing of this text, I've been in conflict with the symbols to use for #the operation/instruction names, at first conforming the number of letters to #three. And, of course, bounding line lengths, obsessing on blah. # All crammed into a box, reducing everything to four or eight per block. # Calculating total lengths. 1st 256 characters end w/ "No license is carried.". # Last but not least, each saving, manually updating the date/time text... #Frequently. # # Tabs are set to 8x6px width Monaco-9 characters (48px). 
/* Logical Concepts AND and OR are interchangeable: a|b == ~(~a&~b) a&b == ~(~a|~b) Binary NOT is equal to ExORing with binary -1 (all one bits): ~a == a^(-1) Binary representation of Exclusive OR: ~(a&b)&(a|b) Arithmetic Concepts Negation: subtraction from zero; binary equivalent to: NOT, increment one. Auxiliary Carry is set to: term is non-zero. Overflow: resulting bits don't fit the correct integer result, esp. the sign bit. The Operations in Classic Interpreter Plus form (a name for my ideal script) A bare instruction operation counts for a parsed (raw, text) operation. Symbol Name Form simplified operation base, the basic eight; logical, arithmetical NOT binary NOT (unary) ~(a) AND binary AND (a)&(b) IOR inclusive OR (a)|(b) EOR exclusive OR (a)^(b) ADD add (a)+(b) SUB subtract (a)-(b) MUL multiply (a)*(b) | #times; DIV divide (a)/(b) | #divide; arithmetic/basic operations MOD modulus (a)%(b) NEG negate (unary) -(a) | –(a) | #ndash; | #minus; (high level in compilation) INC increment (unary) ++a | #8710; DEC decrement (unary) --a sign/mask operations ABS absolute value (unary) +(a) EXTS extract sign (unary) *<(a) signed high byte>>7 CPYS copy sign (a)-^(b) int: ^= -=; real: move to sign bit placement operations SHL shift left (a)<<(b) SHR shift right (a)>>(b) ROL rotate left (a)~<(b) ROR rotate right (a)~>(b) comparative operations ORD ordered (real) (unary) (a)=(a) V overflow (signed) ((a)-(b)<0)!=((a)<(b)) NE not equal to (a)!=(b) | #ne; | #8800; EQ equal to (a)=(b) LT less than (a)<(b) | #lt; GT greater than (a)>(b) | #gt; LE lesser or equal to (a)<=(b) | #le; | #8804; GE greater or equal to (a)>=(b) | #ge; | #8805; EQV equivalent to (a)==(b) | #8781; NEV not equivalent to (a)~=(b) | #8813; boolean operations SNZ set not zero (unary) !(a) | ¬ | #not; BAND boolean and (a)&&(b) | #and; | #8743; BOR boolean inclusive or (a)||(b) | #or; | #8744; exponent operations SQRT square root (unary) √(a) | #radic; | #8730; POW power (exponentiation) (a)**(b) LOGB base 
logarithm (a)%%(b) memory operations R read (a).b W write ->a LR load reference (unary) @(a) -- for cip, only a pointer is obtained program control BR branch .> BC branch on condition (A!=0) => | ? {then}:{else}; JR jump any routine .{name} RT return from routine .< -- value left in A is the result TP trap [what makes an exception] NOP no operation (used for storing comments) stack operations LI load immediate {value} PH push , PP pop [n/a] LNK link ( -- pushes A onto data, F onto link ULK unlink ) -- A->X, (-L)->F, (-S)->A high level swap <>a -- unimplemented attach condition (loop) ?> do while <? Interpreter Much of the virtual machine is based on the 650x instruction set for simplicity. Registers A register: accumulator; used in all resulting values. Revised in effort to make it possible for the interpreter to function without a data stack; Previously "Current stack position is used for accumulator." X register is used for storing the only separate operand. S register represents the data stack pointer. L register represents the link stack pointer. F register represents the current link/data frame; F.insnoffset = the instruction pointer, and F.dataoffset = S. (the pointer for insnoffset's linked name can be cached) Null is recognized for any list type where the pointer is NULL. 
*/ /* start of C code */ enum{ /* [what] values (zeros implied for unrepresented bytes) */ w_integer, /* 1-8 bytes */ w_uinteger, /* 1-8 bytes */ w_single, /* 1-4 bytes */ w_double, /* 1-8 bytes */ w_string, /* 1-4 bytes/character, unpacked (no surrogates) */ w_list, /* array of cipobject after cipoffset */ w_pointer, /* offset permutation -- used for functions, function-offset labels */ w_link /* link permutation */ }; typedef cipoffset struct/*(8)*/{ uint32 offset; uint32 length; /* keeping it within bounds */ } cipoffset; typedef ciplink struct/*(8)*/{ uint32 insnoffset; /* instruction offset */ uint32 dataoffset; } ciplink; typedef union cipdata/*(8)*/{ uint8 raw[8]; int64 integer; uint64 uinteger; float real32; /* 32 bit IEEE float */ double real64; /* 64 bit IEEE float */ cipoffset offset; /* sig/hash, global bit point to context */ ciplink link; /* sig/hash, global bit point to context */ cipoffset *pointer; /* for data, string, list */ } cipdata; typedef cipobject struct/*(16)*/{ uint32 signature; /* CRC32 of linking name */ uint16 hash; /* calc'd hash of linking name (uniqueness is important) */ uint8 type; /* [global.1] [constant.1] [size.2] [allocated.1] [what.3] */ uint8 code; /* charset or link op. (executed at UNX/ULX); 0=none */ cipdata data; } cipobject; enum{/* charset (used in .code for the string type) */ /* next byte up (16 bit+) makes the encoding cat. 
(e.g., 8895-1: 00 xx; 8895-2: 01 xx) */ cipc_iso_8895, /* up to 8895-16 (0F xx) */ cipc_macroman, /* any number of fonts may be used here */ cipc_windows_125x, /* up to 1258 (08 xx) */ cipc_shift_jis = 32, cipc_iso_2022_jp = 64, /* Japanese */ cipc_iso_2022_kr, /* Korean */ /* unicode transformation format */ /* note that UTF-8 is used for storage, and UTF-32 is used for raw (conversion is automatic) */ cipc_utf_7 = 0xFE, /* high sizes are invalid */ cipc_utf_xx = 0xFF /* depending on size, UTF-8, UTF-16, or UTF-32 */ }; /* Stacks are forward-moving (pre-increment for push, post-decrement for pull); this way, each stack can be automatically reallocated without a hitch. A data stack entry is either a cipobject w/ the sig/hash init'd to zero or 10 bytes each on systems that don't require alignment. A link stack entry carries the current function sig/hash and global bit of the current. */ typedef cipname struct{ uint32 signature; uint16 hash; utf8 *string; } cipname; typedef cipregisters struct/*(52)*/{ cipobject A, X, F; /* S is F.dataoffset */ uint32 L; } cipregisters; typedef cipstate struct{ cipregisters registers; uint32 linkcount; cipobject *linkstack; /* where everything [level] goes */ uint32 datacount; uint8 *datastack; /* where everything [value] goes */ cipname **names[8]; /* master name list(s) (low 3 bits of CRC32 determines which) */ void *(*allocator)(void *p, uint32 z); /* automatic realloc function; zero=free */ } cipstate; enum{ /* instructions */ /* mode bit 7: 0=low (zeros implied, unsigned)/single; 1=high/double type bit 6: 0=integer/always; 1=floating/conditionally */ /* for unary operations, bit 7 determines whether register X or A is affected */ /* only first 8 carry 4/8 permutation bytes */ /* the rest start with �F� under floating analogue */ /*0x00/000 - permutation */ i_NOP, /* no-op, skip (pre-compiled, invalid in execution); literal/comment */ i_TBC, /* trap, branch; always/conditionally; [T/B][RA/CA] */ i_LIA, i_LIX, /*0x04/004 - load 
reference A/X (sig/hash/offset), read/write F-relative */ i_LRA, i_LRX, i_RFA, i_WFA, /*0x08/010 - read/write word long/double; ?L?, ?D? */ i_RWA, /* X:=@A */ i_RWX, /* A:=@X */ i_WWA, /* X->@A */ i_WWX, /* A->@X */ /*0x0C/014 - read/write short (8/16 bits); ?B?, ?H? */ i_RSA, i_RSX, i_WSA, i_WSX, /*0x10/020 - push/pop data */ i_PHA, i_PHX, i_PPA, i_PPX, /*0x14/024 - transfer a:=b */ i_TXA, i_TAX, i_TXS, i_TSX, /*0x18/030 - increment/decrement */ i_INC, i_INX, i_DEC, i_DEX, /*0x1C/034 - program */ i_RET, /* return; subroutine (RTS)/interrupt (RTI; traps return using this) */ i_JRA, /* jump to A (expecting link-offset permutation); always/conditionally */ i_LNK, /* link, link w/ X-op (LNX); pop/list (LL?) */ i_UNK, /* unlink, unlink w/ X-op (UNX); pop/list (UL?) */ /*0x20/040*/ i_SBA, i_SBO, i_SNE, i_SEQ, /*0x24/044*/ i_SGE, i_SLT, i_SGT, i_SLE, /*0x28/050*/ i_EQV, i_NEV, i_EXS, i_CPS, /*0x2C/054*/ i_SNZ, /* SNX, SNA */ i_SRT, /* SRX, SRA */ i_ABS, /* ABX, ABA */ i_NEG, /* NEX, NEA */ /*0x30/060*/ i_NOT, /* NOX, NOA */ i_AND, i_IOR, i_EOR, /*0x34/064*/ i_ADD, i_SUB, i_MUL, i_DIV, /*0x38/070*/ i_MOD, i_CPS, i_POW, i_LGB, /*0x3C/074*/ i_SHL, i_SHR, i_ROL, i_ROR, }; /* Demonstration Expressions are evaluated left-to-right (simplicity/performance): 5+2*3 ; result: 21 (for algebraic evaluation, use parentheses) �5+(2*3)� compiles and executes as: LIA 5 ; A: 5; S:+0 LNK ; A: 5; S:..{5} LIA 2 ; A: 2; S:..{5} LIX 3 ; A: 2; X: 3; S:..{5} MUL ; A: 6; X: 3; S:..{5} UNK ; A: 5; X: 6; S:+0 ADD ; A: 11; X: 6; S:+0 Block () and {} are encoded using the same link instructions. The only difference is that {} builds a list object after completion (bit 7). Reverse-engineering is used as reformatting here. Function Calls For a name placed before parentheses, a function call is implied: �foo(4)�. Manual function calls may be supported: �(4,.foo)�. 
They each compile as: “foo(4)” LRX foo | 05 [CRC32/hash of “foo”] LNX | 1E ;following byte is linked JRA | 1D LIA 4 | 02 00 00 00 04 PHA | 10 UNX | 1F ;JRA operation is called for pushed X “(4,.foo)” LNK | 1E LIA 4 | 02 00 00 00 04 PHA | 10 LRA foo | 04 [CRC32/hash of “foo”] JRA | 1D UNK | 1F Mid-parameter routine calls can be used any number of times; for example: “(4,.foo,.goo,"who",.fubar)” Note: since functions may modify their parameters, this method is a bit odd. Implied Multiplication For a number placed before (), multiplication is implied, as “a * -9(x/3)” is equivalent to “a*–9*(x/3)” They compile as: LRA a | 04 [CRC32/hash of “a”] RWA | x08 LIX 9 | 03 00 00 00 09 NEX | 2F MUL | 36 LNK | x36 LRA x | 04 [CRC32/hash of “x”] RWA | x08 LIX 3 | 03 00 00 00 03 DIV | 37 UNK | x37 MUL | 36 Function Parameters The F register is not only used for the instruction pointer (or program counter), it also is used for function parameters and variables. inline function “sqrt(v) {√v}” May compile as (unoptimized method may use X, then transfer): RFA v | 06 00 00 00 00 SRA | ED RTS | 1C Note: SRT is encoded with the floating-point type by default. Numerics In the case that different number types are used, the major type is used, followed by a conversion to the type called for by the instruction. In “a+b”, if either a or b is floating point, then both are made into a float64, followed by a floating-point add operation. The result (in register A) is converted to a uint32. To maximize performance, keep all of the variables the same in type. Quotes Single quotes signify a symbol, while double quotes signify text. 'this is an identifier if left alone' "this is strictly text; #ldquo;quotes within quotes!#rdquo;" Labels and Lists Just about anything can have a label attached to it; so instead of records, items in a list can have labels (optional). 
The only difference between “{thing:8}” and “{8}” is that a label (sig/hash) is attached to the first item, named “thing”, recorded in the names list. Accessing items/fields is done with a dot: “mylist.thing” or “mylist.0”. Even labels with spaces or non-ASCII characters: “mylist:={'me too':3}; mylist.'me too'”. */ /* Accessing C structures and arrays as cip-objects Macros and Structure types, followed by pointers to structures can be used to access structures and arrays as cip-objects with a conversion call. The structures are set up as lists with the pointers to the fields, along with the allocated and constant bits set to one so that the script neither allows script modifications, nor is “free” called. struct struct_foos{ double fighter; float little1; int little2; } foos; struct struct_foos2{ char embed[64]; bool lean; } foos2 = {"i'm embeded!", true}; cip_struct cip_foos2 = { cip_string(foos2,embed), /* {"embed", sizeof(foos2.embed[0]), &foos2.embed} */ cip_uint8(foos2,lean) /* {"lean", sizeof(foos2.lean), &foos2.lean} */ }; cip_struct cip_foos = { cip_real(foos,fighter), /* {"fighter", sizeof(foos.fighter), &foos.fighter} */ cip_real(foos,little1), /* {"little1", sizeof(foos.little1), &foos.little1} */ cip_int(foos,little2), /* {"little2", sizeof(foos.little2), &foos.little2} */ cip_list(cip_foos2) }; cip_makelist(mycipstate,cip_foos); /* result: (cipobject*) */ */ /* Comments ## this is an end of line comment (must start with two ##) foo(x) { [this is a block comment] ++x + 2 } Entities Markup entities are supported so any Unicode symbol can be used. XML: cip: &lt; #lt; &#146; #146; &#xFF; #%FF; Note: because they can be placed anywhere, explicit #s need to be escaped (e.g., \#). Operators Any symbol can be used to represent an operator, just as all operators are set up in the default pre-compiled header! Every operator is customizable, and can represent any code instruction, or function name. It can be one or two characters. factorial(x) {1->r; x>0 <? 
{r*=x;--x}; r} #*operator 0 ! .factorial ## for factorial, unary-following (unimplemented) #*operator 1 – *neg ## negate (unary-preceding) #*operator 2 + *add ## add (dyadic) */