Virtual Machine in C to run C

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
3
down vote

favorite












This is basically ANOTHER follow-up to this review here, but I've now set about completing the software project. The changes since the previous review are that I've added a complete script format with an embedding API, changed the VM to be register-based, and modeled the calling convention on x64's. As an aspiring software engineer, this is the first serious project that I've ever completed.



Remaining code on GitHub



tagha.h



#ifndef TAGHA_H_INCLUDED
#define TAGHA_H_INCLUDED


#ifdef __cplusplus
extern "C"
#endif

#include <inttypes.h>
#include <stdbool.h>
#include <iso646.h>

/* For Colored Debugging Printing!
 * Each macro is an ANSI "Select Graphic Rendition" escape sequence: the ESC
 * byte (0x1B, octal 033) followed by "[<code>m". Without the leading escape
 * character the terminal prints the text literally instead of changing color.
 */
#define KNRM "\x1B[0m" // Normal
#define KRED "\x1B[31m"
#define KGRN "\x1B[32m"
#define KYEL "\x1B[33m"
#define KBLU "\x1B[34m"
#define KMAG "\x1B[35m"
#define KCYN "\x1B[36m"
#define KWHT "\x1B[37m"
#define RESET "\033[0m" // Reset obviously

#define LOOP_COUNTER 1800000000

/*
 * type generic Hashmap (uses 64-bit int as pointers to accommodate 32-bit and 64-bit)
 *
 * KeyNode is one entry of a bucket chain.
 */
typedef struct KeyNode {
	uint64_t m_pData;          // stored value; tagha_api.c stores pointers here converted through uintptr_t.
	const char *m_strKey;      // string key of this entry.
	struct KeyNode *m_pNext;   // next entry in the same bucket, NULL at the end of the chain.
} KeyNode;

/* String-keyed hash table: an array of bucket chains plus its bookkeeping. */
typedef struct Hashmap {
	uint32_t size, count;        // 'size' is the number of buckets in m_ppTable (Tagha_Free iterates 0..size-1); 'count' is presumably the number of stored entries.
	struct KeyNode **m_ppTable;  // bucket array; each slot is the head of a KeyNode chain.
} Hashmap;

struct Tagha;
typedef struct Tagha Tagha;


/* the most basic values in C.
 * In ALL of programming, there are only 4 fundamental kinds of data:
 * Integers
 * Floats
 * Strings
 * References
 *
 * CValue overlays every scalar, pointer and small-array view on the same
 * storage, so one VM register or stack slot can be read as any of them.
 */
typedef union CValue {
	bool Bool, *BoolPtr, BoolArr[8];
	int8_t Char, *CharPtr, CharArr[8];
	int16_t Short, *ShortPtr, ShortArr[4];
	int32_t Int32, *Int32Ptr, Int32Arr[2];
	int64_t Int64, *Int64Ptr;
	
	uint8_t UChar, *UCharPtr, UCharArr[8];
	uint16_t UShort, *UShortPtr, UShortArr[4];
	uint32_t UInt32, *UInt32Ptr, UInt32Arr[2];
	uint64_t UInt64, *UInt64Ptr;
	
	float Float, *FloatPtr, FloatArr[2];
	double Double, *DoublePtr;
	
	void *Ptr, **PtrPtr;
	const char *String, **StringPtr;
	char *Str, **StrPtr;
	union CValue *SelfPtr;   // lets a register holding a stack address step in whole CValue slots.
} CValue;

/* // Just gonna leave this here in case we ever need it.
#define SIMD_BYTES 32
typedef union SIMDCValue
union CValue cvalue;
bool BoolSIMD[SIMD_BYTES];
int8_t CharSIMD[SIMD_BYTES];
int16_t ShortSIMD[SIMD_BYTES/2];
int32_t Int32SIMD[SIMD_BYTES/4];
int64_t Int64SIMD[SIMD_BYTES/8];

uint8_t UCharSIMD[SIMD_BYTES];
uint16_t UShortSIMD[SIMD_BYTES/2];
uint32_t UInt32SIMD[SIMD_BYTES/4];
uint64_t UInt64SIMD[SIMD_BYTES/8];

float FloatIMD[SIMD_BYTES/4];
double DoubleSIMD[SIMD_BYTES/8];
SIMDCValue;
*/

// API for scripts to call C/C++ host functions.
// A native receives the calling VM, its arguments, a slot for its return value and the argument count.
// NOTE(review): other array parameters in this copy of the code appear with their "[]" missing
// (e.g. 'values+10' is applied to a parameter written as 'union CValue values'), so 'params'
// is presumably meant to be 'union CValue params[]' -- confirm against the repository.
typedef void (*fnNative_t)(struct Tagha *const pEnv, union CValue params, union CValue *const pRetval, uint32_t uiArgc);

/* One entry of a native registration table: a script-visible name and the
 * host function behind it. Tagha_RegisterNatives stops at the first entry
 * whose pFunc or strName is NULL, so a table ends with a zeroed entry.
 */
typedef struct NativeInfo {
	const char *strName;	// use as string literals
	fnNative_t pFunc;
} NativeInfo;


/* addressing modes
 * immediate - simple constant value.
 * register - register holds the exact data.
 * register indirect - register holds memory address and dereferenced. Can be used as displacement as well.
 * IPRelative - instruction ptr + offset. required for static data like global vars.
 *
 * Every enumerator is a distinct bit. Byte..EightBytes name an operand width.
 */
enum AddrMode {
	Immediate	= 1,
	Register	= 2,
	RegIndirect	= 4,
	//IPRelative	= 8, // unused, will be replaced in the future with useful addr mode.
	Byte		= 16,
	TwoBytes	= 32,
	FourBytes	= 64,
	EightBytes	= 128,
};

// Register ID list
// 13 general purpose use registers + 3 reserved use.
enum RegID {
	// 'ras' is gen. purpose + accumulator
	// all native and tagha func return data that fits within 64-bits goes here.
	// natives can only return a single 8-byte piece of data.
	// if you need to return larger than 8 bytes...
	// use ras, rbs, and rcs. otherwise, return as pointer in ras.
	ras=0,rbs,rcs,
	
	// 10 more gen. purpose regs for whatever use.
	// when passing arguments, use registers rds to rms
	// since params are passed right to left.
	// put the rightmost arg in rms.
	// thus if you passed 10 args, the 1st arg would be in rds and 10th arg in rms.
	rds,
	res,rfs,rgs,
	rhs,ris,rjs,
	rks,rls,rms,
	
	// do not modify after this. Add more registers, if u need, above.
	rsp,rbp,	// stack ptrs, do not touch
	rip,		// instr ptr, do not touch as well.
	regsize		// for lazily updating RegID list
};


// for interactive mode.
/*
typedef struct TokenLine

struct TokenLine *m_pNext;
uint8_t *m_ucBytecode;
uint32_t m_uiNumBytes;
TokenLine;
*/

/* C global definitions.
 * usually C modules contain either functions or global vars visibly.
 * static variables and functions should NEVER be listed here as
 * static data of all types have internal linkage.
 * so a static local var, though "global", shouldn't come up in global var data
 *
 * DefType tags what a TaghaCDef entry describes.
 */
enum DefType {
	DefGlobal=0,
	DefFunction=1,
};

/* Describes one named script definition (a function or a global variable).
 * Entries are heap-allocated and stored in Tagha::m_pmapCDefs keyed by name.
 */
typedef struct TaghaCDef {
	uint32_t m_uiOffset;	// where is func or global var location in memory?
	uint8_t m_ucDefType;	// type of definition (enum DefType), true if function.
} TaghaCDef;

/* Complete state of one script virtual machine: its register file, the
 * script's memory image with its segment pointers, the name tables used to
 * bind natives and look up definitions, and the command-line arguments.
 */
struct Tagha {
	union CValue m_Regs[regsize];
	uint8_t
		*m_pMemory,		// script memory, entirely aligned by 8 bytes.
		*m_pStackSegment,	// stack segment ptr where the stack's lowest address lies.
		*m_pDataSegment,	// data segment is the address AFTER the stack segment ptr. Aligned by 8 bytes.
		*m_pTextSegment		// text segment is the address after the last global variable AKA the last opcode.
	;
	// stores a C/C++ function ptr using the script-side name as the key.
	char **m_pstrNativeCalls;	// natives string table.
	struct Hashmap
		*m_pmapNatives,		// native C/C++ interface hashmap.
		*m_pmapCDefs		// stores C definitions data like global vars and functions.
	;
	union CValue *m_pArgv;	// using union to force char** size to 8 bytes.
	uint32_t
		m_uiMemsize,	// total size of m_pMemory
		m_uiInstrSize,	// size of the text segment
		m_uiMaxInstrs,	// max amount of instrs a script can execute.
		m_uiNatives,	// amount of natives the script uses.
		m_uiFuncs,	// how many functions the script has.
		m_uiGlobals	// how many globals variables the script has.
	;
	int32_t m_iArgc;
	bool
		m_bSafeMode : 1,	// does the script want bounds checking?
		m_bDebugMode : 1,	// print debug info.
		m_bZeroFlag : 1		// conditional zero flag.
	;
};

/*
* I think you may wanna spend a bit thinking about what scope you want. A VM running 1 "script" (properly called a program, process, or thread) blurs the line between VM and interpreter. Having multiple programs means an OS program has to be built on top of the VM allowing it to run multiple programs concurrently.
*
* There's no reason not to make a good VM, provide one or two compilers/interpreters in its native language. You don't have to write an OS to write code for the VM.
If it's generic enough, somebody can come along later and build an OS on top
*
* Yeah, if you want it to be embedable, then just write an interpreter for one language with hooks to call it in other languages. If you need to run other languages on top, then go for a VM.
*
* There you go, so you don't even really need a VM to embed C
*
* You don't embed clang though. You build a backend so you can write binaries for tagha in any llvm language. You write an os kernel for tagha, allowing the compiler to be run in the machine.
*
* A kernel is a program written for the machine that manages the filesystem, peripherals, and programs running on the machine. With a kernel, you can run compiling systems that are entirely contained in the machine.
Otherwise, you use an external machine to compile the binary, then move the binary into the machine to be run as its program.
*
* Probably the most direct way to do an REPL interpreter is to do the same thing you do compiling; collect text from the script until you have enough to compile a block of code and execute it. It'll be slow because it lacks optimization, but it shouldn't need many changes to your code.
*/


// tagha_exec.c
int32_t Tagha_Exec(struct Tagha *const pSys);
const char *RegIDToStr(enum RegID id);


// tagha_api.c
struct Tagha *Tagha_New(void);
void Tagha_Init(struct Tagha *pSys);
void Tagha_LoadScriptByName(struct Tagha *pSys, char *filename);
void Tagha_LoadScriptFromMemory(struct Tagha *pSys, void *pMemory, uint64_t memsize);
bool Tagha_RegisterNatives(const struct Tagha *pSys, struct NativeInfo arrNatives);
void Tagha_Free(struct Tagha *pSys);
int32_t Tagha_RunScript(struct Tagha *pSys);
int32_t Tagha_CallFunc(struct Tagha *pSys, const char *strFunc);

/* Frees 'ptr' and resets it to NULL.
 * Written as a single parenthesized expression so that it behaves like one
 * statement everywhere; a bare 'if' macro would capture a following 'else'.
 * free(NULL) is a no-op, so no NULL test is needed.
 * 'ptr' is evaluated twice and must be an lvalue without side effects.
 * The including file must provide <stdlib.h>.
 */
#ifndef FREE_MEM
	#define FREE_MEM(ptr)	(free((ptr)), (ptr) = NULL)
#endif

void Tagha_BuildFromFile(struct Tagha *pSys, const char *strFilename);
void Tagha_BuildFromPtr(struct Tagha *pSys, void *pProgram, uint64_t Programsize);

void Tagha_PrintPtrs(const struct Tagha *pSys);
void Tagha_PrintStack(const struct Tagha *pSys);
void Tagha_PrintData(const struct Tagha *pSys);
void Tagha_PrintInstrs(const struct Tagha *pSys);
void Tagha_PrintRegData(const struct Tagha *pSys);
void Tagha_Reset(struct Tagha *pSys);

void *Tagha_GetGlobalByName(const struct Tagha *pSys, const char *strGlobalName);
void Tagha_PushValues(struct Tagha *pSys, uint32_t uiArgs, union CValue values);
union CValue Tagha_PopValue(struct Tagha *pSys);
void Tagha_SetCmdArgs(struct Tagha *pSys, char *argv);

uint32_t Tagha_GetMemSize(const struct Tagha *pSys);
uint32_t Tagha_GetInstrSize(const struct Tagha *pSys);
uint32_t Tagha_GetMaxInstrs(const struct Tagha *pSys);
uint32_t Tagha_GetNativeCount(const struct Tagha *pSys);
uint32_t Tagha_GetFuncCount(const struct Tagha *pSys);
uint32_t Tagha_GetGlobalsCount(const struct Tagha *pSys);
bool Tagha_IsSafemodeActive(const struct Tagha *pSys);
bool Tagha_IsDebugActive(const struct Tagha *pSys);
void Tagha_PrintErr(const struct Tagha *pSys, const char *funcname, const char *err, ...);

// ds.c
// String-keyed hashmap used by the VM for its native and C-definition tables.
// Tagha_Init calls Map_New and then Map_Init on the result; Tagha_Free calls
// Map_Free and then frees the map pointer itself.
struct Hashmap *Map_New(void);
void Map_Init(struct Hashmap *map);
void Map_Free(struct Hashmap *map);
uint64_t Map_Len(const struct Hashmap *map);

void Map_Rehash(struct Hashmap *map);
// pData is a 64-bit payload; tagha_api.c passes pointers converted through uintptr_t.
bool Map_Insert(struct Hashmap *map, const char *strKey, uint64_t pData);
// Callers in tagha_api.c cast the result back to a pointer and treat a zero result as "not found".
uint64_t Map_Get(const struct Hashmap *map, const char *strKey);
void Map_Set(const struct Hashmap *map, const char *strKey, uint64_t pData);
void Map_Delete(struct Hashmap *map, const char *strKey);
bool Map_HasKey(const struct Hashmap *map, const char *strKey);
const char *Map_GetKey(const struct Hashmap *map, const char *strKey);

/*
void Map_Rehash_int(struct Hashmap *);
bool Map_Insert_int(struct Hashmap *, const uint64_t, void *);
void *Map_Get_int(const struct Hashmap *, const uint64_t);
void Map_Delete_int(struct Hashmap *, const uint64_t);
bool Map_HasKey_int(const struct Hashmap *, const uint64_t);
*/
// Hash helpers for string keys and integer keys (implementations are not shown here).
uint64_t gethash64(const char *strKey);
uint32_t gethash32(const char *strKey);
uint64_t int64hash(uint64_t x);
uint32_t int32hash(uint32_t x);


/*
 * r = register is first operand
 * m = memory address is first operand
 *
 * INSTR_SET is an X-macro list of every opcode. Each physical line must end
 * in a backslash so the whole list stays part of the one #define. Define
 * X(x) before expanding INSTR_SET to generate the enum below (or any
 * parallel table), then #undef it. Opcode values follow list order from 0.
 */
#define INSTR_SET \
	X(halt) \
	/* single operand opcodes */ \
	/* stack ops */ \
	X(push) X(pop) \
	/* unary arithmetic and bitwise ops */ \
	X(neg) X(inc) X(dec) X(bnot) \
	/* jump ops */ \
	X(jmp) X(jz) X(jnz) \
	\
	/* subroutine ops */ \
	X(call) X(ret) X(callnat) \
	\
	/* two operand opcodes */ \
	X(movr) X(movm) X(lea) \
	/* signed and unsigned integer arithmetic ops */ \
	X(addr) X(addm) X(uaddr) X(uaddm) \
	X(subr) X(subm) X(usubr) X(usubm) \
	X(mulr) X(mulm) X(umulr) X(umulm) \
	X(divr) X(divm) X(udivr) X(udivm) \
	X(modr) X(modm) X(umodr) X(umodm) \
	/* bitwise ops */ \
	X(shrr) X(shrm) X(shlr) X(shlm) \
	X(andr) X(andm) X(orr) X(orm) X(xorr) X(xorm) \
	/* comparison ops */ \
	X(ltr) X(ltm) X(ultr) X(ultm) \
	X(gtr) X(gtm) X(ugtr) X(ugtm) \
	X(cmpr) X(cmpm) X(ucmpr) X(ucmpm) \
	X(neqr) X(neqm) X(uneqr) X(uneqm) \
	X(reset) \
	\
	/* floating point opcodes */ \
	X(int2float) X(int2dbl) X(float2dbl) X(dbl2float) \
	X(faddr) X(faddm) X(fsubr) X(fsubm) X(fmulr) X(fmulm) X(fdivr) X(fdivm) \
	X(fneg) X(fltr) X(fltm) X(fgtr) X(fgtm) X(fcmpr) X(fcmpm) X(fneqr) X(fneqm) \
	/* misc opcodes */ \
	X(nop)

#define X(x) x,
enum InstrSet { INSTR_SET };
#undef X

#ifdef __cplusplus

#endif

#endif // TAGHA_H_INCLUDED


tagha_api.c



#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "tagha.h"


/* Tagha File Structure (Dec 23, 2017)
* ------------------------------ start of header ------------------------------
* 2 bytes: magic verifier ==> 0xC0DE
* 4 bytes: stack segment size (aligned by 8 bytes)
* 4 bytes: data segment size
* 1 byte: safemode and debugmode flags
* ------------------------------ end of header ------------------------------
* .natives table
* 4 bytes: amount of natives
* n bytes: native table
* 4 bytes: string size + '\0' of native string
* n bytes: native string.
*
* .functions table
* 4 bytes: amount of functions
* n bytes: functions table
* 4 bytes: string size + '\0' of func string
* n bytes: function string
* 4 bytes: offset
*
* .globalvars table
* 4 bytes: amount of global vars
* n bytes: global vars table
* 4 bytes: string size + '\0' of global var string
* n bytes: global var string
* 4 bytes: offset
*
* n bytes: .data section initial values.
* n bytes: .text section
*/

/* Forward declarations of the file-local helpers defined further down.
 * The scripthdr_* readers take pointer-to-pointer arguments because on
 * failure they free the VM, close the file and null out the caller's
 * pointers (see scripthdr_read_natives_table); otherwise they return the
 * number of header bytes they consumed.
 */
static uint64_t get_file_size(FILE *pFile);
static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile);


/* Allocates a VM on the heap and initializes it with Tagha_Init.
 *
 * Returns the new VM, or NULL when the allocation fails.
 * Tagha_Free releases what the VM owns but not the struct itself, so the
 * caller still has to free() the returned pointer after Tagha_Free.
 */
struct Tagha *Tagha_New(void)
{
	struct Tagha *pNewVM = calloc(1, sizeof *pNewVM);
	if( !pNewVM )
		return NULL;
	
	Tagha_Init(pNewVM);
	return pNewVM;
}

void Tagha_Init(struct Tagha *restrict const pSys)

if( !pSys )
return;

*pSys = (struct Tagha)0;

if( !pSys->m_pmapCDefs )
pSys->m_pmapCDefs = Map_New();
// if we can't allocate our C Definitions
// we can't run code in general as the definitions
// contain functions and global vars!
if( !pSys->m_pmapCDefs )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize C Definitions Map%s ****n", KRED, RESET, KGRN, RESET);
return;

else Map_Init(pSys->m_pmapCDefs);

if( !pSys->m_pmapNatives )
pSys->m_pmapNatives = Map_New();
if( !pSys->m_pmapNatives )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Native Map%s ****n", KRED, RESET, KGRN, RESET);
else Map_Init(pSys->m_pmapNatives);

if( !pSys->m_pArgv )
pSys->m_iArgc = 0;
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));
if( !pSys->m_pArgv )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Command-line Args String Vector%s ****n", KRED, RESET, KGRN, RESET);
else pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



/* Reports whether 'filename' ends in a C source extension.
 *
 * The surviving part of the original compared the last two bytes against the
 * little-endian encoding of ".c"; ".C" is accepted here as well.
 * The suffix is compared byte by byte, so there is no misaligned load, no
 * dependence on host endianness and no read before the start of a short name.
 *
 * Returns false for NULL and for names shorter than the extension.
 */
static bool is_c_file(const char *filename)
{
	if( !filename )
		return false;
	
	const size_t len = strlen(filename);
	if( len < 2 )
		return false;
	
	const char *ext = filename + (len-2);
	return ext[0]=='.' and (ext[1]=='c' or ext[1]=='C');
}

/* Reports whether 'filename' ends in the Tagha bytecode extension.
 *
 * The surviving part of the original compared the last four bytes against
 * the little-endian encoding of ".TBC"; ".tbc" is accepted here as well.
 *
 * Returns false for NULL and for names shorter than the extension.
 */
static bool is_tbc_file(const char *filename)
{
	if( !filename )
		return false;
	
	const size_t len = strlen(filename);
	if( len < 4 )
		return false;
	
	const char *ext = filename + (len-4);
	return memcmp(ext, ".tbc", 4)==0 or memcmp(ext, ".TBC", 4)==0;
}


void Tagha_LoadScriptByName(struct Tagha *const pSys, char *restrict strFilename)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromFile(pSys, strFilename);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;



void Tagha_LoadScriptFromMemory(struct Tagha *const pSys, void *restrict pMemory, const uint64_t memsize)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromPtr(pSys, pMemory, memsize);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;




bool Tagha_RegisterNatives(const struct Tagha *restrict const pSys, struct NativeInfo arrNatives)

if( !pSys or !pSys->m_pmapNatives or !arrNatives )
return false;

for( struct NativeInfo *n=arrNatives ; n->pFunc and n->strName ; n++ )
Map_Insert(pSys->m_pmapNatives, n->strName, (uintptr_t)n->pFunc);
return true;



int32_t Tagha_RunScript(struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return -1;

// make sure 'main' exists.
else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call main with a NULL definition table!");
return -1;

// make sure we have the memory for running.
else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-32) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;

// get instruction offset to main.
struct TaghaCDef *pMainData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, "main");
if( !pMainData or pMainData->m_ucDefType != DefFunction )
Tagha_PrintErr(pSys, __func__, "function 'main' doesn't exist!");
return -1;

pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pMainData->m_uiOffset;

// push argv and argc to registers.
// use 'uintptr_t' so we can force 4-byte pointers as 8-byte.
pSys->m_Regs[res].UInt64 = (uintptr_t)pSys->m_pArgv;
pSys->m_Regs[rds].Int64 = pSys->m_iArgc;

(--pSys->m_Regs[rsp].SelfPtr)->Int64 = -1L; // push bullshit ret address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64; // push rbp

if( pSys->m_bDebugMode )
printf("Tagha_RunScript :: pushed argc: %" PRIi32 " and argv %pn", pSys->m_iArgc, pSys->m_pArgv);

return Tagha_Exec(pSys);


int32_t Tagha_CallFunc(struct Tagha *restrict const pSys, const char *restrict strFunc)

if( !pSys or !strFunc )
return -1;

else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call functions using a NULL function table!");
return -1;

else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-16) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;


struct TaghaCDef *pFuncData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strFunc);
if( !pFuncData or pFuncData->m_ucDefType != DefFunction)
Tagha_PrintErr(pSys, __func__, "function '%s' doesn't exist!", strFunc);
return -1;


// save return address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = (uintptr_t)pSys->m_Regs[rip].UCharPtr+1;

// jump to the function entry address.
pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pFuncData->m_uiOffset;

// push bp and copy sp to bp.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64;

return Tagha_Exec(pSys);



// need this to determine the text segment size.
/* Returns the size of 'pFile' in bytes and leaves the stream rewound to its
 * start. Returns 0 when pFile is NULL, when seeking to the end fails, or
 * when ftell fails. ftell reports failure as -1L, which must not be
 * converted into an enormous unsigned size.
 */
static uint64_t get_file_size(FILE *restrict pFile)
{
	if( !pFile )
		return 0;
	
	uint64_t size = 0;
	if( fseek(pFile, 0, SEEK_END) == 0 ) {
		const long end = ftell(pFile);
		if( end > 0 )
			size = (uint64_t)end;
		rewind(pFile);
	}
	return size;
}


static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignores = 0;

// see if the script is using any natives.
pSys->m_pstrNativeCalls = NULL;
ignores = fread(&pSys->m_uiNatives, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Natives: '%" PRIu32 "'n", pSys->m_uiNatives);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiNatives )
return bytecount;

// script has natives? Copy their names so we can use them on VM natives hashmap later.
pSys->m_pstrNativeCalls = calloc(pSys->m_uiNatives, sizeof(char *));
if( !pSys->m_pstrNativeCalls )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native Table%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


for( uint32_t i=0 ; i<pSys->m_uiNatives ; i++ )
uint32_t str_size;
ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate memory to hold the native's name.
pSys->m_pstrNativeCalls[i] = calloc(str_size, sizeof(char));
if( !pSys->m_pstrNativeCalls[i] )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


// read in the native's name.
ignores = fread(pSys->m_pstrNativeCalls[i], sizeof(char), str_size, *ppFile);
bytecount += str_size;
printf("[Tagha Load Script] :: Copied Native Name: '%s' @ %pn", pSys->m_pstrNativeCalls[i], pSys->m_pstrNativeCalls+i);

pSys = NULL;
return bytecount;


/* Reads the ".functions" table of a script header: a 4-byte function count,
 * then for every function a 4-byte string size, the name bytes and a 4-byte
 * offset. Returns the number of bytes consumed; on an allocation failure it
 * frees the VM, closes the file, nulls the caller's pointers and returns 0.
 *
 * NOTE(review): this copy of the function is damaged. The block braces are
 * missing and the text between the "Copied Function name" format string and
 * the end of the for loop has been lost, so whatever the loop does after
 * reading the offset is not visible here. Restore it from the repository
 * rather than guessing.
 */
static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// see if the script has its own functions.
// This table is so host or other script can call these functions by name or address.
ignore_warns = fread(&pSys->m_uiFuncs, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiFuncs )
return bytecount;

// copy the function data from the header.
for( uint32_t i=0 ; i<pSys->m_uiFuncs ; i++ )
uint32_t str_size;
ignore_warns = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate the hashmap function key.
char *strFunc = calloc(str_size, sizeof(char));
if( !strFunc )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Table String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */
ignore_warns = fread(strFunc, sizeof(char), str_size, *ppFile);
bytecount += str_size;

// copy func's header data to our table
// then store the table to our function hashmap with the key
// we allocated earlier.
struct TaghaCDef *pFuncData = calloc(1, sizeof(struct TaghaCDef));
if( !pFuncData )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Data%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */

ignore_warns = fread(&pFuncData->m_uiOffset, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
printf("[Tagha Load Script] :: Copied Function name '%s' /* for */
pSys = NULL;
return bytecount;


/* Reads the ".globalvars" table of a script header: a 4-byte global count
 * followed by one record per global. Each record ends up in the VM's
 * C-definitions map as a TaghaCDef tagged DefGlobal. Returns the number of
 * bytes consumed.
 *
 * NOTE(review): this copy of the function is damaged. The block braces are
 * missing and the start of the for loop body (everything up to the tail of
 * the "offset:" printf) has been lost, so how strGlobal and pGlobalData are
 * allocated and read is not visible here. Restore it from the repository
 * rather than guessing.
 */
static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// check if the script has global variables.
ignore_warns = fread(&pSys->m_uiGlobals, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Global Vars: %" PRIu32 "n", pSys->m_uiGlobals);
bytecount += sizeof(uint32_t);
uint32_t globalbytes = 0;
if( !pSys->m_uiGlobals )
return bytecount;

for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) offset: %" PRIu32 "n", strGlobal, pGlobalData->m_uiOffset);

// insert the global var's table to our hashmap.
pGlobalData->m_ucDefType = DefGlobal;
Map_Insert(pSys->m_pmapCDefs, strGlobal, (uintptr_t)pGlobalData);
strGlobal = NULL; pGlobalData = NULL;
/* for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) */
pSys = NULL;
return bytecount;


/* Builds a script inside pSys from the file named strFilename.
 * NOTE(review): the body of this function was lost from this copy; only the
 * fragment "highest address" survives. Restore it from the repository.
 */
void Tagha_BuildFromFile(struct Tagha *pSys, const char *restrict strFilename)
highest address


/* Builds a script inside pSys from a program image held in memory.
 * NOTE(review): the body of this function was lost from this copy; only the
 * fragment "data segment" survives. Restore it from the repository.
 */
void Tagha_BuildFromPtr(struct Tagha *restrict pSys, void *pProgram, const uint64_t Programsize)
data segment

void Tagha_Free(struct Tagha *restrict const pSys)

if( !pSys )
return;

// kill memory
FREE_MEM(pSys->m_pMemory);

// free our native table
uint32_t i, Size;
if( pSys->m_pstrNativeCalls )
for( i=0 ; i<pSys->m_uiNatives ; i++ )
FREE_MEM(pSys->m_pstrNativeCalls[i]);

memset(pSys->m_pstrNativeCalls, 0, pSys->m_uiNatives);
FREE_MEM(pSys->m_pstrNativeCalls);

// free our C definitions hashmap and all the tables in it.
if( pSys->m_pmapCDefs )
struct KeyNode
*restrict kv = NULL,
*next = NULL
;
Size = pSys->m_pmapCDefs->size;
for( i=0 ; i<Size ; i++ )
for( kv = pSys->m_pmapCDefs->m_ppTable[i] ; kv ; kv = next )
next = kv->m_pNext;
if( kv->m_pData )
free((struct TaghaCDef *)(uintptr_t)kv->m_pData), kv->m_pData = 0;
if( kv->m_strKey )
free((char *)kv->m_strKey), kv->m_strKey = NULL;


Map_Free(pSys->m_pmapCDefs);
FREE_MEM(pSys->m_pmapCDefs);


// since the system's native hashmap has nothing allocated,
// we just free the hashmap's internal data and then the hashmap itself.
if( pSys->m_pmapNatives )
Map_Free(pSys->m_pmapNatives);
FREE_MEM(pSys->m_pmapNatives);


// free our script argument vector.
if( pSys->m_pArgv )
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);
FREE_MEM(pSys->m_pArgv);


// set our stack pointer pointers to NULL
pSys->m_Regs[rip].UCharPtr = pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = NULL;



void Tagha_Reset(struct Tagha *restrict const pSys)

if( !pSys )
return;

// resets the script without crashing Tagha and the host.
memset(pSys->m_pTextSegment+1, 0, pSys->m_uiMemsize-pSys->m_uiInstrSize);
pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = pSys->m_pMemory + (pSys->m_uiMemsize-1);

memset(pSys->m_Regs, 0, sizeof(union CValue) * rsp);
// TODO: reset global variable data to original values?



void *Tagha_GetGlobalByName(const struct Tagha *restrict const pSys, const char *restrict strGlobalName)

if( !pSys or !pSys->m_pmapCDefs )
return NULL;

// get the global's .data segment offset then return the pointer to that offset.
struct TaghaCDef *pOffset = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strGlobalName);
return pOffset and pOffset->m_ucDefType==DefGlobal ? (pSys->m_pTextSegment+1)+ pOffset->m_uiOffset : NULL;


void Tagha_PushValues(struct Tagha *restrict const pSys, const uint32_t uiArgs, union CValue values)

if( !pSys or !pSys->m_pMemory )
return;

// remember that arguments must be passed right to left.
// we have enough args to fit in registers.
if( uiArgs <= 10 )
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*uiArgs);

// we have too many args, use both regs and stack.
else if( uiArgs>10 )
// first push args into reg.
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*10);

// next, push the remaining values from last to first.
if( pSys->m_bSafeMode and (pSys->m_Regs[rsp].SelfPtr-(uiArgs-10)) < (union CValue *)pSys->m_pStackSegment )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return;

memcpy(pSys->m_Regs[rsp].SelfPtr, values+10, sizeof(union CValue)*(uiArgs-10));
pSys->m_Regs[rsp].SelfPtr -= (uiArgs-10);



union CValue Tagha_PopValue(struct Tagha *restrict const pSys)

union CValue val= .UInt64=0L ;
if( !pSys or !pSys->m_pMemory )
printf("[%sTagha Pop%s]: **** %pSys is NULL%s ****n", KRED, RESET, KGRN, RESET);
return val;

return pSys->m_Regs[ras];


void Tagha_SetCmdArgs(struct Tagha *restrict const pSys, char *argv)

if( !pSys or !pSys->m_pMemory or !argv )
return;

// clear old arguments, if any.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);

// get the size of argument vector
uint32_t newargc = 0;
while( argv[++newargc] != NULL );

// resize our system's argument vector.
if( pSys->m_iArgc != newargc )
pSys->m_iArgc = newargc;
FREE_MEM(pSys->m_pArgv);
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));


/* For Implementing 'int argc' and 'char *argv'
* C Standards dictates the following...
* - The value of argc shall be nonnegative.
* - The parameters argc and argv and the strings pointed to by the argv array shall be modifiable by the program
* - argv[argc] shall be a null pointer.
* - If the value of argc is greater than zero, the array members argv[0] through argv[argc-1] inclusive shall contain pointers to strings, which are given implementation-defined values by the host environment prior to program startup.
* - If the value of argc is greater than zero, the string pointed to by argv[0] represents the program name; argv[0][0] shall be the null character if the program name is not available from the host environment. If the value of argc is greater than one, the strings pointed to by argv[1] through argv[argc-1] represent the program parameters.
*/

// Copy down our argument vector's strings.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
size_t strsize = strlen(argv[i])+1;
pSys->m_pArgv[i].Str = calloc(strsize, sizeof(char));

if( pSys->m_pArgv[i].Str )
strncpy(pSys->m_pArgv[i].Str, argv[i], strsize);
pSys->m_pArgv[i].Str[strsize-1] = 0;


pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



uint32_t Tagha_GetMemSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMemsize;

uint32_t Tagha_GetInstrSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiInstrSize;

uint32_t Tagha_GetMaxInstrs(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMaxInstrs;

uint32_t Tagha_GetNativeCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiNatives;

uint32_t Tagha_GetFuncCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiFuncs;

uint32_t Tagha_GetGlobalsCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiGlobals;

bool Tagha_IsSafemodeActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bSafeMode;

bool Tagha_IsDebugActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bDebugMode;




void Tagha_PrintStack(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .stack Segmentn");

uint32_t size = pSys->m_uiMemsize;
union CValue *p = (union CValue *)(pSys->m_pMemory + (size-1));

while( (uint8_t *)p >= pSys->m_pStackSegment )
if( pSys->m_Regs[rsp].SelfPtr == p )
printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
--p;
/* while( p>=pSys->m_pStackSegment ) */
puts("n");


void Tagha_PrintData(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .data Segmentn");
for( uint8_t *p = pSys->m_pDataSegment ; p > pSys->m_pTextSegment ; --p )
printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintInstrs(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .text Segmentn");
for( uint8_t *p = pSys->m_pMemory ; p <= pSys->m_pTextSegment ; p++ )
printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintPtrs(const struct Tagha *restrict const pSys)

if( !pSys )
return;

puts("DEBUG ...---===---... Printing Pointers...n");
printf("Instruction Ptr: %p
nStack Ptr: %p
nStack Frame Ptr: %pn", pSys->m_Regs[rip].UCharPtr, pSys->m_Regs[rsp].UCharPtr, pSys->m_Regs[rbp].UCharPtr);
puts("n");


/* printf-style error reporter: prints a "Tagha Error" banner naming the
 * reporting function, then the message formatted from 'err' and the
 * variadic arguments. Does nothing when pSys or err is NULL.
 *
 * NOTE(review): this copy of the function is damaged. The block braces are
 * missing and the final printf ("Current Instr Addr") is cut off in the
 * middle of its format string, so its arguments are not visible here.
 * Restore it from the repository rather than guessing.
 */
void Tagha_PrintErr(const struct Tagha *restrict const pSys, const char *restrict funcname, const char *restrict err, ...)

if( !pSys or !err )
return;

va_list args;
va_start(args, err);
printf("[%sTagha Error%s]: **** %s reported: '", KRED, KNRM, funcname);
vprintf(err, args);
va_end(args);
printf("' ****nCurrent Instr Addr: %s%p

void Tagha_PrintRegData(const struct Tagha *restrict const pSys)

puts("ntPRINTING REGISTER DATA ==========================n");
for( uint8_t i=0 ; i<regsize ; i++ )
printf("register[%s] == %" PRIu64 "n", RegIDToStr(i), pSys->m_Regs[i].UInt64);
puts("tEND OF PRINTING REGISTER DATA ===============n");







share|improve this question





















  • What do you want from this review?
    – chux
    Jan 4 at 4:49










  • @chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
    – Nergal
    Jan 4 at 5:31










  • Why restrict in is_c_file()?
    – chux
    Jan 6 at 3:12






  • 1




    Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. It is better to include enough code here so that it can be compiled even if it cannot be linked.
    – chux
    Jan 6 at 3:16










  • @chux tagha.h is in the github repository. Alright, I'll add it to the OP.
    – Nergal
    Jan 7 at 3:36
















up vote
3
down vote

favorite












This is basically ANOTHER follow-up to this review here, but I've now set about completing the software project. The changes since the previous review are that I've added a complete script format with an embedding API, changed the VM to be register-based, and modeled the calling convention on x64's. As an aspiring software engineer, this is the first serious project that I've ever completed.



Remaining code on GitHub



tagha.h



#ifndef TAGHA_H_INCLUDED
#define TAGHA_H_INCLUDED


#ifdef __cplusplus
extern "C"
#endif

#include <inttypes.h>
#include <stdbool.h>
#include <iso646.h>

/* For Colored Debugging Printing!
 * ANSI escape sequences: each starts with the ESC byte (0x1B / octal 033)
 * followed by "[<code>m".
 */
#define KNRM	"\x1B[0m"	// Normal
#define KRED	"\x1B[31m"
#define KGRN	"\x1B[32m"
#define KYEL	"\x1B[33m"
#define KBLU	"\x1B[34m"
#define KMAG	"\x1B[35m"
#define KCYN	"\x1B[36m"
#define KWHT	"\x1B[37m"
#define RESET	"\033[0m"	// Reset obviously (same bytes as KNRM)

#define LOOP_COUNTER 1800000000

/*
 * type generic Hashmap (uses 64-bit int as pointers to accommodate 32-bit and 64-bit)
 *
 * One entry of a hashmap bucket chain.
 */
typedef struct KeyNode {
	uint64_t m_pData;		// payload: an integer, or a pointer round-tripped through uintptr_t
	const char *m_strKey;		// key string of this entry
	struct KeyNode *m_pNext;	// next entry in the same bucket, NULL at the end of the chain
} KeyNode;

/* String-keyed hashmap made of chained KeyNode buckets. */
typedef struct Hashmap {
	uint32_t size, count;		// size: number of buckets in m_ppTable; count: presumably the number of stored entries -- confirm in ds.c
	struct KeyNode **m_ppTable;	// bucket array; each slot is the head of a KeyNode chain (or NULL)
} Hashmap;

struct Tagha;
typedef struct Tagha Tagha;


/* the most basic values in C.
 * In ALL of programming, there's only 4 fundamental data:
 * Integers
 * Floats
 * Strings
 * References
 *
 * CValue overlays all of them in one 8-byte cell; every VM register is one
 * CValue. Each scalar type comes as a value, a pointer to it, and (where
 * more than one fits in 8 bytes) an array view.
 */
typedef union CValue {
	bool Bool, *BoolPtr, BoolArr[8];
	int8_t Char, *CharPtr, CharArr[8];
	int16_t Short, *ShortPtr, ShortArr[4];
	int32_t Int32, *Int32Ptr, Int32Arr[2];
	int64_t Int64, *Int64Ptr;
	
	uint8_t UChar, *UCharPtr, UCharArr[8];
	uint16_t UShort, *UShortPtr, UShortArr[4];
	uint32_t UInt32, *UInt32Ptr, UInt32Arr[2];
	uint64_t UInt64, *UInt64Ptr;
	
	float Float, *FloatPtr, FloatArr[2];
	double Double, *DoublePtr;
	
	void *Ptr, **PtrPtr;
	const char *String, **StringPtr;
	char *Str, **StrPtr;
	union CValue *SelfPtr;	// lets a register address memory in whole-CValue (8-byte) steps
} CValue;

/* // Just gonna leave this here in case we ever need it.
#define SIMD_BYTES 32
typedef union SIMDCValue
union CValue cvalue;
bool BoolSIMD[SIMD_BYTES];
int8_t CharSIMD[SIMD_BYTES];
int16_t ShortSIMD[SIMD_BYTES/2];
int32_t Int32SIMD[SIMD_BYTES/4];
int64_t Int64SIMD[SIMD_BYTES/8];

uint8_t UCharSIMD[SIMD_BYTES];
uint16_t UShortSIMD[SIMD_BYTES/2];
uint32_t UInt32SIMD[SIMD_BYTES/4];
uint64_t UInt64SIMD[SIMD_BYTES/8];

float FloatIMD[SIMD_BYTES/4];
double DoubleSIMD[SIMD_BYTES/8];
SIMDCValue;
*/

// API for scripts to call C/C++ host functions.
typedef void (*fnNative_t)(struct Tagha *const pEnv, union CValue params, union CValue *const pRetval, uint32_t uiArgc);

/* One entry of a native registration list: the script-visible name paired
 * with the host function that implements it. Tagha_RegisterNatives() stops
 * at the first entry whose name or function pointer is NULL.
 */
typedef struct NativeInfo {
	const char *strName;	// use as string literals
	fnNative_t pFunc;
} NativeInfo;


/* addressing modes
 * immediate - simple constant value.
 * register - register holds the exact data.
 * register indirect - register holds memory address and dereferenced. Can be used as displacement as well.
 * IPRelative - instruction ptr + offset. required for static data like global vars.
 *
 * Every constant is a distinct single bit, so values can be OR-ed together.
 */
enum AddrMode {
	Immediate	= 1,
	Register	= 2,
	RegIndirect	= 4,
	//IPRelative	= 8, // unused, will be replaced in the future with useful addr mode.
	Byte		= 16,
	TwoBytes	= 32,
	FourBytes	= 64,
	EightBytes	= 128,
};

// Register ID list
// 13 general purpose use registers + 3 reserved use.
enum RegID {
	// 'ras' is gen. purpose + accumulator
	// all native and tagha func return data that fits within 64-bits goes here.
	// natives can only return a single 8-byte piece of data.
	// if you need to return larger than 8 bytes...
	// use ras, rbs, and rcs. otherwise, return as pointer in ras.
	ras=0,rbs,rcs,
	
	// 10 more gen. purpose regs for whatever use.
	// when passing arguments, use registers rds to rms
	// since params are passed right to left.
	// put the rightmost arg in rms.
	// thus if you passed 10 args, the 1st arg would be in rds and 10th arg in rms.
	rds,
	res,rfs,rgs,
	rhs,ris,rjs,
	rks,rls,rms,
	
	// do not modify after this. Add more registers, if u need, above.
	rsp,rbp,	// stack ptrs, do not touch
	rip,		// instr ptr, do not touch as well.
	regsize		// for lazily updating RegID list
};


// for interactive mode.
/*
typedef struct TokenLine

struct TokenLine *m_pNext;
uint8_t *m_ucBytecode;
uint32_t m_uiNumBytes;
TokenLine;
*/

/* C global definitions.
 * usually C modules contain either functions or global vars visibly.
 * static variables and functions should NEVER be listed here as
 * static data of all types have internal linkage.
 * so a static local var, though "global", shouldn't come up in global var data
 */
enum DefType {
	DefGlobal=0,	// entry describes a global variable
	DefFunction=1,	// entry describes a function
};

/* Table entry for one script-level definition (function or global variable). */
typedef struct TaghaCDef {
	uint32_t m_uiOffset;	// where is func or global var location in memory?
	uint8_t m_ucDefType;	// type of definition (enum DefType), true if function.
} TaghaCDef;

/* Complete state of one VM instance: registers, script memory and its
 * segment pointers, the native/definition tables, and the run flags.
 */
struct Tagha {
	union CValue m_Regs[regsize];
	uint8_t
		*m_pMemory,		// script memory, entirely aligned by 8 bytes.
		*m_pStackSegment,	// stack segment ptr where the stack's lowest address lies.
		*m_pDataSegment,	// data segment is the address AFTER the stack segment ptr. Aligned by 8 bytes.
		*m_pTextSegment		// text segment is the address after the last global variable AKA the last opcode.
	;
	// stores a C/C++ function ptr using the script-side name as the key.
	char **m_pstrNativeCalls;	// natives string table.
	struct Hashmap
		*m_pmapNatives,		// native C/C++ interface hashmap.
		*m_pmapCDefs		// stores C definitions data like global vars and functions.
	;
	union CValue *m_pArgv;	// using union to force char** size to 8 bytes.
	uint32_t
		m_uiMemsize,		// total size of m_pMemory
		m_uiInstrSize,		// size of the text segment
		m_uiMaxInstrs,		// max amount of instrs a script can execute.
		m_uiNatives,		// amount of natives the script uses.
		m_uiFuncs,		// how many functions the script has.
		m_uiGlobals		// how many globals variables the script has.
	;
	int32_t m_iArgc;
	bool
		m_bSafeMode : 1,	// does the script want bounds checking?
		m_bDebugMode : 1,	// print debug info.
		m_bZeroFlag : 1		// conditional zero flag.
	;
};

/*
* I think you may wanna spend a bit thinking about what scope you want. A VM running 1 "script" (properly called a program, process, or thread) blurs the line between VM and interpreter. Having multiple programs means an OS program has to be built on top of the VM allowing it to run multiple programs concurrently.
*
* There's no reason not to make a good VM, provide one or two compilers/interpreters in its native language. You don't have to write an OS to write code for the VM.
If it's generic enough, somebody can come along later and build an OS on top
*
* Yeah, if you want it to be embedable, then just write an interpreter for one language with hooks to call it in other languages. If you need to run other languages on top, then go for a VM.
*
* There you go, so you don't even really need a VM to embed C
*
* You don't embed clang though. You build a backend so you can write binaries for tagha in any llvm language. You write an os kernel for tagha, allowing the compiler to be run in the machine.
*
* A kernel is a program written for the machine that manages the filesystem, peripherals, and programs running on the machine. With a kernel, you can run compiling systems that are entirely contained in the machine.
Otherwise, you use an external machine to compile the binary, then move the binary into the machine to be run as its program.
*
* Probably the most direct way to do an REPL interpreter is to do the same thing you do compiling; collect text from the script until you have enough to compile a block of code and execute it. It'll be slow because it lacks optimization, but it shouldn't need many changes to your code.
*/


// tagha_exec.c
int32_t Tagha_Exec(struct Tagha *const pSys);
const char *RegIDToStr(enum RegID id);


// tagha_api.c
struct Tagha *Tagha_New(void);
void Tagha_Init(struct Tagha *pSys);
void Tagha_LoadScriptByName(struct Tagha *pSys, char *filename);
void Tagha_LoadScriptFromMemory(struct Tagha *pSys, void *pMemory, uint64_t memsize);
bool Tagha_RegisterNatives(const struct Tagha *pSys, struct NativeInfo arrNatives);
void Tagha_Free(struct Tagha *pSys);
int32_t Tagha_RunScript(struct Tagha *pSys);
int32_t Tagha_CallFunc(struct Tagha *pSys, const char *strFunc);

#ifndef FREE_MEM
#define FREE_MEM(ptr) if( (ptr) ) free( (ptr) ), (ptr)=NULL
#endif

void Tagha_BuildFromFile(struct Tagha *pSys, const char *strFilename);
void Tagha_BuildFromPtr(struct Tagha *pSys, void *pProgram, uint64_t Programsize);

void Tagha_PrintPtrs(const struct Tagha *pSys);
void Tagha_PrintStack(const struct Tagha *pSys);
void Tagha_PrintData(const struct Tagha *pSys);
void Tagha_PrintInstrs(const struct Tagha *pSys);
void Tagha_PrintRegData(const struct Tagha *pSys);
void Tagha_Reset(struct Tagha *pSys);

void *Tagha_GetGlobalByName(const struct Tagha *pSys, const char *strGlobalName);
void Tagha_PushValues(struct Tagha *pSys, uint32_t uiArgs, union CValue values);
union CValue Tagha_PopValue(struct Tagha *pSys);
void Tagha_SetCmdArgs(struct Tagha *pSys, char *argv);

uint32_t Tagha_GetMemSize(const struct Tagha *pSys);
uint32_t Tagha_GetInstrSize(const struct Tagha *pSys);
uint32_t Tagha_GetMaxInstrs(const struct Tagha *pSys);
uint32_t Tagha_GetNativeCount(const struct Tagha *pSys);
uint32_t Tagha_GetFuncCount(const struct Tagha *pSys);
uint32_t Tagha_GetGlobalsCount(const struct Tagha *pSys);
bool Tagha_IsSafemodeActive(const struct Tagha *pSys);
bool Tagha_IsDebugActive(const struct Tagha *pSys);
void Tagha_PrintErr(const struct Tagha *pSys, const char *funcname, const char *err, ...);

// ds.c
struct Hashmap *Map_New(void);
void Map_Init(struct Hashmap *map);
void Map_Free(struct Hashmap *map);
uint64_t Map_Len(const struct Hashmap *map);

void Map_Rehash(struct Hashmap *map);
bool Map_Insert(struct Hashmap *map, const char *strKey, uint64_t pData);
uint64_t Map_Get(const struct Hashmap *map, const char *strKey);
void Map_Set(const struct Hashmap *map, const char *strKey, uint64_t pData);
void Map_Delete(struct Hashmap *map, const char *strKey);
bool Map_HasKey(const struct Hashmap *map, const char *strKey);
const char *Map_GetKey(const struct Hashmap *map, const char *strKey);

/*
void Map_Rehash_int(struct Hashmap *);
bool Map_Insert_int(struct Hashmap *, const uint64_t, void *);
void *Map_Get_int(const struct Hashmap *, const uint64_t);
void Map_Delete_int(struct Hashmap *, const uint64_t);
bool Map_HasKey_int(const struct Hashmap *, const uint64_t);
*/
uint64_t gethash64(const char *strKey);
uint32_t gethash32(const char *strKey);
uint64_t int64hash(uint64_t x);
uint32_t int32hash(uint32_t x);


/*
 * Opcode list, written once as an X-macro so the same list can be expanded
 * into the enum below (and into any other per-opcode table).
 *
 * r = register is first operand
 * m = memory address is first operand
 */
#define INSTR_SET \
	X(halt) \
	/* single operand opcodes */ \
	/* stack ops */ \
	X(push) X(pop) \
	/* unary arithmetic and bitwise ops */ \
	X(neg) X(inc) X(dec) X(bnot) \
	/* jump ops */ \
	X(jmp) X(jz) X(jnz) \
	\
	/* subroutine ops */ \
	X(call) X(ret) X(callnat) \
	\
	/* two operand opcodes */ \
	X(movr) X(movm) X(lea) \
	/* signed and unsigned integer arithmetic ops */ \
	X(addr) X(addm) X(uaddr) X(uaddm) \
	X(subr) X(subm) X(usubr) X(usubm) \
	X(mulr) X(mulm) X(umulr) X(umulm) \
	X(divr) X(divm) X(udivr) X(udivm) \
	X(modr) X(modm) X(umodr) X(umodm) \
	/* bitwise ops */ \
	X(shrr) X(shrm) X(shlr) X(shlm) \
	X(andr) X(andm) X(orr) X(orm) X(xorr) X(xorm) \
	/* comparison ops */ \
	X(ltr) X(ltm) X(ultr) X(ultm) \
	X(gtr) X(gtm) X(ugtr) X(ugtm) \
	X(cmpr) X(cmpm) X(ucmpr) X(ucmpm) \
	X(neqr) X(neqm) X(uneqr) X(uneqm) \
	X(reset) \
	\
	/* floating point opcodes */ \
	X(int2float) X(int2dbl) X(float2dbl) X(dbl2float) \
	X(faddr) X(faddm) X(fsubr) X(fsubm) X(fmulr) X(fmulm) X(fdivr) X(fdivm) \
	X(fneg) X(fltr) X(fltm) X(fgtr) X(fgtm) X(fcmpr) X(fcmpm) X(fneqr) X(fneqm) \
	/* misc opcodes */ \
	X(nop)

// expand every X(name) to `name,` so opcodes are numbered from 0 in list order.
#define X(x) x,
enum InstrSet { INSTR_SET };
#undef X

#ifdef __cplusplus

#endif

#endif // TAGHA_H_INCLUDED


tagha_api.c



#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "tagha.h"


/* Tagha File Structure (Dec 23, 2017)
* ------------------------------ start of header ------------------------------
* 2 bytes: magic verifier ==> 0xC0DE
* 4 bytes: stack segment size (aligned by 8 bytes)
* 4 bytes: data segment size
* 1 byte: safemode and debugmode flags
* ------------------------------ end of header ------------------------------
* .natives table
* 4 bytes: amount of natives
* n bytes: native table
* 4 bytes: string size + '\0' of native string
* n bytes: native string.
*
* .functions table
* 4 bytes: amount of functions
* n bytes: functions table
* 4 bytes: string size + '\0' of func string
* n bytes: function string
* 4 bytes: offset
*
* .globalvars table
* 4 bytes: amount of global vars
* n bytes: global vars table
* 4 bytes: string size + '\0' of global var string
* n bytes: global var string
* 4 bytes: offset
*
* n bytes: .data section initial values.
* n bytes: .text section
*/

static uint64_t get_file_size(FILE *pFile);
static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile);


/*
 * Heap-allocates one zero-filled VM and hands it to Tagha_Init().
 * Returns the new VM, or NULL when the allocation failed (Tagha_Init()
 * ignores a NULL argument, so that case needs no special handling here).
 */
struct Tagha *Tagha_New(void)
{
	struct Tagha *const vm = calloc(1, sizeof *vm);
	Tagha_Init(vm);
	return vm;
}


void Tagha_Init(struct Tagha *restrict const pSys)

if( !pSys )
return;

*pSys = (struct Tagha)0;

if( !pSys->m_pmapCDefs )
pSys->m_pmapCDefs = Map_New();
// if we can't allocate our C Definitions
// we can't run code in general as the definitions
// contain functions and global vars!
if( !pSys->m_pmapCDefs )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize C Definitions Map%s ****n", KRED, RESET, KGRN, RESET);
return;

else Map_Init(pSys->m_pmapCDefs);

if( !pSys->m_pmapNatives )
pSys->m_pmapNatives = Map_New();
if( !pSys->m_pmapNatives )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Native Map%s ****n", KRED, RESET, KGRN, RESET);
else Map_Init(pSys->m_pmapNatives);

if( !pSys->m_pArgv )
pSys->m_iArgc = 0;
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));
if( !pSys->m_pArgv )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Command-line Args String Vector%s ****n", KRED, RESET, KGRN, RESET);
else pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



/* Returns true when `str` ends with the exact byte sequence `suffix`. */
static bool filename_ends_with(const char *str, const char *suffix)
{
	const size_t len = strlen(str);
	const size_t suffix_len = strlen(suffix);
	return len >= suffix_len && memcmp(str + (len - suffix_len), suffix, suffix_len) == 0;
}

/*
 * True when `filename` ends in ".c".
 * NULL, empty and too-short names yield false. The comparison is done on
 * bytes, so it neither reads in front of the string nor depends on the
 * host's endianness or alignment rules (the earlier version loaded the
 * last two bytes through an int16_t pointer and compared with 0x632E).
 */
static bool is_c_file(const char *filename)
{
	return filename != NULL && filename_ends_with(filename, ".c");
}

/*
 * True when `filename` ends in ".TBC" (upper case, the byte sequence the
 * earlier constant 0x4342542E spelled on a little-endian host).
 * NULL, empty and too-short names yield false.
 */
static bool is_tbc_file(const char *filename)
{
	return filename != NULL && filename_ends_with(filename, ".TBC");
}



void Tagha_LoadScriptByName(struct Tagha *const pSys, char *restrict strFilename)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromFile(pSys, strFilename);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;



void Tagha_LoadScriptFromMemory(struct Tagha *const pSys, void *restrict pMemory, const uint64_t memsize)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromPtr(pSys, pMemory, memsize);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;




bool Tagha_RegisterNatives(const struct Tagha *restrict const pSys, struct NativeInfo arrNatives)

if( !pSys or !pSys->m_pmapNatives or !arrNatives )
return false;

for( struct NativeInfo *n=arrNatives ; n->pFunc and n->strName ; n++ )
Map_Insert(pSys->m_pmapNatives, n->strName, (uintptr_t)n->pFunc);
return true;



int32_t Tagha_RunScript(struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return -1;

// make sure 'main' exists.
else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call main with a NULL definition table!");
return -1;

// make sure we have the memory for running.
else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-32) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;

// get instruction offset to main.
struct TaghaCDef *pMainData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, "main");
if( !pMainData or pMainData->m_ucDefType != DefFunction )
Tagha_PrintErr(pSys, __func__, "function 'main' doesn't exist!");
return -1;

pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pMainData->m_uiOffset;

// push argv and argc to registers.
// use 'uintptr_t' so we can force 4-byte pointers as 8-byte.
pSys->m_Regs[res].UInt64 = (uintptr_t)pSys->m_pArgv;
pSys->m_Regs[rds].Int64 = pSys->m_iArgc;

(--pSys->m_Regs[rsp].SelfPtr)->Int64 = -1L; // push bullshit ret address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64; // push rbp

if( pSys->m_bDebugMode )
printf("Tagha_RunScript :: pushed argc: %" PRIi32 " and argv %pn", pSys->m_iArgc, pSys->m_pArgv);

return Tagha_Exec(pSys);


int32_t Tagha_CallFunc(struct Tagha *restrict const pSys, const char *restrict strFunc)

if( !pSys or !strFunc )
return -1;

else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call functions using a NULL function table!");
return -1;

else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-16) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;


struct TaghaCDef *pFuncData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strFunc);
if( !pFuncData or pFuncData->m_ucDefType != DefFunction)
Tagha_PrintErr(pSys, __func__, "function '%s' doesn't exist!", strFunc);
return -1;


// save return address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = (uintptr_t)pSys->m_Regs[rip].UCharPtr+1;

// jump to the function entry address.
pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pFuncData->m_uiOffset;

// push bp and copy sp to bp.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64;

return Tagha_Exec(pSys);



// need this to determine the text segment size.
/*
 * Returns the size of an open file in bytes and leaves the stream
 * positioned at its beginning.
 *
 * pFile: open stream (binary mode recommended); NULL yields 0.
 *
 * Returns 0 when the size cannot be determined. ftell() reports failure as
 * -1L, which must not be converted to uint64_t (it would turn into a huge
 * bogus size), so only positive results are accepted.
 */
static uint64_t get_file_size(FILE *restrict pFile)
{
	if( !pFile )
		return 0;
	
	uint64_t size = 0;
	if( fseek(pFile, 0, SEEK_END) == 0 ) {
		const long end = ftell(pFile);
		if( end > 0 )
			size = (uint64_t)end;
	}
	// always hand the stream back at offset 0 so the caller can start reading.
	rewind(pFile);
	return size;
}


static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignores = 0;

// see if the script is using any natives.
pSys->m_pstrNativeCalls = NULL;
ignores = fread(&pSys->m_uiNatives, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Natives: '%" PRIu32 "'n", pSys->m_uiNatives);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiNatives )
return bytecount;

// script has natives? Copy their names so we can use them on VM natives hashmap later.
pSys->m_pstrNativeCalls = calloc(pSys->m_uiNatives, sizeof(char *));
if( !pSys->m_pstrNativeCalls )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native Table%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


for( uint32_t i=0 ; i<pSys->m_uiNatives ; i++ )
uint32_t str_size;
ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate memory to hold the native's name.
pSys->m_pstrNativeCalls[i] = calloc(str_size, sizeof(char));
if( !pSys->m_pstrNativeCalls[i] )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


// read in the native's name.
ignores = fread(pSys->m_pstrNativeCalls[i], sizeof(char), str_size, *ppFile);
bytecount += str_size;
printf("[Tagha Load Script] :: Copied Native Name: '%s' @ %pn", pSys->m_pstrNativeCalls[i], pSys->m_pstrNativeCalls+i);

pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// see if the script has its own functions.
// This table is so host or other script can call these functions by name or address.
ignore_warns = fread(&pSys->m_uiFuncs, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiFuncs )
return bytecount;

// copy the function data from the header.
for( uint32_t i=0 ; i<pSys->m_uiFuncs ; i++ )
uint32_t str_size;
ignore_warns = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate the hashmap function key.
char *strFunc = calloc(str_size, sizeof(char));
if( !strFunc )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Table String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */
ignore_warns = fread(strFunc, sizeof(char), str_size, *ppFile);
bytecount += str_size;

// copy func's header data to our table
// then store the table to our function hashmap with the key
// we allocated earlier.
struct TaghaCDef *pFuncData = calloc(1, sizeof(struct TaghaCDef));
if( !pFuncData )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Data%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */

ignore_warns = fread(&pFuncData->m_uiOffset, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
printf("[Tagha Load Script] :: Copied Function name '%s' /* for */
pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// check if the script has global variables.
ignore_warns = fread(&pSys->m_uiGlobals, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Global Vars: %" PRIu32 "n", pSys->m_uiGlobals);
bytecount += sizeof(uint32_t);
uint32_t globalbytes = 0;
if( !pSys->m_uiGlobals )
return bytecount;

for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) offset: %" PRIu32 "n", strGlobal, pGlobalData->m_uiOffset);

// insert the global var's table to our hashmap.
pGlobalData->m_ucDefType = DefGlobal;
Map_Insert(pSys->m_pmapCDefs, strGlobal, (uintptr_t)pGlobalData);
strGlobal = NULL; pGlobalData = NULL;
/* for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) */
pSys = NULL;
return bytecount;


void Tagha_BuildFromFile(struct Tagha *pSys, const char *restrict strFilename)
highest address


void Tagha_BuildFromPtr(struct Tagha *restrict pSys, void *pProgram, const uint64_t Programsize)
data segment

void Tagha_Free(struct Tagha *restrict const pSys)

if( !pSys )
return;

// kill memory
FREE_MEM(pSys->m_pMemory);

// free our native table
uint32_t i, Size;
if( pSys->m_pstrNativeCalls )
for( i=0 ; i<pSys->m_uiNatives ; i++ )
FREE_MEM(pSys->m_pstrNativeCalls[i]);

memset(pSys->m_pstrNativeCalls, 0, pSys->m_uiNatives);
FREE_MEM(pSys->m_pstrNativeCalls);

// free our C definitions hashmap and all the tables in it.
if( pSys->m_pmapCDefs )
struct KeyNode
*restrict kv = NULL,
*next = NULL
;
Size = pSys->m_pmapCDefs->size;
for( i=0 ; i<Size ; i++ )
for( kv = pSys->m_pmapCDefs->m_ppTable[i] ; kv ; kv = next )
next = kv->m_pNext;
if( kv->m_pData )
free((struct TaghaCDef *)(uintptr_t)kv->m_pData), kv->m_pData = 0;
if( kv->m_strKey )
free((char *)kv->m_strKey), kv->m_strKey = NULL;


Map_Free(pSys->m_pmapCDefs);
FREE_MEM(pSys->m_pmapCDefs);


// since the system's native hashmap has nothing allocated,
// we just free the hashmap's internal data and then the hashmap itself.
if( pSys->m_pmapNatives )
Map_Free(pSys->m_pmapNatives);
FREE_MEM(pSys->m_pmapNatives);


// free our script argument vector.
if( pSys->m_pArgv )
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);
FREE_MEM(pSys->m_pArgv);


// set our stack pointer pointers to NULL
pSys->m_Regs[rip].UCharPtr = pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = NULL;



void Tagha_Reset(struct Tagha *restrict const pSys)

if( !pSys )
return;

// resets the script without crashing Tagha and the host.
memset(pSys->m_pTextSegment+1, 0, pSys->m_uiMemsize-pSys->m_uiInstrSize);
pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = pSys->m_pMemory + (pSys->m_uiMemsize-1);

memset(pSys->m_Regs, 0, sizeof(union CValue) * rsp);
// TODO: reset global variable data to original values?



void *Tagha_GetGlobalByName(const struct Tagha *restrict const pSys, const char *restrict strGlobalName)

if( !pSys or !pSys->m_pmapCDefs )
return NULL;

// get the global's .data segment offset then return the pointer to that offset.
struct TaghaCDef *pOffset = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strGlobalName);
return pOffset and pOffset->m_ucDefType==DefGlobal ? (pSys->m_pTextSegment+1)+ pOffset->m_uiOffset : NULL;


void Tagha_PushValues(struct Tagha *restrict const pSys, const uint32_t uiArgs, union CValue values)

if( !pSys or !pSys->m_pMemory )
return;

// remember that arguments must be passed right to left.
// we have enough args to fit in registers.
if( uiArgs <= 10 )
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*uiArgs);

// we have too many args, use both regs and stack.
else if( uiArgs>10 )
// first push args into reg.
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*10);

// next, push the remaining values from last to first.
if( pSys->m_bSafeMode and (pSys->m_Regs[rsp].SelfPtr-(uiArgs-10)) < (union CValue *)pSys->m_pStackSegment )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return;

memcpy(pSys->m_Regs[rsp].SelfPtr, values+10, sizeof(union CValue)*(uiArgs-10));
pSys->m_Regs[rsp].SelfPtr -= (uiArgs-10);



union CValue Tagha_PopValue(struct Tagha *restrict const pSys)

union CValue val= .UInt64=0L ;
if( !pSys or !pSys->m_pMemory )
printf("[%sTagha Pop%s]: **** %pSys is NULL%s ****n", KRED, RESET, KGRN, RESET);
return val;

return pSys->m_Regs[ras];


void Tagha_SetCmdArgs(struct Tagha *restrict const pSys, char *argv)

if( !pSys or !pSys->m_pMemory or !argv )
return;

// clear old arguments, if any.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);

// get the size of argument vector
uint32_t newargc = 0;
while( argv[++newargc] != NULL );

// resize our system's argument vector.
if( pSys->m_iArgc != newargc )
pSys->m_iArgc = newargc;
FREE_MEM(pSys->m_pArgv);
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));


/* For Implementing 'int argc' and 'char *argv'
* C Standards dictates the following...
* - The value of argc shall be nonnegative.
* - The parameters argc and argv and the strings pointed to by the argv array shall be modifiable by the program
* - argv[argc] shall be a null pointer.
* - If the value of argc is greater than zero, the array members argv[0] through argv[argc-1] inclusive shall contain pointers to strings, which are given implementation-defined values by the host environment prior to program startup.
* - If the value of argc is greater than zero, the string pointed to by argv[0] represents the program name; argv[0][0] shall be the null character if the program name is not available from the host environment. If the value of argc is greater than one, the strings pointed to by argv[1] through argv[argc-1] represent the program parameters.
*/

// Copy down our argument vector's strings.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
size_t strsize = strlen(argv[i])+1;
pSys->m_pArgv[i].Str = calloc(strsize, sizeof(char));

if( pSys->m_pArgv[i].Str )
strncpy(pSys->m_pArgv[i].Str, argv[i], strsize);
pSys->m_pArgv[i].Str[strsize-1] = 0;


pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



uint32_t Tagha_GetMemSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMemsize;

uint32_t Tagha_GetInstrSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiInstrSize;

uint32_t Tagha_GetMaxInstrs(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMaxInstrs;

uint32_t Tagha_GetNativeCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiNatives;

uint32_t Tagha_GetFuncCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiFuncs;

uint32_t Tagha_GetGlobalsCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiGlobals;

bool Tagha_IsSafemodeActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bSafeMode;

bool Tagha_IsDebugActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bDebugMode;




void Tagha_PrintStack(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .stack Segmentn");

uint32_t size = pSys->m_uiMemsize;
union CValue *p = (union CValue *)(pSys->m_pMemory + (size-1));

while( (uint8_t *)p >= pSys->m_pStackSegment )
if( pSys->m_Regs[rsp].SelfPtr == p )
printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
--p;
/* while( p>=pSys->m_pStackSegment ) */
puts("n");


void Tagha_PrintData(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .data Segmentn");
for( uint8_t *p = pSys->m_pDataSegment ; p > pSys->m_pTextSegment ; --p )
printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintInstrs(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .text Segmentn");
for( uint8_t *p = pSys->m_pMemory ; p <= pSys->m_pTextSegment ; p++ )
printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintPtrs(const struct Tagha *restrict const pSys)

if( !pSys )
return;

puts("DEBUG ...---===---... Printing Pointers...n");
printf("Instruction Ptr: %p
nStack Ptr: %p
nStack Frame Ptr: %pn", pSys->m_Regs[rip].UCharPtr, pSys->m_Regs[rsp].UCharPtr, pSys->m_Regs[rbp].UCharPtr);
puts("n");


void Tagha_PrintErr(const struct Tagha *restrict const pSys, const char *restrict funcname, const char *restrict err, ...)

if( !pSys or !err )
return;

va_list args;
va_start(args, err);
printf("[%sTagha Error%s]: **** %s reported: '", KRED, KNRM, funcname);
vprintf(err, args);
va_end(args);
printf("' ****nCurrent Instr Addr: %s%p

void Tagha_PrintRegData(const struct Tagha *restrict const pSys)

puts("ntPRINTING REGISTER DATA ==========================n");
for( uint8_t i=0 ; i<regsize ; i++ )
printf("register[%s] == %" PRIu64 "n", RegIDToStr(i), pSys->m_Regs[i].UInt64);
puts("tEND OF PRINTING REGISTER DATA ===============n");







share|improve this question





















  • What do you want from this review?
    – chux
    Jan 4 at 4:49










  • @chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
    – Nergal
    Jan 4 at 5:31










  • Why restrict in is_c_file()?
    – chux
    Jan 6 at 3:12






  • 1




    Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. Better to include enough code here so that it can be compiled even if it cannot be linked.
    – chux
    Jan 6 at 3:16










  • @chux tagha.h is in the github repository. Alright, I'll add it to the OP.
    – Nergal
    Jan 7 at 3:36












up vote
3
down vote

favorite









up vote
3
down vote

favorite











Basically ANOTHER follow-up to this review here, but I've set about completing the software project. The changes since the previous review are that I've added a complete script format with an embedding API, changed the VM to be register-based, and modeled the calling convention on x64's. As an aspiring software engineer, this is the first serious project that I've completed.



Remaining code on GitHub



tagha.h



#ifndef TAGHA_H_INCLUDED
#define TAGHA_H_INCLUDED


#ifdef __cplusplus
extern "C"
#endif

#include <inttypes.h>
#include <stdbool.h>
#include <iso646.h>

/* For Colored Debugging Printing! (ANSI escape sequences, ESC == 0x1B == \033) */
#define KNRM	"\x1B[0m"	// Normal
#define KRED	"\x1B[31m"
#define KGRN	"\x1B[32m"
#define KYEL	"\x1B[33m"
#define KBLU	"\x1B[34m"
#define KMAG	"\x1B[35m"
#define KCYN	"\x1B[36m"
#define KWHT	"\x1B[37m"
#define RESET	"\033[0m"	// Reset obviously

#define LOOP_COUNTER 1800000000

/*
 * type generic Hashmap (uses 64-bit int as pointers to accommodate 32-bit and 64-bit)
 */
typedef struct KeyNode {
	uint64_t m_pData;		// stored value; pointers are stored after a cast through uintptr_t.
	const char *m_strKey;		// key string of this entry.
	struct KeyNode *m_pNext;	// next node in the same bucket chain.
} KeyNode;

/* String-keyed hash table built from KeyNode chains. */
typedef struct Hashmap {
	uint32_t size, count;		// size: number of slots in m_ppTable; count: presumably the number of stored entries (confirm in ds.c).
	struct KeyNode **m_ppTable;	// array of bucket heads.
} Hashmap;

struct Tagha;
typedef struct Tagha Tagha;


/* the most basic values in C.
* In ALL of programming, there's only 4 fundamental data:
* Integers
* Floats
* Strings
* References
*/
/* One 8-byte value slot that can be viewed as any scalar, small array, or pointer type. */
typedef union CValue {
	bool Bool, *BoolPtr, BoolArr[8];
	int8_t Char, *CharPtr, CharArr[8];
	int16_t Short, *ShortPtr, ShortArr[4];
	int32_t Int32, *Int32Ptr, Int32Arr[2];
	int64_t Int64, *Int64Ptr;
	
	uint8_t UChar, *UCharPtr, UCharArr[8];
	uint16_t UShort, *UShortPtr, UShortArr[4];
	uint32_t UInt32, *UInt32Ptr, UInt32Arr[2];
	uint64_t UInt64, *UInt64Ptr;
	
	float Float, *FloatPtr, FloatArr[2];
	double Double, *DoublePtr;
	
	void *Ptr, **PtrPtr;
	const char *String, **StringPtr;
	char *Str, **StrPtr;
	union CValue *SelfPtr;
} CValue;

/* // Just gonna leave this here in case we ever need it.
#define SIMD_BYTES 32
typedef union SIMDCValue
union CValue cvalue;
bool BoolSIMD[SIMD_BYTES];
int8_t CharSIMD[SIMD_BYTES];
int16_t ShortSIMD[SIMD_BYTES/2];
int32_t Int32SIMD[SIMD_BYTES/4];
int64_t Int64SIMD[SIMD_BYTES/8];

uint8_t UCharSIMD[SIMD_BYTES];
uint16_t UShortSIMD[SIMD_BYTES/2];
uint32_t UInt32SIMD[SIMD_BYTES/4];
uint64_t UInt64SIMD[SIMD_BYTES/8];

float FloatIMD[SIMD_BYTES/4];
double DoubleSIMD[SIMD_BYTES/8];
SIMDCValue;
*/

// API for scripts to call C/C++ host functions.
// pEnv: calling VM; params: argument values (uiArgc of them); pRetval: where the native stores its result.
typedef void (*fnNative_t)(struct Tagha *const pEnv, union CValue *params, union CValue *const pRetval, uint32_t uiArgc);

// Name/function pair used to register a host function with a VM.
typedef struct NativeInfo {
	const char *strName;	// use as string literals
	fnNative_t pFunc;
} NativeInfo;


/* addressing modes
* immediate - simple constant value.
* register - register holds the exact data.
* register indirect - register holds memory address and dereferenced. Can be used as displacement as well.
* IPRelative - instruction ptr + offset. required for static data like global vars.
*/
/* Bit flags: the low bits select the addressing mode, the high bits the operand width. */
enum AddrMode {
	Immediate	= 1,
	Register	= 2,
	RegIndirect	= 4,
	//IPRelative	= 8, // unused, will be replaced in the future with useful addr mode.
	Byte		= 16,
	TwoBytes	= 32,
	FourBytes	= 64,
	EightBytes	= 128,
};

// Register ID list
// 13 general purpose use registers + 3 reserved use.
enum RegID {
	// 'ras' is gen. purpose + accumulator
	// all native and tagha func return data that fits within 64-bits goes here.
	// natives can only return a single 8-byte piece of data.
	// if you need to return larger than 8 bytes...
	// use ras, rbs, and rcs. otherwise, return as pointer in ras.
	ras=0,rbs,rcs,
	
	// 10 more gen. purpose regs for whatever use.
	// when passing arguments, use registers rds to rms
	// since params are passed right to left.
	// put the rightmost arg in rms.
	// thus if you passed 10 args, the 1st arg would be in rds and 10th arg in rms.
	rds,
	res,rfs,rgs,
	rhs,ris,rjs,
	rks,rls,rms,
	
	// do not modify after this. Add more registers, if u need, above.
	rsp,rbp,	// stack ptrs, do not touch
	rip,		// instr ptr, do not touch as well.
	regsize		// for lazily updating RegID list
};


// for interactive mode.
/*
typedef struct TokenLine

struct TokenLine *m_pNext;
uint8_t *m_ucBytecode;
uint32_t m_uiNumBytes;
TokenLine;
*/

/* C global definitions.
* usually C modules contain either functions or global vars visibly.
* static variables and functions should NEVER be listed here as
* static data of all types have internal linkage.
* so a static local var, though "global", shouldn't come up in global var data
*/
/* Kind of an exported script definition; stored in TaghaCDef::m_ucDefType. */
enum DefType {
	DefGlobal=0,
	DefFunction=1,
};

/* Record for one exported function or global variable of a script. */
typedef struct TaghaCDef {
	uint32_t m_uiOffset;	// where is func or global var location in memory?
	uint8_t m_ucDefType;	// type of definition (enum DefType), true if function.
} TaghaCDef;

/* Complete state of one VM instance: registers, script memory, symbol tables, and flags. */
struct Tagha {
	union CValue m_Regs[regsize];
	uint8_t
		*m_pMemory,		// script memory, entirely aligned by 8 bytes.
		*m_pStackSegment,	// stack segment ptr where the stack's lowest address lies.
		*m_pDataSegment,	// data segment is the address AFTER the stack segment ptr. Aligned by 8 bytes.
		*m_pTextSegment		// text segment is the address after the last global variable AKA the last opcode.
	;
	// stores a C/C++ function ptr using the script-side name as the key.
	char **m_pstrNativeCalls;	// natives string table.
	struct Hashmap
		*m_pmapNatives,		// native C/C++ interface hashmap.
		*m_pmapCDefs		// stores C definitions data like global vars and functions.
	;
	union CValue *m_pArgv;	// using union to force char** size to 8 bytes.
	uint32_t
		m_uiMemsize,	// total size of m_pMemory
		m_uiInstrSize,	// size of the text segment
		m_uiMaxInstrs,	// max amount of instrs a script can execute.
		m_uiNatives,	// amount of natives the script uses.
		m_uiFuncs,	// how many functions the script has.
		m_uiGlobals	// how many globals variables the script has.
	;
	int32_t m_iArgc;
	bool
		m_bSafeMode : 1,	// does the script want bounds checking?
		m_bDebugMode : 1,	// print debug info.
		m_bZeroFlag : 1		// conditional zero flag.
	;
};

/*
* I think you may wanna spend a bit thinking about what scope you want. A VM running 1 "script" (properly called a program, process, or thread) blurs the line between VM and interpreter. Having multiple programs means an OS program has to be built on top of the VM allowing it to run multiple programs concurrently.
*
* There's no reason not to make a good VM, provide one or two compilers/interpreters in its native language. You don't have to write an OS to write code for the VM.
If it's generic enough, somebody can come along later and build an OS on top
*
* Yeah, if you want it to be embedable, then just write an interpreter for one language with hooks to call it in other languages. If you need to run other languages on top, then go for a VM.
*
* There you go, so you don't even really need a VM to embed C
*
* You don't embed clang though. You build a backend so you can write binaries for tagha in any llvm language. You write an os kernel for tagha, allowing the compiler to be run in the machine.
*
* A kernel is a program written for the machine that manages the filesystem, peripherals, and programs running on the machine. With a kernel, you can run compiling systems that are entirely contained in the machine.
Otherwise, you use an external machine to compile the binary, then move the binary into the machine to be run as its program.
*
* Probably the most direct way to do an REPL interpreter is to do the same thing you do compiling; collect text from the script until you have enough to compile a block of code and execute it. It'll be slow because it lacks optimization, but it shouldn't need many changes to your code.
*/


// tagha_exec.c
int32_t Tagha_Exec(struct Tagha *const pSys);
const char *RegIDToStr(enum RegID id);


// tagha_api.c
struct Tagha *Tagha_New(void);
void Tagha_Init(struct Tagha *pSys);
void Tagha_LoadScriptByName(struct Tagha *pSys, char *filename);
void Tagha_LoadScriptFromMemory(struct Tagha *pSys, void *pMemory, uint64_t memsize);
// arrNatives is an array ending with an entry whose strName or pFunc is NULL.
bool Tagha_RegisterNatives(const struct Tagha *pSys, struct NativeInfo *arrNatives);
void Tagha_Free(struct Tagha *pSys);
int32_t Tagha_RunScript(struct Tagha *pSys);
int32_t Tagha_CallFunc(struct Tagha *pSys, const char *strFunc);

#ifndef FREE_MEM
	/* Free `ptr` and reset it to NULL. Expands to a single expression so it is
	 * safe inside unbraced if/else; free(NULL) is a no-op so no test is needed.
	 * `ptr` is evaluated twice: do not pass expressions with side effects. */
	#define FREE_MEM(ptr)	((void)(free( (ptr) ), (ptr)=NULL))
#endif

void Tagha_BuildFromFile(struct Tagha *pSys, const char *strFilename);
void Tagha_BuildFromPtr(struct Tagha *pSys, void *pProgram, uint64_t Programsize);

void Tagha_PrintPtrs(const struct Tagha *pSys);
void Tagha_PrintStack(const struct Tagha *pSys);
void Tagha_PrintData(const struct Tagha *pSys);
void Tagha_PrintInstrs(const struct Tagha *pSys);
void Tagha_PrintRegData(const struct Tagha *pSys);
void Tagha_Reset(struct Tagha *pSys);

void *Tagha_GetGlobalByName(const struct Tagha *pSys, const char *strGlobalName);
// values is an array of uiArgs entries.
void Tagha_PushValues(struct Tagha *pSys, uint32_t uiArgs, union CValue *values);
union CValue Tagha_PopValue(struct Tagha *pSys);
// argv is a NULL-terminated string vector.
void Tagha_SetCmdArgs(struct Tagha *pSys, char *argv[]);

uint32_t Tagha_GetMemSize(const struct Tagha *pSys);
uint32_t Tagha_GetInstrSize(const struct Tagha *pSys);
uint32_t Tagha_GetMaxInstrs(const struct Tagha *pSys);
uint32_t Tagha_GetNativeCount(const struct Tagha *pSys);
uint32_t Tagha_GetFuncCount(const struct Tagha *pSys);
uint32_t Tagha_GetGlobalsCount(const struct Tagha *pSys);
bool Tagha_IsSafemodeActive(const struct Tagha *pSys);
bool Tagha_IsDebugActive(const struct Tagha *pSys);
void Tagha_PrintErr(const struct Tagha *pSys, const char *funcname, const char *err, ...);

// ds.c
struct Hashmap *Map_New(void);
void Map_Init(struct Hashmap *map);
void Map_Free(struct Hashmap *map);
uint64_t Map_Len(const struct Hashmap *map);

void Map_Rehash(struct Hashmap *map);
bool Map_Insert(struct Hashmap *map, const char *strKey, uint64_t pData);
uint64_t Map_Get(const struct Hashmap *map, const char *strKey);
void Map_Set(const struct Hashmap *map, const char *strKey, uint64_t pData);
void Map_Delete(struct Hashmap *map, const char *strKey);
bool Map_HasKey(const struct Hashmap *map, const char *strKey);
const char *Map_GetKey(const struct Hashmap *map, const char *strKey);

/*
void Map_Rehash_int(struct Hashmap *);
bool Map_Insert_int(struct Hashmap *, const uint64_t, void *);
void *Map_Get_int(const struct Hashmap *, const uint64_t);
void Map_Delete_int(struct Hashmap *, const uint64_t);
bool Map_HasKey_int(const struct Hashmap *, const uint64_t);
*/
uint64_t gethash64(const char *strKey);
uint32_t gethash32(const char *strKey);
uint64_t int64hash(uint64_t x);
uint32_t int32hash(uint32_t x);


/*
* r = register is first operand
* m = memory address is first operand
*/
/* X-macro list of every opcode; define X(name) before expanding INSTR_SET. */
#define INSTR_SET	\
	X(halt) \
	/* single operand opcodes */ \
	/* stack ops */ \
	X(push) X(pop) \
	/* unary arithmetic and bitwise ops */ \
	X(neg) X(inc) X(dec) X(bnot) \
	/* jump ops */ \
	X(jmp) X(jz) X(jnz) \
	\
	/* subroutine ops */ \
	X(call) X(ret) X(callnat) \
	\
	/* two operand opcodes */ \
	X(movr) X(movm) X(lea) \
	/* signed and unsigned integer arithmetic ops */ \
	X(addr) X(addm) X(uaddr) X(uaddm) \
	X(subr) X(subm) X(usubr) X(usubm) \
	X(mulr) X(mulm) X(umulr) X(umulm) \
	X(divr) X(divm) X(udivr) X(udivm) \
	X(modr) X(modm) X(umodr) X(umodm) \
	/* bitwise ops */ \
	X(shrr) X(shrm) X(shlr) X(shlm) \
	X(andr) X(andm) X(orr) X(orm) X(xorr) X(xorm) \
	/* comparison ops */ \
	X(ltr) X(ltm) X(ultr) X(ultm) \
	X(gtr) X(gtm) X(ugtr) X(ugtm) \
	X(cmpr) X(cmpm) X(ucmpr) X(ucmpm) \
	X(neqr) X(neqm) X(uneqr) X(uneqm) \
	X(reset) \
	\
	/* floating point opcodes */ \
	X(int2float) X(int2dbl) X(float2dbl) X(dbl2float) \
	X(faddr) X(faddm) X(fsubr) X(fsubm) X(fmulr) X(fmulm) X(fdivr) X(fdivm) \
	X(fneg) X(fltr) X(fltm) X(fgtr) X(fgtm) X(fcmpr) X(fcmpm) X(fneqr) X(fneqm) \
	/* misc opcodes */ \
	X(nop)

/* Opcode numbering follows the order of INSTR_SET, starting at halt == 0. */
#define X(x) x,
enum InstrSet { INSTR_SET };
#undef X

#ifdef __cplusplus

#endif

#endif // TAGHA_H_INCLUDED


tagha_api.c



#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "tagha.h"


/* Tagha File Structure (Dec 23, 2017)
* ------------------------------ start of header ------------------------------
* 2 bytes: magic verifier ==> 0xC0DE
* 4 bytes: stack segment size (aligned by 8 bytes)
* 4 bytes: data segment size
* 1 byte: safemode and debugmode flags
* ------------------------------ end of header ------------------------------
* .natives table
* 4 bytes: amount of natives
* n bytes: native table
* 4 bytes: string size + '\0' of native string
* n bytes: native string.
*
* .functions table
* 4 bytes: amount of functions
* n bytes: functions table
* 4 bytes: string size + '\0' of func string
* n bytes: function string
* 4 bytes: offset
*
* .globalvars table
* 4 bytes: amount of global vars
* n bytes: global vars table
* 4 bytes: string size + '\0' of global var string
* n bytes: global var string
* 4 bytes: offset
*
* n bytes: .data section initial values.
* n bytes: .text section
*/

static uint64_t get_file_size(FILE *pFile);
static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile);


/*
 * Allocate a zeroed VM on the heap and initialize it with Tagha_Init.
 * Returns NULL when the allocation fails (Tagha_Init ignores a NULL VM).
 */
struct Tagha *Tagha_New(void)
{
	struct Tagha *pNewVM = calloc(1, sizeof *pNewVM);
	Tagha_Init(pNewVM);
	return pNewVM;
}


void Tagha_Init(struct Tagha *restrict const pSys)

if( !pSys )
return;

*pSys = (struct Tagha)0;

if( !pSys->m_pmapCDefs )
pSys->m_pmapCDefs = Map_New();
// if we can't allocate our C Definitions
// we can't run code in general as the definitions
// contain functions and global vars!
if( !pSys->m_pmapCDefs )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize C Definitions Map%s ****n", KRED, RESET, KGRN, RESET);
return;

else Map_Init(pSys->m_pmapCDefs);

if( !pSys->m_pmapNatives )
pSys->m_pmapNatives = Map_New();
if( !pSys->m_pmapNatives )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Native Map%s ****n", KRED, RESET, KGRN, RESET);
else Map_Init(pSys->m_pmapNatives);

if( !pSys->m_pArgv )
pSys->m_iArgc = 0;
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));
if( !pSys->m_pArgv )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Command-line Args String Vector%s ****n", KRED, RESET, KGRN, RESET);
else pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



static bool is_c_file(const char *restrict filename)

if( !filename )
return false;

// iterate to end of string and then check backwards.
while( *++filename );
int16_t i = *(int16_t *)(filename-2);
return( i==0x632E

static bool is_tbc_file(const char *restrict filename)
i==0x4342542E );



void Tagha_LoadScriptByName(struct Tagha *const pSys, char *restrict strFilename)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromFile(pSys, strFilename);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;



void Tagha_LoadScriptFromMemory(struct Tagha *const pSys, void *restrict pMemory, const uint64_t memsize)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromPtr(pSys, pMemory, memsize);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;




bool Tagha_RegisterNatives(const struct Tagha *restrict const pSys, struct NativeInfo arrNatives)

if( !pSys or !pSys->m_pmapNatives or !arrNatives )
return false;

for( struct NativeInfo *n=arrNatives ; n->pFunc and n->strName ; n++ )
Map_Insert(pSys->m_pmapNatives, n->strName, (uintptr_t)n->pFunc);
return true;



int32_t Tagha_RunScript(struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return -1;

// make sure 'main' exists.
else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call main with a NULL definition table!");
return -1;

// make sure we have the memory for running.
else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-32) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;

// get instruction offset to main.
struct TaghaCDef *pMainData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, "main");
if( !pMainData or pMainData->m_ucDefType != DefFunction )
Tagha_PrintErr(pSys, __func__, "function 'main' doesn't exist!");
return -1;

pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pMainData->m_uiOffset;

// push argv and argc to registers.
// use 'uintptr_t' so we can force 4-byte pointers as 8-byte.
pSys->m_Regs[res].UInt64 = (uintptr_t)pSys->m_pArgv;
pSys->m_Regs[rds].Int64 = pSys->m_iArgc;

(--pSys->m_Regs[rsp].SelfPtr)->Int64 = -1L; // push bullshit ret address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64; // push rbp

if( pSys->m_bDebugMode )
printf("Tagha_RunScript :: pushed argc: %" PRIi32 " and argv %pn", pSys->m_iArgc, pSys->m_pArgv);

return Tagha_Exec(pSys);


int32_t Tagha_CallFunc(struct Tagha *restrict const pSys, const char *restrict strFunc)

if( !pSys or !strFunc )
return -1;

else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call functions using a NULL function table!");
return -1;

else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-16) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;


struct TaghaCDef *pFuncData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strFunc);
if( !pFuncData or pFuncData->m_ucDefType != DefFunction)
Tagha_PrintErr(pSys, __func__, "function '%s' doesn't exist!", strFunc);
return -1;


// save return address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = (uintptr_t)pSys->m_Regs[rip].UCharPtr+1;

// jump to the function entry address.
pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pFuncData->m_uiOffset;

// push bp and copy sp to bp.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64;

return Tagha_Exec(pSys);



// need this to determine the text segment size.
/*
 * Return the size of pFile in bytes and rewind it to the beginning.
 * Returns 0 when pFile is NULL or when seeking/telling fails; ftell's -1
 * error value is never converted into a huge unsigned size.
 */
static uint64_t get_file_size(FILE *restrict pFile)
{
	if( !pFile )
		return 0;
	
	uint64_t size = 0;
	if( !fseek(pFile, 0, SEEK_END) ) {
		const long end = ftell(pFile);
		if( end > 0 )
			size = (uint64_t)end;
		rewind(pFile);
	}
	return size;
}


static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignores = 0;

// see if the script is using any natives.
pSys->m_pstrNativeCalls = NULL;
ignores = fread(&pSys->m_uiNatives, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Natives: '%" PRIu32 "'n", pSys->m_uiNatives);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiNatives )
return bytecount;

// script has natives? Copy their names so we can use them on VM natives hashmap later.
pSys->m_pstrNativeCalls = calloc(pSys->m_uiNatives, sizeof(char *));
if( !pSys->m_pstrNativeCalls )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native Table%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


for( uint32_t i=0 ; i<pSys->m_uiNatives ; i++ )
uint32_t str_size;
ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate memory to hold the native's name.
pSys->m_pstrNativeCalls[i] = calloc(str_size, sizeof(char));
if( !pSys->m_pstrNativeCalls[i] )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


// read in the native's name.
ignores = fread(pSys->m_pstrNativeCalls[i], sizeof(char), str_size, *ppFile);
bytecount += str_size;
printf("[Tagha Load Script] :: Copied Native Name: '%s' @ %pn", pSys->m_pstrNativeCalls[i], pSys->m_pstrNativeCalls+i);

pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// see if the script has its own functions.
// This table is so host or other script can call these functions by name or address.
ignore_warns = fread(&pSys->m_uiFuncs, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiFuncs )
return bytecount;

// copy the function data from the header.
for( uint32_t i=0 ; i<pSys->m_uiFuncs ; i++ )
uint32_t str_size;
ignore_warns = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate the hashmap function key.
char *strFunc = calloc(str_size, sizeof(char));
if( !strFunc )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Table String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */
ignore_warns = fread(strFunc, sizeof(char), str_size, *ppFile);
bytecount += str_size;

// copy func's header data to our table
// then store the table to our function hashmap with the key
// we allocated earlier.
struct TaghaCDef *pFuncData = calloc(1, sizeof(struct TaghaCDef));
if( !pFuncData )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Data%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */

ignore_warns = fread(&pFuncData->m_uiOffset, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
printf("[Tagha Load Script] :: Copied Function name '%s' /* for */
pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// check if the script has global variables.
ignore_warns = fread(&pSys->m_uiGlobals, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Global Vars: %" PRIu32 "n", pSys->m_uiGlobals);
bytecount += sizeof(uint32_t);
uint32_t globalbytes = 0;
if( !pSys->m_uiGlobals )
return bytecount;

for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) offset: %" PRIu32 "n", strGlobal, pGlobalData->m_uiOffset);

// insert the global var's table to our hashmap.
pGlobalData->m_ucDefType = DefGlobal;
Map_Insert(pSys->m_pmapCDefs, strGlobal, (uintptr_t)pGlobalData);
strGlobal = NULL; pGlobalData = NULL;
/* for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) */
pSys = NULL;
return bytecount;


void Tagha_BuildFromFile(struct Tagha *pSys, const char *restrict strFilename)
highest address


void Tagha_BuildFromPtr(struct Tagha *restrict pSys, void *pProgram, const uint64_t Programsize)
data segment

void Tagha_Free(struct Tagha *restrict const pSys)

if( !pSys )
return;

// kill memory
FREE_MEM(pSys->m_pMemory);

// free our native table
uint32_t i, Size;
if( pSys->m_pstrNativeCalls )
for( i=0 ; i<pSys->m_uiNatives ; i++ )
FREE_MEM(pSys->m_pstrNativeCalls[i]);

memset(pSys->m_pstrNativeCalls, 0, pSys->m_uiNatives);
FREE_MEM(pSys->m_pstrNativeCalls);

// free our C definitions hashmap and all the tables in it.
if( pSys->m_pmapCDefs )
struct KeyNode
*restrict kv = NULL,
*next = NULL
;
Size = pSys->m_pmapCDefs->size;
for( i=0 ; i<Size ; i++ )
for( kv = pSys->m_pmapCDefs->m_ppTable[i] ; kv ; kv = next )
next = kv->m_pNext;
if( kv->m_pData )
free((struct TaghaCDef *)(uintptr_t)kv->m_pData), kv->m_pData = 0;
if( kv->m_strKey )
free((char *)kv->m_strKey), kv->m_strKey = NULL;


Map_Free(pSys->m_pmapCDefs);
FREE_MEM(pSys->m_pmapCDefs);


// since the system's native hashmap has nothing allocated,
// we just free the hashmap's internal data and then the hashmap itself.
if( pSys->m_pmapNatives )
Map_Free(pSys->m_pmapNatives);
FREE_MEM(pSys->m_pmapNatives);


// free our script argument vector.
if( pSys->m_pArgv )
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);
FREE_MEM(pSys->m_pArgv);


// set our stack pointer pointers to NULL
pSys->m_Regs[rip].UCharPtr = pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = NULL;



void Tagha_Reset(struct Tagha *restrict const pSys)

if( !pSys )
return;

// resets the script without crashing Tagha and the host.
memset(pSys->m_pTextSegment+1, 0, pSys->m_uiMemsize-pSys->m_uiInstrSize);
pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = pSys->m_pMemory + (pSys->m_uiMemsize-1);

memset(pSys->m_Regs, 0, sizeof(union CValue) * rsp);
// TODO: reset global variable data to original values?



void *Tagha_GetGlobalByName(const struct Tagha *restrict const pSys, const char *restrict strGlobalName)

if( !pSys or !pSys->m_pmapCDefs )
return NULL;

// get the global's .data segment offset then return the pointer to that offset.
struct TaghaCDef *pOffset = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strGlobalName);
return pOffset and pOffset->m_ucDefType==DefGlobal ? (pSys->m_pTextSegment+1)+ pOffset->m_uiOffset : NULL;


void Tagha_PushValues(struct Tagha *restrict const pSys, const uint32_t uiArgs, union CValue values)

if( !pSys or !pSys->m_pMemory )
return;

// remember that arguments must be passed right to left.
// we have enough args to fit in registers.
if( uiArgs <= 10 )
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*uiArgs);

// we have too many args, use both regs and stack.
else if( uiArgs>10 )
// first push args into reg.
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*10);

// next, push the remaining values from last to first.
if( pSys->m_bSafeMode and (pSys->m_Regs[rsp].SelfPtr-(uiArgs-10)) < (union CValue *)pSys->m_pStackSegment )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return;

memcpy(pSys->m_Regs[rsp].SelfPtr, values+10, sizeof(union CValue)*(uiArgs-10));
pSys->m_Regs[rsp].SelfPtr -= (uiArgs-10);



union CValue Tagha_PopValue(struct Tagha *restrict const pSys)

union CValue val= .UInt64=0L ;
if( !pSys or !pSys->m_pMemory )
printf("[%sTagha Pop%s]: **** %pSys is NULL%s ****n", KRED, RESET, KGRN, RESET);
return val;

return pSys->m_Regs[ras];


void Tagha_SetCmdArgs(struct Tagha *restrict const pSys, char *argv)

if( !pSys or !pSys->m_pMemory or !argv )
return;

// clear old arguments, if any.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);

// get the size of argument vector
uint32_t newargc = 0;
while( argv[++newargc] != NULL );

// resize our system's argument vector.
if( pSys->m_iArgc != newargc )
pSys->m_iArgc = newargc;
FREE_MEM(pSys->m_pArgv);
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));


/* For Implementing 'int argc' and 'char *argv'
* C Standards dictates the following...
* - The value of argc shall be nonnegative.
* - The parameters argc and argv and the strings pointed to by the argv array shall be modifiable by the program
* - argv[argc] shall be a null pointer.
* - If the value of argc is greater than zero, the array members argv[0] through argv[argc-1] inclusive shall contain pointers to strings, which are given implementation-defined values by the host environment prior to program startup.
* - If the value of argc is greater than zero, the string pointed to by argv[0] represents the program name; argv[0][0] shall be the null character if the program name is not available from the host environment. If the value of argc is greater than one, the strings pointed to by argv[1] through argv[argc-1] represent the program parameters.
*/

// Copy down our argument vector's strings.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
size_t strsize = strlen(argv[i])+1;
pSys->m_pArgv[i].Str = calloc(strsize, sizeof(char));

if( pSys->m_pArgv[i].Str )
strncpy(pSys->m_pArgv[i].Str, argv[i], strsize);
pSys->m_pArgv[i].Str[strsize-1] = 0;


pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



uint32_t Tagha_GetMemSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMemsize;

uint32_t Tagha_GetInstrSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiInstrSize;

uint32_t Tagha_GetMaxInstrs(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMaxInstrs;

uint32_t Tagha_GetNativeCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiNatives;

uint32_t Tagha_GetFuncCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiFuncs;

uint32_t Tagha_GetGlobalsCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiGlobals;

bool Tagha_IsSafemodeActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bSafeMode;

bool Tagha_IsDebugActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bDebugMode;




void Tagha_PrintStack(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .stack Segmentn");

uint32_t size = pSys->m_uiMemsize;
union CValue *p = (union CValue *)(pSys->m_pMemory + (size-1));

while( (uint8_t *)p >= pSys->m_pStackSegment )
if( pSys->m_Regs[rsp].SelfPtr == p )
printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
--p;
/* while( p>=pSys->m_pStackSegment ) */
puts("n");


void Tagha_PrintData(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .data Segmentn");
for( uint8_t *p = pSys->m_pDataSegment ; p > pSys->m_pTextSegment ; --p )
printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintInstrs(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .text Segmentn");
for( uint8_t *p = pSys->m_pMemory ; p <= pSys->m_pTextSegment ; p++ )
printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintPtrs(const struct Tagha *restrict const pSys)

if( !pSys )
return;

puts("DEBUG ...---===---... Printing Pointers...n");
printf("Instruction Ptr: %p
nStack Ptr: %p
nStack Frame Ptr: %pn", pSys->m_Regs[rip].UCharPtr, pSys->m_Regs[rsp].UCharPtr, pSys->m_Regs[rbp].UCharPtr);
puts("n");


void Tagha_PrintErr(const struct Tagha *restrict const pSys, const char *restrict funcname, const char *restrict err, ...)

if( !pSys or !err )
return;

va_list args;
va_start(args, err);
printf("[%sTagha Error%s]: **** %s reported: '", KRED, KNRM, funcname);
vprintf(err, args);
va_end(args);
printf("' ****nCurrent Instr Addr: %s%p

void Tagha_PrintRegData(const struct Tagha *restrict const pSys)

puts("ntPRINTING REGISTER DATA ==========================n");
for( uint8_t i=0 ; i<regsize ; i++ )
printf("register[%s] == %" PRIu64 "n", RegIDToStr(i), pSys->m_Regs[i].UInt64);
puts("tEND OF PRINTING REGISTER DATA ===============n");







share|improve this question













Basically ANOTHER follow-up to this review here, but I've set about completing the software project. The changes from the previous review are that I've added a complete script format with an embedding API, changed the VM to be register-based, and made the calling convention similar to x64's. As an aspiring software engineer, this is the first serious project that I've ever completed.



Remaining code on GitHub



tagha.h



#ifndef TAGHA_H_INCLUDED
#define TAGHA_H_INCLUDED


#ifdef __cplusplus
extern "C"
#endif

#include <inttypes.h>
#include <stdbool.h>
#include <iso646.h>

/* For Colored Debugging Printing! (ANSI escape sequences, ESC == 0x1B == \033) */
#define KNRM	"\x1B[0m"	// Normal
#define KRED	"\x1B[31m"
#define KGRN	"\x1B[32m"
#define KYEL	"\x1B[33m"
#define KBLU	"\x1B[34m"
#define KMAG	"\x1B[35m"
#define KCYN	"\x1B[36m"
#define KWHT	"\x1B[37m"
#define RESET	"\033[0m"	// Reset obviously

#define LOOP_COUNTER 1800000000

/*
 * type generic Hashmap (uses 64-bit int as pointers to accommodate 32-bit and 64-bit)
 */
typedef struct KeyNode {
	uint64_t m_pData;		// stored value; a 64-bit integer so it can also hold a pointer.
	const char *m_strKey;		// key string of this entry.
	struct KeyNode *m_pNext;	// link to another KeyNode.
} KeyNode;

/* String-keyed hash table built from KeyNode entries. */
typedef struct Hashmap {
	uint32_t size, count;		// presumably table capacity and number of stored entries (confirm in ds.c).
	struct KeyNode **m_ppTable;	// array of KeyNode pointers.
} Hashmap;

struct Tagha;
typedef struct Tagha Tagha;


/* the most basic values in C.
 * In ALL of programming, there's only 4 fundamental data:
 * Integers
 * Floats
 * Strings
 * References
 *
 * CValue overlays all of them in one slot so a register or stack cell
 * can be read as whichever type an instruction needs.
 */
typedef union CValue {
	bool Bool, *BoolPtr, BoolArr[8];
	int8_t Char, *CharPtr, CharArr[8];
	int16_t Short, *ShortPtr, ShortArr[4];
	int32_t Int32, *Int32Ptr, Int32Arr[2];
	int64_t Int64, *Int64Ptr;
	
	uint8_t UChar, *UCharPtr, UCharArr[8];
	uint16_t UShort, *UShortPtr, UShortArr[4];
	uint32_t UInt32, *UInt32Ptr, UInt32Arr[2];
	uint64_t UInt64, *UInt64Ptr;
	
	float Float, *FloatPtr, FloatArr[2];
	double Double, *DoublePtr;
	
	void *Ptr, **PtrPtr;
	const char *String, **StringPtr;
	char *Str, **StrPtr;
	union CValue *SelfPtr;
} CValue;

/* // Just gonna leave this here in case we ever need it.
#define SIMD_BYTES 32
typedef union SIMDCValue
union CValue cvalue;
bool BoolSIMD[SIMD_BYTES];
int8_t CharSIMD[SIMD_BYTES];
int16_t ShortSIMD[SIMD_BYTES/2];
int32_t Int32SIMD[SIMD_BYTES/4];
int64_t Int64SIMD[SIMD_BYTES/8];

uint8_t UCharSIMD[SIMD_BYTES];
uint16_t UShortSIMD[SIMD_BYTES/2];
uint32_t UInt32SIMD[SIMD_BYTES/4];
uint64_t UInt64SIMD[SIMD_BYTES/8];

float FloatIMD[SIMD_BYTES/4];
double DoubleSIMD[SIMD_BYTES/8];
SIMDCValue;
*/

// API for scripts to call C/C++ host functions.
typedef void (*fnNative_t)(struct Tagha *const pEnv, union CValue params, union CValue *const pRetval, uint32_t uiArgc);

/* Pairs a script-visible name with the host function that implements it.
 * Arrays of these are handed to Tagha_RegisterNatives. */
typedef struct NativeInfo {
	const char *strName;	// use as string literals
	fnNative_t pFunc;	// host function called for this native.
} NativeInfo;


/* addressing modes
 * immediate - simple constant value.
 * register - register holds the exact data.
 * register indirect - register holds memory address and dereferenced. Can be used as displacement as well.
 * IPRelative - instruction ptr + offset. required for static data like global vars.
 *
 * Every enumerator is a distinct bit, so values can be OR-ed together.
 * The Byte..EightBytes names suggest an operand width — TODO confirm
 * against the instruction decoder.
 */
enum AddrMode {
	Immediate	= 1,
	Register	= 2,
	RegIndirect	= 4,
	//IPRelative	= 8, // unused, will be replaced in the future with useful addr mode.
	Byte		= 16,
	TwoBytes	= 32,
	FourBytes	= 64,
	EightBytes	= 128,
};

// Register ID list
// 13 general purpose use registers + 3 reserved use.
enum RegID {
	// 'ras' is gen. purpose + accumulator
	// all native and tagha func return data that fits within 64-bits goes here.
	// natives can only return a single 8-byte piece of data.
	// if you need to return larger than 8 bytes...
	// use ras, rbs, and rcs. otherwise, return as pointer in ras.
	ras=0,rbs,rcs,
	
	// 10 more gen. purpose regs for whatever use.
	// when passing arguments, use registers rds to rms
	// since params are passed right to left.
	// put the rightmost arg in rms.
	// thus if you passed 10 args, the 1st arg would be in rds and 10th arg in rms.
	rds,
	res,rfs,rgs,
	rhs,ris,rjs,
	rks,rls,rms,
	
	// do not modify after this. Add more registers, if u need, above.
	rsp,rbp,	// stack ptrs, do not touch
	rip,		// instr ptr, do not touch as well.
	regsize		// for lazily updating RegID list
};


// for interactive mode.
/*
typedef struct TokenLine

struct TokenLine *m_pNext;
uint8_t *m_ucBytecode;
uint32_t m_uiNumBytes;
TokenLine;
*/

/* C global definitions.
 * usually C modules contain either functions or global vars visibly.
 * static variables and functions should NEVER be listed here as
 * static data of all types have internal linkage.
 * so a static local var, though "global", shouldn't come up in global var data
 *
 * DefType tags an entry of the definitions table as one or the other.
 */
enum DefType {
	DefGlobal=0,
	DefFunction=1,
};

/* One exported script definition (function or global variable). */
typedef struct TaghaCDef {
	uint32_t m_uiOffset;	// where is func or global var location in memory?
	uint8_t m_ucDefType;	// type of definition (enum DefType), true if function.
} TaghaCDef;

/* Complete state of one script VM: register file, script memory and its
 * segment pointers, lookup tables, command-line arguments, size counters
 * and mode flags. */
struct Tagha {
	union CValue m_Regs[regsize];
	uint8_t
		*m_pMemory,		// script memory, entirely aligned by 8 bytes.
		*m_pStackSegment,	// stack segment ptr where the stack's lowest address lies.
		*m_pDataSegment,	// data segment is the address AFTER the stack segment ptr. Aligned by 8 bytes.
		*m_pTextSegment		// text segment is the address after the last global variable AKA the last opcode.
	;
	char **m_pstrNativeCalls;	// natives string table.
	struct Hashmap
		*m_pmapNatives,		// native C/C++ interface hashmap; stores a C/C++ function ptr using the script-side name as the key.
		*m_pmapCDefs		// stores C definitions data like global vars and functions.
	;
	union CValue *m_pArgv;	// using union to force char** size to 8 bytes.
	uint32_t
		m_uiMemsize,	// total size of m_pMemory
		m_uiInstrSize,	// size of the text segment
		m_uiMaxInstrs,	// max amount of instrs a script can execute.
		m_uiNatives,	// amount of natives the script uses.
		m_uiFuncs,	// how many functions the script has.
		m_uiGlobals	// how many globals variables the script has.
	;
	int32_t m_iArgc;
	bool
		m_bSafeMode : 1,	// does the script want bounds checking?
		m_bDebugMode : 1,	// print debug info.
		m_bZeroFlag : 1		// conditional zero flag.
	;
};

/*
* I think you may wanna spend a bit thinking about what scope you want. A VM running 1 "script" (properly called a program, process, or thread) blurs the line between VM and interpreter. Having multiple programs means an OS program has to be built on top of the VM allowing it to run multiple programs concurrently.
*
* There's no reason not to make a good VM, provide one or two compilers/interpreters in its native language. You don't have to write an OS to write code for the VM.
If it's generic enough, somebody can come along later and build an OS on top
*
* Yeah, if you want it to be embedable, then just write an interpreter for one language with hooks to call it in other languages. If you need to run other languages on top, then go for a VM.
*
* There you go, so you don't even really need a VM to embed C
*
* You don't embed clang though. You build a backend so you can write binaries for tagha in any llvm language. You write an os kernel for tagha, allowing the compiler to be run in the machine.
*
* A kernel is a program written for the machine that manages the filesystem, peripherals, and programs running on the machine. With a kernel, you can run compiling systems that are entirely contained in the machine.
Otherwise, you use an external machine to compile the binary, then move the binary into the machine to be run as its program.
*
* Probably the most direct way to do an REPL interpreter is to do the same thing you do compiling; collect text from the script until you have enough to compile a block of code and execute it. It'll be slow because it lacks optimization, but it shouldn't need many changes to your code.
*/


// tagha_exec.c
int32_t Tagha_Exec(struct Tagha *const pSys);
const char *RegIDToStr(enum RegID id);


// tagha_api.c
struct Tagha *Tagha_New(void);
void Tagha_Init(struct Tagha *pSys);
void Tagha_LoadScriptByName(struct Tagha *pSys, char *filename);
void Tagha_LoadScriptFromMemory(struct Tagha *pSys, void *pMemory, uint64_t memsize);
bool Tagha_RegisterNatives(const struct Tagha *pSys, struct NativeInfo arrNatives);
void Tagha_Free(struct Tagha *pSys);
int32_t Tagha_RunScript(struct Tagha *pSys);
int32_t Tagha_CallFunc(struct Tagha *pSys, const char *strFunc);

#ifndef FREE_MEM
/* Frees 'ptr' and resets it to NULL.
 * - 'ptr' must be a modifiable lvalue; it is evaluated twice, so do not
 *   pass an expression with side effects.
 * - free(NULL) is a no-op, so no NULL test is needed.
 * - Wrapped in do/while(0) so the macro acts as one statement: a bare
 *   'if' here would capture the 'else' of an enclosing if/else.
 */
#define FREE_MEM(ptr) do { free( (ptr) ); (ptr) = NULL; } while( 0 )
#endif

void Tagha_BuildFromFile(struct Tagha *pSys, const char *strFilename);
void Tagha_BuildFromPtr(struct Tagha *pSys, void *pProgram, uint64_t Programsize);

void Tagha_PrintPtrs(const struct Tagha *pSys);
void Tagha_PrintStack(const struct Tagha *pSys);
void Tagha_PrintData(const struct Tagha *pSys);
void Tagha_PrintInstrs(const struct Tagha *pSys);
void Tagha_PrintRegData(const struct Tagha *pSys);
void Tagha_Reset(struct Tagha *pSys);

void *Tagha_GetGlobalByName(const struct Tagha *pSys, const char *strGlobalName);
void Tagha_PushValues(struct Tagha *pSys, uint32_t uiArgs, union CValue values);
union CValue Tagha_PopValue(struct Tagha *pSys);
// argv is a NULL-terminated vector of strings; the VM keeps its own copies.
void Tagha_SetCmdArgs(struct Tagha *pSys, char *argv[]);

uint32_t Tagha_GetMemSize(const struct Tagha *pSys);
uint32_t Tagha_GetInstrSize(const struct Tagha *pSys);
uint32_t Tagha_GetMaxInstrs(const struct Tagha *pSys);
uint32_t Tagha_GetNativeCount(const struct Tagha *pSys);
uint32_t Tagha_GetFuncCount(const struct Tagha *pSys);
uint32_t Tagha_GetGlobalsCount(const struct Tagha *pSys);
bool Tagha_IsSafemodeActive(const struct Tagha *pSys);
bool Tagha_IsDebugActive(const struct Tagha *pSys);
void Tagha_PrintErr(const struct Tagha *pSys, const char *funcname, const char *err, ...);

// ds.c
struct Hashmap *Map_New(void);
void Map_Init(struct Hashmap *map);
void Map_Free(struct Hashmap *map);
uint64_t Map_Len(const struct Hashmap *map);

void Map_Rehash(struct Hashmap *map);
bool Map_Insert(struct Hashmap *map, const char *strKey, uint64_t pData);
uint64_t Map_Get(const struct Hashmap *map, const char *strKey);
void Map_Set(const struct Hashmap *map, const char *strKey, uint64_t pData);
void Map_Delete(struct Hashmap *map, const char *strKey);
bool Map_HasKey(const struct Hashmap *map, const char *strKey);
const char *Map_GetKey(const struct Hashmap *map, const char *strKey);

/*
void Map_Rehash_int(struct Hashmap *);
bool Map_Insert_int(struct Hashmap *, const uint64_t, void *);
void *Map_Get_int(const struct Hashmap *, const uint64_t);
void Map_Delete_int(struct Hashmap *, const uint64_t);
bool Map_HasKey_int(const struct Hashmap *, const uint64_t);
*/
uint64_t gethash64(const char *strKey);
uint32_t gethash32(const char *strKey);
uint64_t int64hash(uint64_t x);
uint32_t int32hash(uint32_t x);


/*
* r = register is first operand
* m = memory address is first operand
*/
/* X-macro list of every opcode. Define X(name) before expanding
 * INSTR_SET and undefine it afterwards. Every line of the list must end
 * in a backslash so that the whole list stays inside the macro. */
#define INSTR_SET \
	X(halt) \
	/* single operand opcodes */ \
	/* stack ops */ \
	X(push) X(pop) \
	/* unary arithmetic and bitwise ops */ \
	X(neg) X(inc) X(dec) X(bnot) \
	/* jump ops */ \
	X(jmp) X(jz) X(jnz) \
	\
	/* subroutine ops */ \
	X(call) X(ret) X(callnat) \
	\
	/* two operand opcodes */ \
	X(movr) X(movm) X(lea) \
	/* signed and unsigned integer arithmetic ops */ \
	X(addr) X(addm) X(uaddr) X(uaddm) \
	X(subr) X(subm) X(usubr) X(usubm) \
	X(mulr) X(mulm) X(umulr) X(umulm) \
	X(divr) X(divm) X(udivr) X(udivm) \
	X(modr) X(modm) X(umodr) X(umodm) \
	/* bitwise ops */ \
	X(shrr) X(shrm) X(shlr) X(shlm) \
	X(andr) X(andm) X(orr) X(orm) X(xorr) X(xorm) \
	/* comparison ops */ \
	X(ltr) X(ltm) X(ultr) X(ultm) \
	X(gtr) X(gtm) X(ugtr) X(ugtm) \
	X(cmpr) X(cmpm) X(ucmpr) X(ucmpm) \
	X(neqr) X(neqm) X(uneqr) X(uneqm) \
	X(reset) \
	\
	/* floating point opcodes */ \
	X(int2float) X(int2dbl) X(float2dbl) X(dbl2float) \
	X(faddr) X(faddm) X(fsubr) X(fsubm) X(fmulr) X(fmulm) X(fdivr) X(fdivm) \
	X(fneg) X(fltr) X(fltm) X(fgtr) X(fgtm) X(fcmpr) X(fcmpm) X(fneqr) X(fneqm) \
	/* misc opcodes */ \
	X(nop)

/* Opcode numbering follows list order, starting at halt == 0. */
#define X(x) x,
enum InstrSet { INSTR_SET };
#undef X

#ifdef __cplusplus

#endif

#endif // TAGHA_H_INCLUDED


tagha_api.c



#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "tagha.h"


/* Tagha File Structure (Dec 23, 2017)
* ------------------------------ start of header ------------------------------
* 2 bytes: magic verifier ==> 0xC0DE
* 4 bytes: stack segment size (aligned by 8 bytes)
* 4 bytes: data segment size
* 1 byte: safemode and debugmode flags
* ------------------------------ end of header ------------------------------
* .natives table
* 4 bytes: amount of natives
* n bytes: native table
* 4 bytes: string size + '\0' of native string
* n bytes: native string.
*
* .functions table
* 4 bytes: amount of functions
* n bytes: functions table
* 4 bytes: string size + '\0' of func string
* n bytes: function string
* 4 bytes: offset
*
* .globalvars table
* 4 bytes: amount of global vars
* n bytes: global vars table
* 4 bytes: string size + '\0' of global var string
* n bytes: global var string
* 4 bytes: offset
*
* n bytes: .data section initial values.
* n bytes: .text section
*/

static uint64_t get_file_size(FILE *pFile);
static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile);
static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile);


/*
 * Allocates a zeroed VM on the heap and runs Tagha_Init on it.
 *
 * Returns the new VM, or NULL when the allocation failed.
 */
struct Tagha *Tagha_New(void)
{
	struct Tagha *const pVM = calloc(1, sizeof *pVM);
	Tagha_Init(pVM);
	return pVM;
}


/*
 * Puts a VM into its initial state: clears the structure, then creates the
 * C-definitions map, the natives map and an argument vector.
 * Each failure is reported on stdout with printf.
 *
 * pSys: VM to initialise; ignored when NULL.
 *
 * NOTE(review): this listing appears to have lost its block delimiters and
 * backslash escapes (the trailing "n" in the messages looks like a former
 * newline escape), so the nesting of the if/else branches below cannot be
 * read off the text — confirm against the repository before editing.
 */
void Tagha_Init(struct Tagha *restrict const pSys)

if( !pSys )
return;

// Wipe the whole structure first.
// NOTE(review): presumably a zero-initialised compound literal whose braces were lost — confirm.
*pSys = (struct Tagha)0;

// The structure was just cleared, so the map pointer is expected to be unset here.
if( !pSys->m_pmapCDefs )
pSys->m_pmapCDefs = Map_New();
// if we can't allocate our C Definitions
// we can't run code in general as the definitions
// contain functions and global vars!
if( !pSys->m_pmapCDefs )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize C Definitions Map%s ****n", KRED, RESET, KGRN, RESET);
return;

else Map_Init(pSys->m_pmapCDefs);

// Same sequence for the natives map; no return is visible on this failure path.
if( !pSys->m_pmapNatives )
pSys->m_pmapNatives = Map_New();
if( !pSys->m_pmapNatives )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Native Map%s ****n", KRED, RESET, KGRN, RESET);
else Map_Init(pSys->m_pmapNatives);

// Argument vector: argc starts at 0 and one extra slot is allocated
// so that the entry at index argc can hold the terminating NULL.
if( !pSys->m_pArgv )
pSys->m_iArgc = 0;
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));
if( !pSys->m_pArgv )
printf("[%sTagha Init Error%s]: **** %sUnable to initialize Command-line Args String Vector%s ****n", KRED, RESET, KGRN, RESET);
else pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



static bool is_c_file(const char *restrict filename)

if( !filename )
return false;

// iterate to end of string and then check backwards.
while( *++filename );
int16_t i = *(int16_t *)(filename-2);
return( i==0x632E

static bool is_tbc_file(const char *restrict filename)
i==0x4342542E );



void Tagha_LoadScriptByName(struct Tagha *const pSys, char *restrict strFilename)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromFile(pSys, strFilename);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;



void Tagha_LoadScriptFromMemory(struct Tagha *const pSys, void *restrict pMemory, const uint64_t memsize)

if( !pSys )
return;

// allocate our script.
Tagha_BuildFromPtr(pSys, pMemory, memsize);

// set up our standard I/O streams
// and global self-referencing script ptr
// Downside is that the script-side host var MUST be a pointer.
if( pSys->m_pmapCDefs )
FILE **ppFile=Tagha_GetGlobalByName(pSys, "stdin");
if( ppFile )
*ppFile = stdin;

ppFile = Tagha_GetGlobalByName(pSys, "stderr");
if( ppFile )
*ppFile = stderr;

ppFile = Tagha_GetGlobalByName(pSys, "stdout");
if( ppFile )
*ppFile = stdout;

struct Tagha **ppSelf=Tagha_GetGlobalByName(pSys, "self");
if( ppSelf )
*ppSelf = pSys;




bool Tagha_RegisterNatives(const struct Tagha *restrict const pSys, struct NativeInfo arrNatives)

if( !pSys or !pSys->m_pmapNatives or !arrNatives )
return false;

for( struct NativeInfo *n=arrNatives ; n->pFunc and n->strName ; n++ )
Map_Insert(pSys->m_pmapNatives, n->strName, (uintptr_t)n->pFunc);
return true;



int32_t Tagha_RunScript(struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return -1;

// make sure 'main' exists.
else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call main with a NULL definition table!");
return -1;

// make sure we have the memory for running.
else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-32) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;

// get instruction offset to main.
struct TaghaCDef *pMainData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, "main");
if( !pMainData or pMainData->m_ucDefType != DefFunction )
Tagha_PrintErr(pSys, __func__, "function 'main' doesn't exist!");
return -1;

pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pMainData->m_uiOffset;

// push argv and argc to registers.
// use 'uintptr_t' so we can force 4-byte pointers as 8-byte.
pSys->m_Regs[res].UInt64 = (uintptr_t)pSys->m_pArgv;
pSys->m_Regs[rds].Int64 = pSys->m_iArgc;

(--pSys->m_Regs[rsp].SelfPtr)->Int64 = -1L; // push bullshit ret address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64; // push rbp

if( pSys->m_bDebugMode )
printf("Tagha_RunScript :: pushed argc: %" PRIi32 " and argv %pn", pSys->m_iArgc, pSys->m_pArgv);

return Tagha_Exec(pSys);


int32_t Tagha_CallFunc(struct Tagha *restrict const pSys, const char *restrict strFunc)

if( !pSys or !strFunc )
return -1;

else if( !pSys->m_pmapCDefs )
Tagha_PrintErr(pSys, __func__, "Cannot call functions using a NULL function table!");
return -1;

else if( ((pSys->m_Regs[rsp].UCharPtr-pSys->m_pMemory)-16) >= pSys->m_uiMemsize )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return -1;


struct TaghaCDef *pFuncData = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strFunc);
if( !pFuncData or pFuncData->m_ucDefType != DefFunction)
Tagha_PrintErr(pSys, __func__, "function '%s' doesn't exist!", strFunc);
return -1;


// save return address.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = (uintptr_t)pSys->m_Regs[rip].UCharPtr+1;

// jump to the function entry address.
pSys->m_Regs[rip].UCharPtr = pSys->m_pMemory + pFuncData->m_uiOffset;

// push bp and copy sp to bp.
(--pSys->m_Regs[rsp].SelfPtr)->UInt64 = pSys->m_Regs[rbp].UInt64;

return Tagha_Exec(pSys);



// need this to determine the text segment size.
/*
 * Returns the size of an open file in bytes and leaves the stream
 * positioned at its beginning.
 * need this to determine the text segment size.
 *
 * pFile: open stream; NULL yields 0.
 *
 * Returns 0 when the size cannot be determined (seek or tell failed).
 * ftell reports failure as -1, which must not be converted to an
 * unsigned size.
 */
static uint64_t get_file_size(FILE *restrict pFile)
{
	if( !pFile )
		return 0;
	
	uint64_t size = 0;
	if( !fseek(pFile, 0, SEEK_END) ) {
		const long end = ftell(pFile);
		if( end > 0 )
			size = (uint64_t)end;
		rewind(pFile);
	}
	return size;
}


static uint32_t scripthdr_read_natives_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignores = 0;

// see if the script is using any natives.
pSys->m_pstrNativeCalls = NULL;
ignores = fread(&pSys->m_uiNatives, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Natives: '%" PRIu32 "'n", pSys->m_uiNatives);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiNatives )
return bytecount;

// script has natives? Copy their names so we can use them on VM natives hashmap later.
pSys->m_pstrNativeCalls = calloc(pSys->m_uiNatives, sizeof(char *));
if( !pSys->m_pstrNativeCalls )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native Table%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


for( uint32_t i=0 ; i<pSys->m_uiNatives ; i++ )
uint32_t str_size;
ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate memory to hold the native's name.
pSys->m_pstrNativeCalls[i] = calloc(str_size, sizeof(char));
if( !pSys->m_pstrNativeCalls[i] )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Native String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;


// read in the native's name.
ignores = fread(pSys->m_pstrNativeCalls[i], sizeof(char), str_size, *ppFile);
bytecount += str_size;
printf("[Tagha Load Script] :: Copied Native Name: '%s' @ %pn", pSys->m_pstrNativeCalls[i], pSys->m_pstrNativeCalls+i);

pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_func_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// see if the script has its own functions.
// This table is so host or other script can call these functions by name or address.
ignore_warns = fread(&pSys->m_uiFuncs, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
if( !pSys->m_uiFuncs )
return bytecount;

// copy the function data from the header.
for( uint32_t i=0 ; i<pSys->m_uiFuncs ; i++ )
uint32_t str_size;
ignore_warns = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);

// allocate the hashmap function key.
char *strFunc = calloc(str_size, sizeof(char));
if( !strFunc )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Table String%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */
ignore_warns = fread(strFunc, sizeof(char), str_size, *ppFile);
bytecount += str_size;

// copy func's header data to our table
// then store the table to our function hashmap with the key
// we allocated earlier.
struct TaghaCDef *pFuncData = calloc(1, sizeof(struct TaghaCDef));
if( !pFuncData )
printf("[%sTagha Load Script Error%s]: **** %sFailed to allocate memory for Func Data%s ****n", KRED, RESET, KGRN, RESET);
Tagha_Free(pSys), *ppSys = NULL;
fclose(*ppFile), *ppFile=NULL;
return 0;
/* if */

ignore_warns = fread(&pFuncData->m_uiOffset, sizeof(uint32_t), 1, *ppFile);
bytecount += sizeof(uint32_t);
printf("[Tagha Load Script] :: Copied Function name '%s' /* for */
pSys = NULL;
return bytecount;


static uint32_t scripthdr_read_global_table(struct Tagha **const ppSys, FILE **const ppFile)

if( !*ppSys or !*ppFile )
return 0;

struct Tagha *pSys = *ppSys;
uint32_t bytecount = 0;
int32_t ignore_warns = 0;

// check if the script has global variables.
ignore_warns = fread(&pSys->m_uiGlobals, sizeof(uint32_t), 1, *ppFile);
printf("[Tagha Load Script] :: Amount of Global Vars: %" PRIu32 "n", pSys->m_uiGlobals);
bytecount += sizeof(uint32_t);
uint32_t globalbytes = 0;
if( !pSys->m_uiGlobals )
return bytecount;

for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) offset: %" PRIu32 "n", strGlobal, pGlobalData->m_uiOffset);

// insert the global var's table to our hashmap.
pGlobalData->m_ucDefType = DefGlobal;
Map_Insert(pSys->m_pmapCDefs, strGlobal, (uintptr_t)pGlobalData);
strGlobal = NULL; pGlobalData = NULL;
/* for( uint32_t i=0 ; i<pSys->m_uiGlobals ; i++ ) */
pSys = NULL;
return bytecount;


void Tagha_BuildFromFile(struct Tagha *pSys, const char *restrict strFilename)
highest address


void Tagha_BuildFromPtr(struct Tagha *restrict pSys, void *pProgram, const uint64_t Programsize)
data segment

void Tagha_Free(struct Tagha *restrict const pSys)

if( !pSys )
return;

// kill memory
FREE_MEM(pSys->m_pMemory);

// free our native table
uint32_t i, Size;
if( pSys->m_pstrNativeCalls )
for( i=0 ; i<pSys->m_uiNatives ; i++ )
FREE_MEM(pSys->m_pstrNativeCalls[i]);

memset(pSys->m_pstrNativeCalls, 0, pSys->m_uiNatives);
FREE_MEM(pSys->m_pstrNativeCalls);

// free our C definitions hashmap and all the tables in it.
if( pSys->m_pmapCDefs )
struct KeyNode
*restrict kv = NULL,
*next = NULL
;
Size = pSys->m_pmapCDefs->size;
for( i=0 ; i<Size ; i++ )
for( kv = pSys->m_pmapCDefs->m_ppTable[i] ; kv ; kv = next )
next = kv->m_pNext;
if( kv->m_pData )
free((struct TaghaCDef *)(uintptr_t)kv->m_pData), kv->m_pData = 0;
if( kv->m_strKey )
free((char *)kv->m_strKey), kv->m_strKey = NULL;


Map_Free(pSys->m_pmapCDefs);
FREE_MEM(pSys->m_pmapCDefs);


// since the system's native hashmap has nothing allocated,
// we just free the hashmap's internal data and then the hashmap itself.
if( pSys->m_pmapNatives )
Map_Free(pSys->m_pmapNatives);
FREE_MEM(pSys->m_pmapNatives);


// free our script argument vector.
if( pSys->m_pArgv )
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);
FREE_MEM(pSys->m_pArgv);


// set our stack pointer pointers to NULL
pSys->m_Regs[rip].UCharPtr = pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = NULL;



void Tagha_Reset(struct Tagha *restrict const pSys)

if( !pSys )
return;

// resets the script without crashing Tagha and the host.
memset(pSys->m_pTextSegment+1, 0, pSys->m_uiMemsize-pSys->m_uiInstrSize);
pSys->m_Regs[rsp].UCharPtr = pSys->m_Regs[rbp].UCharPtr = pSys->m_pMemory + (pSys->m_uiMemsize-1);

memset(pSys->m_Regs, 0, sizeof(union CValue) * rsp);
// TODO: reset global variable data to original values?



void *Tagha_GetGlobalByName(const struct Tagha *restrict const pSys, const char *restrict strGlobalName)

if( !pSys or !pSys->m_pmapCDefs )
return NULL;

// get the global's .data segment offset then return the pointer to that offset.
struct TaghaCDef *pOffset = (struct TaghaCDef *)(uintptr_t)Map_Get(pSys->m_pmapCDefs, strGlobalName);
return pOffset and pOffset->m_ucDefType==DefGlobal ? (pSys->m_pTextSegment+1)+ pOffset->m_uiOffset : NULL;


void Tagha_PushValues(struct Tagha *restrict const pSys, const uint32_t uiArgs, union CValue values)

if( !pSys or !pSys->m_pMemory )
return;

// remember that arguments must be passed right to left.
// we have enough args to fit in registers.
if( uiArgs <= 10 )
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*uiArgs);

// we have too many args, use both regs and stack.
else if( uiArgs>10 )
// first push args into reg.
memcpy(pSys->m_Regs+rds, values, sizeof(union CValue)*10);

// next, push the remaining values from last to first.
if( pSys->m_bSafeMode and (pSys->m_Regs[rsp].SelfPtr-(uiArgs-10)) < (union CValue *)pSys->m_pStackSegment )
Tagha_PrintErr(pSys, __func__, "stack overflow!");
return;

memcpy(pSys->m_Regs[rsp].SelfPtr, values+10, sizeof(union CValue)*(uiArgs-10));
pSys->m_Regs[rsp].SelfPtr -= (uiArgs-10);



union CValue Tagha_PopValue(struct Tagha *restrict const pSys)

union CValue val= .UInt64=0L ;
if( !pSys or !pSys->m_pMemory )
printf("[%sTagha Pop%s]: **** %pSys is NULL%s ****n", KRED, RESET, KGRN, RESET);
return val;

return pSys->m_Regs[ras];


void Tagha_SetCmdArgs(struct Tagha *restrict const pSys, char *argv)

if( !pSys or !pSys->m_pMemory or !argv )
return;

// clear old arguments, if any.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
FREE_MEM(pSys->m_pArgv[i].Str);

// get the size of argument vector
uint32_t newargc = 0;
while( argv[++newargc] != NULL );

// resize our system's argument vector.
if( pSys->m_iArgc != newargc )
pSys->m_iArgc = newargc;
FREE_MEM(pSys->m_pArgv);
pSys->m_pArgv = calloc(pSys->m_iArgc+1, sizeof(union CValue));


/* For Implementing 'int argc' and 'char *argv'
* C Standards dictates the following...
* - The value of argc shall be nonnegative.
* - The parameters argc and argv and the strings pointed to by the argv array shall be modifiable by the program
* - argv[argc] shall be a null pointer.
* - If the value of argc is greater than zero, the array members argv[0] through argv[argc-1] inclusive shall contain pointers to strings, which are given implementation-defined values by the host environment prior to program startup.
* - If the value of argc is greater than zero, the string pointed to by argv[0] represents the program name; argv[0][0] shall be the null character if the program name is not available from the host environment. If the value of argc is greater than one, the strings pointed to by argv[1] through argv[argc-1] represent the program parameters.
*/

// Copy down our argument vector's strings.
for( uint32_t i=0 ; i<pSys->m_iArgc ; i++ )
size_t strsize = strlen(argv[i])+1;
pSys->m_pArgv[i].Str = calloc(strsize, sizeof(char));

if( pSys->m_pArgv[i].Str )
strncpy(pSys->m_pArgv[i].Str, argv[i], strsize);
pSys->m_pArgv[i].Str[strsize-1] = 0;


pSys->m_pArgv[pSys->m_iArgc].Str = NULL;



uint32_t Tagha_GetMemSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMemsize;

uint32_t Tagha_GetInstrSize(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiInstrSize;

uint32_t Tagha_GetMaxInstrs(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiMaxInstrs;

uint32_t Tagha_GetNativeCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiNatives;

uint32_t Tagha_GetFuncCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiFuncs;

uint32_t Tagha_GetGlobalsCount(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_uiGlobals;

bool Tagha_IsSafemodeActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bSafeMode;

bool Tagha_IsDebugActive(const struct Tagha *restrict const pSys)

return !pSys ? 0 : pSys->m_bDebugMode;




void Tagha_PrintStack(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .stack Segmentn");

uint32_t size = pSys->m_uiMemsize;
union CValue *p = (union CValue *)(pSys->m_pMemory + (size-1));

while( (uint8_t *)p >= pSys->m_pStackSegment )
if( pSys->m_Regs[rsp].SelfPtr == p )
printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
--p;
/* while( p>=pSys->m_pStackSegment ) */
puts("n");


void Tagha_PrintData(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .data Segmentn");
for( uint8_t *p = pSys->m_pDataSegment ; p > pSys->m_pTextSegment ; --p )
printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintInstrs(const struct Tagha *restrict const pSys)

if( !pSys or !pSys->m_pMemory )
return;

puts("DEBUG PRINT: .text Segmentn");
for( uint8_t *p = pSys->m_pMemory ; p <= pSys->m_pTextSegment ; p++ )
printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);

puts("n");


void Tagha_PrintPtrs(const struct Tagha *restrict const pSys)

if( !pSys )
return;

puts("DEBUG ...---===---... Printing Pointers...n");
printf("Instruction Ptr: %p
nStack Ptr: %p
nStack Frame Ptr: %pn", pSys->m_Regs[rip].UCharPtr, pSys->m_Regs[rsp].UCharPtr, pSys->m_Regs[rbp].UCharPtr);
puts("n");


void Tagha_PrintErr(const struct Tagha *restrict const pSys, const char *restrict funcname, const char *restrict err, ...)

if( !pSys or !err )
return;

va_list args;
va_start(args, err);
printf("[%sTagha Error%s]: **** %s reported: '", KRED, KNRM, funcname);
vprintf(err, args);
va_end(args);
printf("' ****nCurrent Instr Addr: %s%p

void Tagha_PrintRegData(const struct Tagha *restrict const pSys)

puts("ntPRINTING REGISTER DATA ==========================n");
for( uint8_t i=0 ; i<regsize ; i++ )
printf("register[%s] == %" PRIu64 "n", RegIDToStr(i), pSys->m_Regs[i].UInt64);
puts("tEND OF PRINTING REGISTER DATA ===============n");









share|improve this question












share|improve this question




share|improve this question








edited Jan 7 at 3:38
























asked Jan 4 at 2:47









Nergal

1888




1888











  • What do you want from this review?
    – chux
    Jan 4 at 4:49










  • @chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
    – Nergal
    Jan 4 at 5:31










  • Why restrict in is_c_file()?
    – chux
    Jan 6 at 3:12






  • 1




    Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. Better to include enough code here so it may be compiled even if it cannot be linked.
    – chux
    Jan 6 at 3:16










  • @chux tagha.h is in the github repository. Alright, I'll add it to the OP.
    – Nergal
    Jan 7 at 3:36
















  • What do you want from this review?
    – chux
    Jan 4 at 4:49










  • @chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
    – Nergal
    Jan 4 at 5:31










  • Why restrict in is_c_file()?
    – chux
    Jan 6 at 3:12






  • 1




    Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. Better to include enough code here so it may be compiled even if it cannot be linked.
    – chux
    Jan 6 at 3:16










  • @chux tagha.h is in the github repository. Alright, I'll add it to the OP.
    – Nergal
    Jan 7 at 3:36















What do you want from this review?
– chux
Jan 4 at 4:49




What do you want from this review?
– chux
Jan 4 at 4:49












@chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
– Nergal
Jan 4 at 5:31




@chux well I posted on the OP that I wanted a review of the overall design and structure for the entire software but a mod removed that for some reason.
– Nergal
Jan 4 at 5:31












Why restrict in is_c_file()?
– chux
Jan 6 at 3:12




Why restrict in is_c_file()?
– chux
Jan 6 at 3:12




1




1




Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. Better to include enough code here so it may be compiled even if it cannot be linked.
– chux
Jan 6 at 3:16




Code does not compile as tagha.h is missing from this post. Off-site material is not sufficient. Better to include enough code here so it may be compiled even if it cannot be linked.
– chux
Jan 6 at 3:16












@chux tagha.h is in the github repository. Alright, I'll add it to the OP.
– Nergal
Jan 7 at 3:36




@chux tagha.h is in the github repository. Alright, I'll add it to the OP.
– Nergal
Jan 7 at 3:36










1 Answer
1






active

oldest

votes

















up vote
1
down vote













Some quick reviewed parts using a compilation with warnings fully enabled.



What is a bit cheesy about this is that a grader could first take your code and run it through a compiler with lots of warnings enabled. Your code produced about 80 warnings. Lots of minor ones. Some of the more significant ones are summarized below. But it is like a term paper with spelling/grammar errors. All those warnings (some minor, others trivial) distract from your presentation. Advice: Compile code with maximum warnings enabled and clean up code.



  1. static bool is_c_file() and static bool is_tbc_file() not used. Consider deleting.


  2. Why does code use uint32_t for size information such as bytecount and others instead of size_t? Is the goal to work on 32-bit platforms or smaller? size_t is the best type for size and array math, being neither too wide nor too narrow.



  3. Mismatched format specifiers in various places, such as PRIu32 used where PRIu8 is expected.



    printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    ...
    printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys-> s->m_pMemory, *p);
    ...
    printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);



  4. Code does a lot of bytecount += sizeof(some_type). Why that type? Is it because of the previous line, like pSys->m_uiFuncs = ....? If so, rather than adding the size of the type, consider adding the size of the object. It is easier to code, review and maintain.



    pSys->m_uiFuncs = *Reader.UInt32Ptr++;
    // bytecount += sizeof(uint32_t);
    bytecount += sizeof pSys->m_uiFuncs;



  5. Similar to above. Why sizeof(uint32_t)? Is it the right type? Use the sizeof the object and remove doubt.



    // ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
    ignores = fread(&str_size, sizeof *str_size, 1, *ppFile);



  6. Casting hides problems. Example below: ftell() returns a long, and a return value of -1 indicates an error.



    if( !fseek(pFile, 0, SEEK_END) ) 
    // size = (uint64_t)ftell(pFile);
    long lsize = ftell(pFile);
    if (lsize == -1) TBD_code();
    size = (uint64_t)lsize;
    rewind(pFile);



More later as time permits.






share|improve this answer





















  • Alright, all the suggested changes have been done except for deleting unused functions; I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
    – Nergal
    Jan 7 at 18:46











  • @Nergal Re: unused functions (dead code). A compile with warnings well enabled will continue to warn. Leaving in dead code encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simply wrap the dead code in #if 0 ... #endif if it remains useful for reference. Adding a comment explaining its retention is useful too.
    – chux
    Jan 7 at 20:02










  • @Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where code could generate lots of warnings, as those 2 types are not necessarily the same range/size. More study of the code is needed.
    – chux
    Jan 7 at 20:12










  • Tip for always compiling with warnings: put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall', then compile with make, which uses that variable in its default rules.
    – luser droog
    Jan 12 at 3:57











Your Answer




StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f184243%2fvirtual-machine-in-c-to-run-c%23new-answer', 'question_page');

);

Post as a guest






























1 Answer
1






active

oldest

votes








1 Answer
1






active

oldest

votes









active

oldest

votes






active

oldest

votes








up vote
1
down vote













Some quick reviewed parts using a compilation with warnings fully enabled.



What is a bit cheesy about this is that a grader could first take your code and run it through a compiler with lots of warnings enabled. Your code produced about 80 warnings. Lots of minor ones. Some of the more significant ones are summarized below. But it is like a term paper with spelling/grammar errors. All those warnings (some minor, others trivial) distract from your presentation. Advice: Compile code with maximum warnings enabled and clean up code.



  1. static bool is_c_file() and static bool is_tbc_file() not used. Consider deleting.


  2. Why does code use uint32_t for size information such as bytecount and others instead of size_t? Is the goal to work on 32-bit platforms or smaller? size_t is the best type for size and array math, being neither too wide nor too narrow.



  3. Mis-match format various places such as PRIu32 used, PRIu8 expected.



    printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    ...
    printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys-> s->m_pMemory, *p);
    ...
    printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);



  4. Code does a lot of bytecount += sizeof(some_type). Why that type? Is it because of the previous line, like pSys->m_uiFuncs = ....? If so, rather than adding the size of the type, consider adding the size of the object. It is easier to code, review and maintain.



    pSys->m_uiFuncs = *Reader.UInt32Ptr++;
    // bytecount += sizeof(uint32_t);
    bytecount += sizeof pSys->m_uiFuncs;



  5. Similar to above. Why sizeof(uint32_t)? Is it the right type? Use the sizeof the object and remove doubt.



    // ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
    ignores = fread(&str_size, sizeof *str_size, 1, *ppFile);



  6. Casting hides problems. Example below. ftell() return a long. And with a value of -1 is an error



    if( !fseek(pFile, 0, SEEK_END) ) 
    // size = (uint64_t)ftell(pFile);
    long lsize = ftell(pFile);
    if (lsize == -1) TBD_code();
    size = (uint64_t)lsize;
    rewind(pFile);



More later as time permits.






share|improve this answer





















  • Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
    – Nergal
    Jan 7 at 18:46











  • @Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
    – chux
    Jan 7 at 20:02










  • @Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
    – chux
    Jan 7 at 20:12










  • Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
    – luser droog
    Jan 12 at 3:57















up vote
1
down vote













Some quick reviewed parts using a compilation with warnings fully enabled.



What is a bit cheesy about this is that a grader could first take your code and run it through a compiler with lots of warnings enabled. Your code produced about 80 warnings. Lots of minor ones. Some of the more significant ones are summarized below. But it is like a term paper with spelling/grammar errors. All those warnings (some minor, others trivial) distract from your presentation. Advice: Compile code with maximum warnings enabled and clean up code.



  1. static bool is_c_file() and static bool is_tbc_file() not used. Consider deleting.


  2. Why does code use uint32_t for size information such as bytecount and others instead of size_t? Is the goal to work on 32-bit platforms or smaller? size_t is the best type for size and array math, being neither too wide nor too narrow.



  3. Mis-match format various places such as PRIu32 used, PRIu8 expected.



    printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    ...
    printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys-> s->m_pMemory, *p);
    ...
    printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);



  4. Code does a lot of bytecount += sizeof(some_type). Why that type? Is it because of the previous line, like pSys->m_uiFuncs = ....? If so, rather than adding the size of the type, consider adding the size of the object. It is easier to code, review and maintain.



    pSys->m_uiFuncs = *Reader.UInt32Ptr++;
    // bytecount += sizeof(uint32_t);
    bytecount += sizeof pSys->m_uiFuncs;



  5. Similar to above. Why sizeof(uint32_t)? Is it the right type? Use the sizeof the object and remove doubt.



    // ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
    ignores = fread(&str_size, sizeof *str_size, 1, *ppFile);



  6. Casting hides problems. Example below. ftell() return a long. And with a value of -1 is an error



    if( !fseek(pFile, 0, SEEK_END) ) 
    // size = (uint64_t)ftell(pFile);
    long lsize = ftell(pFile);
    if (lsize == -1) TBD_code();
    size = (uint64_t)lsize;
    rewind(pFile);



More later as time permits.






share|improve this answer





















  • Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
    – Nergal
    Jan 7 at 18:46











  • @Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
    – chux
    Jan 7 at 20:02










  • @Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
    – chux
    Jan 7 at 20:12










  • Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
    – luser droog
    Jan 12 at 3:57













up vote
1
down vote










up vote
1
down vote









Some quick reviewed parts using a compilation with warnings fully enabled.



What is a bit cheesy about this is that a grader could first take your code and run it through a compiler with lots of warnings enabled. Your code produced about 80 warnings. Lots of minor ones. Some of the more significant ones are summarized below. But it is like a term paper with spelling/grammar errors. All those warnings (some minor, others trivial) distract from your presentation. Advice: Compile code with maximum warnings enabled and clean up code.



  1. static bool is_c_file() and static bool is_tbc_file() not used. Consider deleting.


  2. Why does code use uint32_t for size information such as bytecount and others instead of size_t? Is the goal to work on 32-bit platforms or smaller? size_t is the best type for size and array math, being neither too wide nor too narrow.



  3. Mis-match format various places such as PRIu32 used, PRIu8 expected.



    printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    ...
    printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys-> s->m_pMemory, *p);
    ...
    printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);



  4. Code does a lot of bytecount += sizeof(some_type). Why that type? Is it because of the previous line, like pSys->m_uiFuncs = ....? If so, rather than adding the size of the type, consider adding the size of the object. It is easier to code, review and maintain.



    pSys->m_uiFuncs = *Reader.UInt32Ptr++;
    // bytecount += sizeof(uint32_t);
    bytecount += sizeof pSys->m_uiFuncs;



  5. Similar to above. Why sizeof(uint32_t)? Is it the right type? Use the sizeof the object and remove doubt.



    // ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
    ignores = fread(&str_size, sizeof *str_size, 1, *ppFile);



  6. Casting hides problems. Example below. ftell() return a long. And with a value of -1 is an error



    if( !fseek(pFile, 0, SEEK_END) ) 
    // size = (uint64_t)ftell(pFile);
    long lsize = ftell(pFile);
    if (lsize == -1) TBD_code();
    size = (uint64_t)lsize;
    rewind(pFile);



More later as time permits.






share|improve this answer













Some quick reviewed parts using a compilation with warnings fully enabled.



What is a bit cheesy about this is that a grader could first take your code and run it through a compiler with lots of warnings enabled. Your code produced about 80 warnings. Lots of minor ones. Some of the more significant ones are summarized below. But it is like a term paper with spelling/grammar errors. All those warnings (some minor, others trivial) distract from your presentation. Advice: Compile code with maximum warnings enabled and clean up code.



  1. static bool is_c_file() and static bool is_tbc_file() not used. Consider deleting.


  2. Why does code use uint32_t for size information such as bytecount and others instead of size_t? Is the goal to work on 32-bit platforms or smaller? size_t is the best type for size and array math, being neither too wide nor too narrow.



  3. Mis-match format various places such as PRIu32 used, PRIu8 expected.



    printf("Stack[%.10" PRIu32 "] == %" PRIu64 " - T.O.S.n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    else printf("Stack[%.10" PRIu32 "] == %" PRIu64 "n", (uint8_t *)p-pSys->m_pMemory, p->UInt64);
    ...
    printf("Data[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys-> s->m_pMemory, *p);
    ...
    printf("Text[%.10" PRIu32 "] == %" PRIu32 "n", p-pSys->m_pMemory, *p);



  4. Code does a lot of bytecount += sizeof(some_type). Why that type? Is it because of the previous line, like pSys->m_uiFuncs = ....? If so, rather than adding the size of the type, consider adding the size of the object. It is easier to code, review and maintain.



    pSys->m_uiFuncs = *Reader.UInt32Ptr++;
    // bytecount += sizeof(uint32_t);
    bytecount += sizeof pSys->m_uiFuncs;



  5. Similar to above. Why sizeof(uint32_t)? Is it the right type? Use the sizeof the object and remove doubt.



    // ignores = fread(&str_size, sizeof(uint32_t), 1, *ppFile);
    ignores = fread(&str_size, sizeof *str_size, 1, *ppFile);



  6. Casting hides problems. Example below. ftell() return a long. And with a value of -1 is an error



    if( !fseek(pFile, 0, SEEK_END) ) 
    // size = (uint64_t)ftell(pFile);
    long lsize = ftell(pFile);
    if (lsize == -1) TBD_code();
    size = (uint64_t)lsize;
    rewind(pFile);



More later as time permits.







share|improve this answer













share|improve this answer



share|improve this answer











answered Jan 7 at 6:00









chux

11.4k11238




11.4k11238











  • Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
    – Nergal
    Jan 7 at 18:46











  • @Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
    – chux
    Jan 7 at 20:02










  • @Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
    – chux
    Jan 7 at 20:12










  • Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
    – luser droog
    Jan 12 at 3:57

















  • Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
    – Nergal
    Jan 7 at 18:46











  • @Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
    – chux
    Jan 7 at 20:02










  • @Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
    – chux
    Jan 7 at 20:12










  • Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
    – luser droog
    Jan 12 at 3:57
















Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
– Nergal
Jan 7 at 18:46





Alright, all the suggested changes have been done except for deleting unused functions, I feel they may be useful in the future as I develop Tagha further. I've enabled -Wall for the compiling scripts. Nothing in Tagha's core code uses size_t except for line 904 in tagha_api.c: size_t strsize = strlen(argv[i])+1; What are you talking about having others use size_t? Is there anything else that can be done possibly?
– Nergal
Jan 7 at 18:46













@Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
– chux
Jan 7 at 20:02




@Nergal Re: unused functions (dead code). A well warning enabled compile will continue to warn. By leaving in dead code, it encourages either 1) ignoring warnings or 2) not fully enabling warnings - both of these are undesirable practices. IMO, simple #if 0 ... #endif the dead code if it remains useful for reference. Adding a comment for their retention is useful too.
– chux
Jan 7 at 20:02












@Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
– chux
Jan 7 at 20:12




@Nergal Tagha's code uses uint32_t for the "size" of things as part of a "file" structure. Using a fixed width type for a file has benefits and is usually "good". Using size_t for sizes in memory is better - how to reconcile the 2 is where could generates lots of warnings as those 2 types are not necessarily the same range/size. More study need study of code needed.
– chux
Jan 7 at 20:12












Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
– luser droog
Jan 12 at 3:57





Tip for always compiling with warnings put this in your .bashrc: CFLAGS+='-pedantic-errors -Wall' then compile with make which uses that variable in default rules.
– luser droog
Jan 12 at 3:57













 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f184243%2fvirtual-machine-in-c-to-run-c%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Greedy Best First Search implementation in Rust

Function to Return a JSON Like Objects Using VBA Collections and Arrays

C++11 CLH Lock Implementation