#!/usr/bin/perl
#this program takes a C-source with MS-VC-like assembler (with __asm)
#and outputs a source compilable by gcc (normally...)
#you must compile the produced source with the option "-masm=intel"
#(the version of gcc must be at least 3.1)

#the perl shipped with Red Hat 9 implicitly adds "use utf8;"
no utf8;

#when $samenumline is true the lines of the input and of the output
#correspond one to one, which helps when debugging the converted program
$samenumline = 1;

#label strategies, all disabled by default; pick one of -macro,
#-locallabels or -labelsnum if the result is to be compiled with -O3
($macro, $locallabels, $labelsnum) = (0, 0, 0);

#$PARANO: when true, convert() reports uncertain conversions on STDERR
#$pic: when true, end_bloc() saves/restores ebx instead of clobbering it
($PARANO, $pic) = (0, 0);

#a help request is only recognised as the very first argument
if ($ARGV[0] =~ /^(-h|--help|-help)$/) {
    print STDERR "usage: msof2gcc.pl [-locallabels|-macro|-labelsnum|-pic] [ prog.c ] > prog2.c\n";
    exit 0;
}


#each leading option flag switches on the global it is mapped to
my %flag_target = (
    "-macro"       => \$macro,
    "-locallabels" => \$locallabels,
    "-labelsnum"   => \$labelsnum,
    "-pic"         => \$pic,
);

#consume flags from the front of @ARGV; the first argument that is not a
#known flag ends the scan and stays in @ARGV (it is read later through <>)
$cont = 1;
while ($cont) {
    my $target = $flag_target{$ARGV[0]};
    if ($target) {
        $$target = 1;
        shift @ARGV;
    }
    else {
        $cont = 0;
    }
}


#regular expression source (a single capture group) listing the names that
#are treated as CPU registers rather than as C identifiers
$registers = "(" . join("|",
    qw(ax eax ebx ecx edi edx esi),
    (map { "mm$_" } 0 .. 7),
    (map { "xmm$_" } 0 .. 7),
) . ")";

#counter of __asm blocks started so far (incremented by init_bloc)
$numbloc = 0;

#start a new __asm block: advance the block counter and forget the C
#operands and registers collected for the previous block; in -locallabels
#mode the label table and its counter are reset as well
sub init_bloc {
    ++$numbloc;
    %$_ = () for \%identifiers_bloc, \%registers_bloc;
    ($nlabel, %labels) = (0) if $locallabels;
}

#finish the current __asm block: append to the global $bloc the gcc
#extended-asm tail, i.e. an empty output list, one "m" input operand per
#C identifier recorded in %identifiers_bloc and a clobber list made of the
#registers recorded in %registers_bloc plus "memory".
#In -macro mode the macro opened at the start of the block is closed and
#invoked first.  In -pic mode ebx is not listed as clobbered; the whole
#block is wrapped in push/pop ebx instead.
#Keys are emitted in sorted order so that the generated text is the same
#on every run (hash order is not stable).
sub end_bloc {
    #separator between the sections; nothing when line numbers must match
    my $nl  = $samenumline ? "" : "\n";
    my $fin = "";

    if ($macro) {
        $fin .= "\".endm\\n\\t\"  \"BB${numbloc}\\n\\t\" ";
    }

    #output operands (none), then the named memory input operands
    $fin .= ": :" . $nl;
    $fin .= join(",",
                 map { "[$identifiers_bloc{$_}] \"m\" ($_)" }
                 sort keys %identifiers_bloc);
    $fin .= $nl . " :" . $nl;

    #clobber list
    foreach my $reg (sort keys %registers_bloc) {
        if ($pic && $reg eq "ebx") { #is it correct ?
            $bloc = "\"push %%$reg\\n\\t\" $bloc \"pop %%$reg\\n\\t\"";
        }
        else {
            $fin .= "\"%$reg\",";
        }
    }
    $fin .= "\"memory\"" . $nl;

    $bloc .= $fin;
}

#record the C expression $_[0] as a memory operand of the current block
#and return the gcc reference to it, "%[name]".  The operand name is the
#expression with every "." replaced by "_"; the mapping is kept in
#%identifiers_bloc (expression => name) for end_bloc.
#The caller's $_ is left untouched.
#NOTE(review): "a.b" and "a_b" would map to the same operand name.
sub conv_identifier {
    my ($arg) = @_;
    if (not defined $identifiers_bloc{$arg}) {
        (my $name = $arg) =~ s/\./_/g;
        $identifiers_bloc{$arg} = $name;
    }
    #print STDERR "$arg\n";
    return "%[$identifiers_bloc{$arg}]";
}

#mark every register name passed as used by the current block (they end
#up in the clobber list written by end_bloc) and return the gcc spelling
#"%%name" of the first one.
#Names are stored in lower case: the source may spell registers in any
#case, while end_bloc compares against "ebx" and emits the stored names
#as clobbers.
sub conv_register {
    my ($arg) = @_;
    foreach my $reg (@_) {
        $registers_bloc{lc $reg} = 1;
    }
    return "%%$arg";
}

#map the source label $_[0] to a numeric local label (used in
#-locallabels mode).  Each new name gets the next number from $nlabel.
#  conv_label($name, 0): reference to the label; returns "Nf" while the
#                        label has not been defined yet, "Nb" afterwards
#  conv_label($name, 1): definition of the label; returns the bare number
#                        N and switches later references to "Nb"
#%labels and $nlabel are reset by init_bloc at the start of each block.
#The sub is defined unconditionally: a named sub is compiled whatever
#the value of $locallabels is.  The caller's $_ is left untouched.
sub conv_label {
    my ($arg, $label) = @_;
    if (not defined $labels{$arg}) {
        $labels{$arg} = "${nlabel}f";
        $nlabel++;
    }
    if ($label) {
        #strip the direction suffix, whichever it currently is
        (my $num = $labels{$arg}) =~ s/[fb]$//;
        $labels{$arg} = "${num}b";
        return $num;
    }
    return "$labels{$arg}";
}

#convert the address expression found between [ ] in an operand.
#Every identifier of the expression is replaced by its gcc form: names
#matching $registers go through conv_register, all others through
#conv_identifier.  Everything else (operators, numbers) is kept as is.
#Returns the converted expression.
#
#The expression is rewritten in a single left-to-right pass, so text that
#has already been substituted is never scanned again.  A token is only
#taken as an identifier when it is not glued to a preceding letter, digit,
#"_", "%" or "[": this keeps names containing "_" in one piece and leaves
#constants such as 0x10 or 10h alone.
sub convert_formula {
    my ($arg) = @_;
    #print STDERR "formula=$arg\n";

    my $convert_token = sub {
        my ($tok) = @_;
        return $tok =~ m/^$registers$/i ? conv_register($tok)
                                        : conv_identifier($tok);
    };

    #the whole formula is one name
    if ($arg =~ m/^[a-zA-Z_][a-zA-Z_0-9]*$/) {
        return $convert_token->($arg);
    }

    #"$1" is copied because the helpers run their own regular expressions
    $arg =~ s{(?<![%\[a-zA-Z_0-9])([a-zA-Z_][a-zA-Z_0-9]*)}
             {$convert_token->("$1")}ge;
    return $arg;
}

#convert one instruction operand to its gcc spelling and return it.
#Recognised forms:
#  ""                          returned unchanged
#  dword|byte|qword ptr [...]  sized memory reference
#  [...]                       memory reference (see convert_formula)
#  name or name.member         register (conv_register) or C operand
#                              (conv_identifier)
#  number                      digits / hex digits with optional h or x
#Anything else is reported on STDERR and the program exits with status 1.
#Reads the global $cmd (current mnemonic, set by convert) and sets the
#global $NOTSURE when the chosen translation is a guess.
sub convert_arg {
    my ($arg) = @_;
    my $arg2;
    #print STDERR "__________ $arg _______________\n";
    if ($arg eq "") { return ""; }

    #a C operand: an identifier, optionally followed by .member parts
    #(conv_identifier turns the dots into "_" for the operand name)
    my $c_operand = qr/[a-zA-Z_][a-zA-Z_0-9]*(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*/;

    if ($arg =~ m/^(dword|byte|qword)[\s]+ptr[\s]*\[([^\]]*)\]$/i) {
        my ($pref, $suff) = ($1, $2);
        if ($suff =~ m/^$c_operand$/ and $suff !~ m/^$registers$/i) {
            #this generates dword ptr[dword ptr[...]]
            #it compiles but is it correct ?
            #$arg2="$pref ptr[".conv_identifier($suff)."]";
            $arg2 = conv_identifier($suff);
            $NOTSURE = 1;
        }
        else {
            $suff = convert_formula($suff);
            if ("$cmd" !~ /^(movq|movd|punpckldq)$/) {
                $arg2 = "$pref ptr[$suff]";
            }
            else {
                $arg2 = "[$suff]"; # is this correct ??
                $NOTSURE = 1;
            }
        }
    }
    elsif ($arg =~ m/^\[([^\]]*)\]$/) {
        $arg2 = "[" . convert_formula($1) . "]";
    }
    elsif ($arg =~ m/^$c_operand$/) {
        #anchored at both ends, so one-letter names are accepted and
        #operands with trailing garbage are rejected below
        if ($arg =~ m/^$registers$/i) {
            $arg2 = conv_register($arg);
        }
        else {
            $arg2 = conv_identifier($arg);
        }
    }
    elsif ($arg =~ m/^[0-9a-fA-F][0-9a-fA-Fhx]*$/) {
        $arg2 = $arg;
    }
    else {
        print STDERR "BIG PB with __${arg}__\n";
        exit(1);
    }
    #print STDERR "'${arg}' --> '${arg2}'\n";
    #print STDERR "${arg}\n"; # to generate the registers !!
    return $arg2;
}


#convert one MSVC assembler statement (mnemonic plus up to two operands,
#or a "label:" optionally followed by text) and return the text to place
#inside the gcc asm string.
#Side effects: sets the globals $cmd (read by convert_arg) and $NOTSURE,
#and records used registers / C operands through the conv_* helpers.
#An unparsable statement is reported on STDERR and ends the program with
#status 1.
#Labels are rewritten according to the selected mode: "B<bloc>.<name>" by
#default, a macro-local name with -macro, a numeric local label with
#-locallabels, "<name>_%=" with -labelsnum.
sub convert {
    my ($micros) = @_;
    my ($args, $arg1, $arg2, $gcc);
    $NOTSURE = 0;
    #print STDERR "inst=\@\@${micros}\@\@\n";
    if ($micros !~ m/([a-zA-Z_][a-zA-Z_0-9]*:??)([\s]+(([^\s].*)??))??$/) {
        print STDERR "BIG PB avec __${micros}__\n";
        exit(1);
    }
    $cmd  = $1;
    $args = defined $3 ? $3 : "";

    #Some commands alter registers which are not in the argument
    my $mnemonic = lc $cmd;
    if ($mnemonic eq "cpuid") {
        #$bloc.="\"xor %%eax, %%eax\\n\\t\" ";
        foreach my $reg ("eax", "ebx", "ecx", "edx") {
            conv_register($reg);
        }
    }
    elsif ($mnemonic eq "rdtsc") {
        #one call per register: both halves of the result are clobbered
        foreach my $reg ("eax", "edx") {
            conv_register($reg);
        }
    }

    if ($cmd =~ /:$/) {
        #label definition
        (my $name = $cmd) =~ s/:$//;
        if ($macro) {
            $gcc = "B$numbloc.$name.\\\\\@: $args";
        }
        elsif ($locallabels) {
            my $label = conv_label($name, 1);
            $gcc = "$label: $args";
        }
        elsif ($labelsnum) {
            $gcc = "${name}_%=: $args";
        }
        else {
            $gcc = "B$numbloc.$cmd $args";
        }
    }
    elsif ($cmd eq "align") {
        $gcc = ".align $args";
    }
    elsif ($args eq "") {
        $gcc = "$cmd $args";
    }
    else {
        if ($args !~ m/^([^,]*[^\s])([\s]*,[\s]*([^\s][^,]*))??$/) {
            print STDERR "BIG PB avec __${args}__\n";
            exit(1);
        }
        ($arg1, $arg2) = ($1, $3);
        $arg2 = "" unless defined $arg2;

        #print STDERR "arg1=$arg1 arg2=$arg2\n";
        #jmp, every conditional jump and the loop family take a label as
        #operand; it is rewritten like the label definitions above
        if ($cmd =~ m/^(?:j[a-z]+|loop(?:n?[ez])?)$/i) {
            #drop a distance keyword in front of the label
            $arg1 =~ s/^(?:near|short)\s+(?=[^\s])//i;
            if ($macro) {
                $arg1 = "B$numbloc.$arg1.\\\\\@";
            }
            elsif ($locallabels) {
                $arg1 = conv_label($arg1, 0);
            }
            elsif ($labelsnum) {
                $arg1 = "${arg1}_%=";
            }
            else {
                $arg1 = "B$numbloc.$arg1";
            }
        }
        else {
            $arg1 = convert_arg($arg1);
            $arg2 = convert_arg($arg2);
        }
        if ($arg2 ne "") {
            $args = "$arg1, $arg2";
        }
        else {
            $args = $arg1;
        }
        $gcc = "$cmd $args";
    }

    if ($NOTSURE and $PARANO) {
        print STDERR "'${micros}' --> '${gcc}' NOT SURE\n";
        if (not $samenumline) {
            $gcc = "$gcc \\n\\t\"\n\"\#\# NOT SURE originally ${micros}";
        }
    }
    #print STDERR "'${micros}' --> '${gcc}'\n";
    return $gcc;
}

#main filter: copy the input (files named in @ARGV, else STDIN) to STDOUT
#line by line, rewriting what gcc does not accept:
# - hexadecimal constants of 9 to 16 digits get an LL suffix
# - "const" in front of (unsigned) __int64 is removed
# - _asm is normalised to __asm
# - "__asm instruction;" on one line becomes a basic __asm__ __volatile__
# - an "__asm { ... }" block becomes one extended asm statement: each
#   instruction line is converted by convert() and end_bloc() appends the
#   operand and clobber lists
#Reaching the end of the input inside a block is reported on STDERR and
#ends the program with status 1.
while (<>) {
    s/(0x[0-9a-fA-F]{9,16})(\b|[^0-9a-fA-FL])/${1}LL/g;
    s/(0x[0-9a-fA-F]{9,16})L(\b|[^L])/${1}LL/g;
    s/\bconst\s+((unsigned\s+)?__int64)\b/$1/g;
    s/\b_asm/__asm/;

    if (!/__asm/) {
        print;
        next;
    }

    #single statement form
    if (m/__asm[\s].*;/) {
        s/__asm[\s]+(.*)[\s]*;/__asm__ __volatile__\(\"${1}\\n\\t\"\);/;
        print;
        next;
    }

    #block form: copy lines through until the opening brace shows up
    s/__asm/__asm__ __volatile__/;
    until (m/\{/) {
        print;
        $_ = <>; # must be in scalar context
        if (not defined $_) {
            print STDERR "BIG PB: end of input before the { of an __asm block\n";
            exit(1);
        }
    }
    chomp;
    my ($before_brace, $after_brace) = m/^(.*)\{(.*)$/;
    print $before_brace;
    $bloc = "$after_brace\n";
    init_bloc();
    if ($macro) {
        $bloc = "\"\.macro BB$numbloc\\n\\t\" " . $bloc;
    }

    #body of the block, up to the line holding the closing brace; the test
    #comes first so that an empty block is closed by its own }
    $_ = <>;
    while (defined $_ and not m/\}/) {
        chomp;
        s/__asm\s*(.*);/$1/;
        #open the C string in front of the instruction, keeping the indent
        s/^([\s]*)([a-zA-Z])/$1\"$2/;
        if (m&^(.*)\"([^/]+(/[^/]+)*[^\s])([\s]*(//.*)??)$&) {
            my ($avantassemb, $assemb, $apresassemb) = ($1, $2, $4);
            chomp($assemb);
            my $assemb2 = convert($assemb);
            $bloc .= "$avantassemb\"$assemb2\\n\\t\"$apresassemb\n";
        }
        else {
            #no instruction on this line (blank line, comment, ...)
            $bloc .= "$_\n";
        }
        $_ = <>; # must be in scalar context
    }
    if (not defined $_) {
        print STDERR "BIG PB: end of input inside an __asm block\n";
        exit(1);
    }
    chomp;

    end_bloc();
    my ($before_close, $after_close) = m/^(.*)\}(.*)$/;
    $bloc .= $before_close;
    print "($bloc);";
    print "$after_close\n";
}

