/*
 * dis.cpp
 * Patrick Ytzen
 * Last edited: 3/30/06
 *
 * Purpose:
 *   To take SPARC machine language and disassemble it into SPARC
 *   assembly language.
 *
 * Assumptions:
 *   - The input file is read from its first byte as a flat sequence of
 *     32-bit instruction words stored big-endian (SPARC byte order).
 *   - unsigned int is at least 32 bits wide.
 *   - Synthetic instructions and pseudo-ops are not translated into their
 *     synthetic forms, e.g. ret is shown as jmpl %i7 + 8, %g0
 *     (refer to pg473).
 *   - %sp and %fp are represented as %o6 and %i6 respectively.
 *   - Words that cannot be decoded are emitted as ".word 0x<value>".
 */

//include statements
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;

string format1(unsigned int, unsigned int);
string format2(unsigned int, unsigned int);
string format3(unsigned int);
string format3b(unsigned int);
string reg(unsigned int);
string frmt3a_cmd(string, unsigned int, string, string);
string frmt3b_cmd(string, unsigned int, string, string);
string frmt2_cmd(unsigned int);
std::string itoa(int value, int base);

/* Formats an unsigned value as lowercase hexadecimal with a "0x" prefix
   and no zero padding. Used for addresses and raw words, which must not
   be pushed through the signed itoa(): values with bit 31 set would be
   printed as the magnitude of a negative number. */
static string hex32(unsigned int value){
    static const char kDigits[] = "0123456789abcdef";
    string digits;
    do {
        digits.insert(digits.begin(), kDigits[value & 0xf]);
        value >>= 4;
    } while (value != 0);
    return "0x" + digits;
}

/* Returns the mnemonic for an op == 0x2 (arithmetic/logical/control)
   instruction, or an empty string when op3 is not in the table. */
static string frmt3a_name(unsigned int op3){
    switch (op3){
        case 0x00: return "add";
        case 0x01: return "and";
        case 0x02: return "or";
        case 0x03: return "xor";
        case 0x04: return "sub";
        case 0x05: return "andn";
        case 0x06: return "orn";
        case 0x07: return "xnor";
        case 0x08: return "addx";
        case 0x0a: return "umul";
        case 0x0b: return "smul";
        case 0x0c: return "subx";
        case 0x0e: return "udiv";
        case 0x0f: return "sdiv";
        case 0x10: return "addcc";
        case 0x11: return "andcc";
        case 0x12: return "orcc";
        case 0x13: return "xorcc";
        case 0x14: return "subcc";
        case 0x15: return "andncc";
        case 0x16: return "orncc";
        case 0x17: return "xnorcc";
        case 0x18: return "addxcc";
        case 0x1a: return "umulcc";
        case 0x1b: return "smulcc";
        case 0x1c: return "subxcc";
        case 0x1e: return "udivcc";
        case 0x1f: return "sdivcc";
        case 0x20: return "taddcc";
        case 0x21: return "tsubcc";
        case 0x22: return "taddcctv";
        case 0x23: return "tsubcctv";
        case 0x24: return "mulscc";
        case 0x25: return "sll";
        case 0x26: return "srl";
        case 0x27: return "sra";
        case 0x28: return "rdy";
        case 0x29: return "rdpsr";
        case 0x2a: return "rdwim";
        case 0x2b: return "rdtbr";
        case 0x30: return "wry";
        case 0x31: return "wrpsr";
        case 0x32: return "wrwim";
        case 0x33: return "wrtbr";
        case 0x34: return "fpop1";
        case 0x35: return "fpop2";
        case 0x36: return "cpop1";
        case 0x37: return "cpop2";
        case 0x38: return "jmpl";
        case 0x39: return "rett";
        case 0x3a: return "ticc";
        case 0x3b: return "iflush";
        case 0x3c: return "save";
        case 0x3d: return "restore";
        default:   return "";
    }
}

/* Returns the mnemonic for an op == 0x3 (load/store) instruction, or an
   empty string when op3 is not in the table. */
static string frmt3b_name(unsigned int op3){
    switch (op3){
        case 0x00: return "ld";
        case 0x01: return "ldub";
        case 0x02: return "lduh";
        case 0x03: return "ldd";
        case 0x04: return "st";
        case 0x05: return "stb";
        case 0x06: return "sth";
        case 0x07: return "std";
        case 0x09: return "ldsb";
        case 0x0a: return "ldsh";
        case 0x0d: return "ldstub";
        case 0x0f: return "swap";
        case 0x10: return "lda";
        case 0x11: return "lduba";
        case 0x12: return "lduha";
        case 0x13: return "ldda";
        case 0x14: return "sta";
        case 0x15: return "stba";
        case 0x16: return "stha";
        case 0x17: return "stda";
        case 0x19: return "ldsba";
        case 0x1a: return "ldsha";
        case 0x1d: return "ldstuba";
        case 0x1f: return "swapa";
        case 0x20: return "ldf";
        case 0x21: return "ldfsr";
        case 0x23: return "lddf";
        case 0x24: return "stf";
        case 0x25: return "stfsr";
        case 0x26: return "stdfq";
        case 0x27: return "stdf";
        case 0x30: return "ldc";
        case 0x31: return "ldcsr";
        case 0x33: return "lddc";
        case 0x34: return "stc";
        case 0x35: return "stcsr";
        case 0x36: return "stdcq";
        case 0x37: return "stdc";
        default:   return "";
    }
}

/* main entry point
   Usage: dasm <file>
   Reads the file named by argv[1] four bytes at a time, decodes each word
   and writes one line of assembly per word to a file whose name is the
   input name with its last character replaced by 's' (prog.o -> prog.s).
   Each decoded line is also echoed to stdout prefixed with its address.
   Returns 0 on success, 1 on bad usage, 2 when the input cannot be
   opened and 3 when the output cannot be created safely. */
int main(int argc, char ** argv){
    //error in usage and for nonexistent file
    if (argc < 2) {
        cerr << "Usage: dasm <file.o>" << endl;
        return 1;
    }

    fstream in(argv[1], ios::in|ios::binary);
    if (!in.good()) {
        cerr << "File not opened ..." << endl;
        return 2;
    }

    //changes the file name from .o to .s
    string outf = argv[1];
    outf.replace(outf.length()-1, 1, "s");

    //opening the output truncates it, so it must never be the input file
    if (outf == argv[1]) {
        cerr << "Output file would overwrite the input file ..." << endl;
        return 3;
    }

    fstream out(outf.c_str(), ios::out|ios::binary);
    if (!out.good()) {
        cerr << "Output file not opened ..." << endl;
        return 3;
    }

    unsigned char raw[4];       //one instruction word as stored in the file
    unsigned int pc = 0x0;      //program counter

    //while there are still whole instructions left to read in,
    //decode them into assembly language
    while (in.read(reinterpret_cast<char *>(raw), sizeof(raw))) {
        //assemble the word explicitly so the result does not depend on
        //the byte order of the machine running the disassembler
        const unsigned int working =
            (static_cast<unsigned int>(raw[0]) << 24) |
            (static_cast<unsigned int>(raw[1]) << 16) |
            (static_cast<unsigned int>(raw[2]) << 8)  |
             static_cast<unsigned int>(raw[3]);

        //the top two bits select the instruction format
        const unsigned int op = (working >> 30) & 0x3;

        string str;             //string that will be written to file
        switch (op){
            case 0x0:           //format 2 (branching or sethi)
                str = format2(working, pc);
                break;
            case 0x1:           //format 1 (call instruction)
                str = format1(working, pc);
                break;
            default:            //format 3 (op == 2 and op == 3)
                str = format3(working);
                break;
        }

        //keep undecodable words visible instead of writing a blank line
        if (str.empty())
            str = ".word " + hex32(working);

        //used for debugging purposes making sure that the correct string
        //will be written to the file.
        cout << "<0x" << hex << pc << "> " << str << endl;

        //write the text of the instruction (not the string object itself)
        //and increment the program counter
        out << str << '\n';
        pc += 0x4;
    }

    return 0;
}//end main

/* Format 1 instructions
   Decodes a call instruction. The low 30 bits hold a word displacement;
   the target is pc + 4 * displacement, computed modulo 2^32 so that
   backward calls wrap to the correct address.
   Returns "call 0x<target>". */
string format1(unsigned int inst, unsigned int pc){
    const unsigned int disp = inst & 0x3fffffff;
    const unsigned int label = ((disp << 2) + pc) & 0xffffffffu;
    return "call " + hex32(label);
}//end format1

/* Format 2 instructions
   Decodes an integer branch (op2 == 2) or a sethi (op2 == 4).
   Branches: the 22-bit word displacement is sign-extended before being
   scaled and added to pc, so backward branches resolve correctly; ",a"
   is appended to the mnemonic when the annul bit is set.
   Returns an empty string for any other op2 value. */
string format2(unsigned int inst, unsigned int pc){
    const unsigned int op2 = (inst >> 22) & 0x7;

    if (op2 == 0x2) {
        //integer branch
        const unsigned int annul = (inst >> 29) & 0x1;
        const unsigned int cond = (inst >> 25) & 0xf;

        unsigned int disp = inst & 0x3fffff;
        if (disp & 0x200000)            //sign bit of the 22-bit field
            disp |= 0xffc00000u;
        const unsigned int target = (pc + (disp << 2)) & 0xffffffffu;

        string str = frmt2_cmd(cond);
        if (annul == 1)
            str += ",a";
        return str + " " + hex32(target);
    }

    if (op2 == 0x4) {
        //sethi: 22-bit constant and destination register
        const string rd = reg((inst >> 25) & 0x1f);
        return "sethi " + hex32(inst & 0x3fffff) + ", " + rd;
    }

    //not an instruction this disassembler decodes
    return string();
}//end format2

/* Format 3 instructions
   Splits a 32-bit format 3 word into rd, op3, rs1 and either a
   sign-extended 13-bit immediate (i == 1) or the rs2 register (i == 0),
   then hands the pieces to the table for op == 2 or op == 3.
   Returns an empty string when op3 is not in the selected table. */
string format3(unsigned int inst){
    const unsigned int op = (inst >> 30) & 0x3;
    const unsigned int op3 = (inst >> 19) & 0x3f;
    const unsigned int i = (inst >> 13) & 0x1;

    const string rd_s = reg((inst >> 25) & 0x1f);
    const string rs1_s = reg((inst >> 14) & 0x1f);

    //second operand: immediate integer or register
    string imm_s;
    if (i == 1) {
        int imm = static_cast<int>(inst & 0x1fff);
        if (imm & 0x1000)               //sign bit of the 13-bit field
            imm -= 0x2000;
        imm_s = itoa(imm, 10);
    } else {
        imm_s = reg(inst & 0x1f);
    }

    if (op == 0x2)
        return frmt3a_cmd(rd_s, op3, rs1_s, imm_s);
    return frmt3b_cmd(rd_s, op3, rs1_s, imm_s);
}//end format3

/* Register
   Turns a 5-bit register number into its name. Bits 4..3 choose the set
   (g, o, l, i) and bits 2..0 the index within it. %sp and %fp are not
   returned; they come back as %o6 and %i6. */
string reg(unsigned int regist){
    static const char kSet[] = { 'g', 'o', 'l', 'i' };

    string reg_name = "%";
    reg_name += kSet[(regist >> 3) & 0x3];
    reg_name += static_cast<char>('0' + (regist & 0x7));
    return reg_name;
}//end reg

/* Format 3a command
   Builds the text of an op == 0x2 instruction from the destination
   register, the op3 field, the rs1 register and the second operand
   (immediate or register) as strings.
   jmpl is written "jmpl rs1 + op2, rd"; everything else, including
   restore, is written "name rs1, op2, rd".
   Returns an empty string when op3 is not in the table. */
string frmt3a_cmd(string rd, unsigned int op3, string rs1, string imm){
    const string name = frmt3a_name(op3);
    if (name.empty())
        return name;

    const char * joiner = (op3 == 0x38) ? " + " : ", ";
    return name + " " + rs1 + joiner + imm + ", " + rd;
}//end format3a_cmd

/* Format 3b command
   Builds the text of an op == 0x3 instruction from the destination
   register, the op3 field, the rs1 register and the second operand.
   Stores are written "name rd, [rs1 + op2]"; loads, ldstub and swap are
   written "name [rs1 + op2], rd".
   Returns an empty string when op3 is not in the table. */
string frmt3b_cmd(string rd, unsigned int op3, string rs1, string imm){
    const string name = frmt3b_name(op3);
    if (name.empty())
        return name;

    const string address = "[" + rs1 + " + " + imm + "]";

    //in every group of 16 opcodes the stores occupy slots 4..7
    const bool is_store = (op3 & 0xc) == 0x4;
    if (is_store)
        return name + " " + rd + ", " + address;
    return name + " " + address + ", " + rd;
}//end format3 command

/* Format 2 command
   Maps the 4-bit cond field of an integer branch to its mnemonic.
   Returns an empty string for values above 0xf. */
string frmt2_cmd(unsigned int inst){
    static const char * const kBranch[16] = {
        "bn",  "be",  "ble", "bl",  "bleu", "blu",  "bneg", "bvs",
        "ba",  "bne", "bg",  "bge", "bgu",  "bgeu", "bpos", "bvc"
    };

    if (inst > 0xf)
        return string();
    return kBranch[inst];
}//end format2 command

/* Converts an integer to a string in the given base (2 to 16).
   A leading '-' is produced only for base 10; in other bases a negative
   value yields the digits of its magnitude. Returns an empty string when
   the base is out of range.
   unoriginal code - taken from http://www.jb.man.ac.uk/~slowe/cpp/itoa.html */
std::string itoa(int value, int base) {
    static const char kDigits[] = "0123456789abcdef";
    enum { kMaxDigits = 35 };

    std::string buf;
    buf.reserve( kMaxDigits );          // Pre-allocate enough space.

    // check that the base is valid
    if (base < 2 || base > 16)
        return buf;

    // digits are produced least significant first and reversed afterwards
    int quotient = value;
    do {
        buf += kDigits[ std::abs( quotient % base ) ];
        quotient /= base;
    } while ( quotient != 0 );

    // Append the negative sign for base 10
    if ( value < 0 && base == 10 )
        buf += '-';

    std::reverse( buf.begin(), buf.end() );
    return buf;
}//end itoa