![]() |
Resources and Decompiling ThemWritten by Martin Lafaix |
Introduction What's that? OS/2 comes with a resource compiler, RC, which allows us to put resources in an executable file; but, it would sometimes be useful to do just the opposite, namely, extracting resources from an executable (or from a DLL). Why? If we were able to extract resources, it would help us adapt this lovely little tool, which unfortunately has all its messages and menus in, say, Chinese... :-) and which is no longer supported by its author. Or, it would allow us to correct those lovely typographical errors in the base OS/2 system, too. (At least, the French version includes some boring typos, in menu items and shortcuts :-( ) Or, it would even allow us to grab some lovely dialog box and include it in our wonderful projects. Contents This article contains four parts. The first one describes the general executable file structure, the second one describes the 16-bit EXE structure, the third describes the 32-bit EXE structure, and the fourth describes the RES to RC translation. Sample code will be given in REXX, which will use many user-defined functions, such as readw() or readl(). A (short) bibliography can be found at the end of this document. A Voyage to OMF In the following sections, "EXE" will be used as a generic term for .EXE or .DLL files. The Old DOS Header Each EXE starts with an old DOS header. Two fields interest us, namely e_magic and e_lfanew. The first one contains "MZ" and allows us to recognize the EXE header, and the second one contains the offset of the new EXE header, which is where the fun begins. The remaining fields are used for a DOS 'stub', that is, a program which displays a message like: This program cannot be run in a DOS session. This message is displayed whenever the program is run from vanilla DOS. 
struct exe_hdr /* DOS 1, 2, 3 .EXE header */ { unsigned short e_magic; /* Magic number */ unsigned short e_cblp; /* Bytes on last page of file */ unsigned short e_cp; /* Pages in file */ unsigned short e_crlc; /* Relocations */ unsigned short e_cparhdr; /* Size of header in paragraphs */ unsigned short e_minalloc; /* Minimum extra paragraphs needed */ unsigned short e_maxalloc; /* Maximum extra paragraphs needed */ unsigned short e_ss; /* Initial (relative) SS value */ unsigned short e_sp; /* Initial SP value */ unsigned short e_csum; /* Checksum */ unsigned short e_ip; /* Initial IP value */ unsigned short e_cs; /* Initial (relative) CS value */ unsigned short e_lfarlc; /* File address of relocation table */ unsigned short e_ovno; /* Overlay number */ unsigned short e_res[ERES1WDS];/* Reserved words */ unsigned short e_oemid; /* OEM identifier (for e_oeminfo) */ unsigned short e_oeminfo; /* OEM information; e_oemid specific */ unsigned short e_res2[ERES2WDS];/* Reserved words */ long e_lfanew; /* File address of new exe header */ };Figure 1. The DOS 1, 2, 3 .EXE header. Recognizing it from REXX In the following code, infile contains the EXE filename. base will then contain the new header offset. 
if charin(infile,,2) = 'MZ' then base = 1+l2d(charin(infile,61,4)) Exploring 16-Bit HeadersRecognizing a 16-bit EXE header A 16-bit EXE header starts with the 'NE' magic number; a 16-bit EXE structure starts with this magic number:struct new_exe /* New .EXE header */ { unsigned short ne_magic; /* Magic number NE_MAGIC */ unsigned char ne_ver; /* Version number */ unsigned char ne_rev; /* Revision number */ unsigned short ne_enttab; /* Offset of Entry Table */ unsigned short ne_cbenttab; /* Number of bytes in Entry Table */ long ne_crc; /* Checksum of whole file */ unsigned short ne_flags; /* Flag word */ unsigned short ne_autodata; /* Automatic data segment number */ unsigned short ne_heap; /* Initial heap allocation */ unsigned short ne_stack; /* Initial stack allocation */ long ne_csip; /* Initial CS:IP setting */ long ne_sssp; /* Initial SS:SP setting */ unsigned short ne_cseg; /* Count of file segments */ unsigned short ne_cmod; /* Entries in Module Reference Table */ unsigned short ne_cbnrestab; /* Size of non-resident name table */ unsigned short ne_segtab; /* Offset of Segment Table */ unsigned short ne_rsrctab; /* Offset of Resource Table */ unsigned short ne_restab; /* Offset of resident name table */ unsigned short ne_modtab; /* Offset of Module Reference Table */ unsigned short ne_imptab; /* Offset of Imported Names Table */ long ne_nrestab; /* Offset of Non-resident Names Table */ unsigned short ne_cmovent; /* Count of movable entries */ unsigned short ne_align; /* Segment alignment shift count */ unsigned short ne_cres; /* Count of resource entries */ unsigned char ne_exetyp; /* Target operating system */ unsigned char ne_flagsothers; /* Other .EXE flags */ char ne_res[NERESBYTES]; /* Pad structure to 64 bytes */ };Figure 2. The OS/2 286 .EXE header. The following fields interest us:
More information on the 16-bit EXE header can be found in NEWEXE.H, which comes with the Developer's toolkit. Unfortunately, it isn't very informative. Extracting a Resource From The EXE The process of extracting resources from an EXE to a RES file is quite simple. We walk through the resource table (rsrctab), and, for each entry, we find and emit the corresponding segment. (We have to twiddle the segment flag and create a small header for the resource, but that's not a big deal.)
Exploring 32-Bit HeadersRecognizing a 32-bit EXE headerA 32-bit EXE header starts with the 'LX' magic number; a 32-bit structure starts with this magic number: struct e32_exe /* New 32-bit .EXE header */ { unsigned char e32_magic[2]; /* Magic number E32_MAGIC */ unsigned char e32_border; /* The byte ordering for the .EXE */ unsigned char e32_worder; /* The word ordering for the .EXE */ unsigned long e32_level; /* The EXE format level for now = 0 */ unsigned short e32_cpu; /* The CPU type */ unsigned short e32_os; /* The OS type */ unsigned long e32_ver; /* Module version */ unsigned long e32_mflags; /* Module flags */ unsigned long e32_mpages; /* Module # pages */ unsigned long e32_startobj; /* Object # for instruction pointer */ unsigned long e32_eip; /* Extended instruction pointer */ unsigned long e32_stackobj; /* Object # for stack pointer */ unsigned long e32_esp; /* Extended stack pointer */ unsigned long e32_pagesize; /* .EXE page size */ unsigned long e32_pageshift; /* Page alignment shift in .EXE */ unsigned long e32_fixupsize; /* Fixup section size */ unsigned long e32_fixupsum; /* Fixup section checksum */ unsigned long e32_ldrsize; /* Loader section size */ unsigned long e32_ldrsum; /* Loader section checksum */ unsigned long e32_objtab; /* Object table offset */ unsigned long e32_objcnt; /* Number of objects in module */ unsigned long e32_objmap; /* Object page map offset */ unsigned long e32_itermap; /* Object iterated data map offset */ unsigned long e32_rsrctab; /* Offset of Resource Table */ unsigned long e32_rsrccnt; /* Number of resource entries */ unsigned long e32_restab; /* Offset of resident name table */ unsigned long e32_enttab; /* Offset of Entry Table */ unsigned long e32_dirtab; /* Offset of Module Directive Table */ unsigned long e32_dircnt; /* Number of module directives */ unsigned long e32_fpagetab; /* Offset of Fixup Page Table */ unsigned long e32_frectab; /* Offset of Fixup Record Table */ unsigned long e32_impmod; /* Offset of 
Import Module Name Table */ unsigned long e32_impmodcnt; /* Number of entries in ImportFigure 3. The OS/2 386 .EXE header. The following fields interest us:
Tables and Maps Relations In this section, we will view the various relations between tables and maps.
Figure 4. Tables and maps relations. To find the data of a resource r, we first have to read the corresponding entry in the resource table. The obj field of this entry allows us to find the object which contains the data. The pagemap field of the object table entry then allows us to locate the object's pages in the EXE, via the page map. To find the resource data, we then just have to read cb bytes from object, starting at offset. Extracting a Resource From The EXE The process of extracting resources from a 32-bit EXE to a RES file is similar to the 16-bit EXE to RES conversion. We walk through the resource table (rsrctab), and, for each entry, we find and emit the corresponding resource. (We have to twiddle the object flag and create a small header for the resource, but that's not a big deal.) Note: In the following code, we'll assume that objects span over consecutive pages. That is, we will not handle the case where the object's pages are arranged discontinuously in the EXE.
The RES to RC Translation And now the last part. First, we will describe the RES format, and we will then describe the data format of some resources. We will focus our interest on the human-readable resources, such as menus, stringtables and so on. Note: An important exception will be the dialog templates, for the following two reasons:
And we will finally describe briefly the included resource decompiler, rdc. The RES format The RES file is an aggregate of resources. Each resource is composed of a header, followed by the resource data.
Figure 5. The resource header. Type is the resource type (see below). Id is the resource name/identifier. Flags is the resource attributes (MOVEABLE, LOADONCALL, ...). cb is the resource size. The following types may appear: RT_POINTER 1 /* mouse pointer shape */ RT_BITMAP 2 /* bitmap */ RT_MENU 3 /* menu template */ RT_DIALOG 4 /* dialog template */ RT_STRING 5 /* string tables */ RT_FONTDIR 6 /* font directory */ RT_FONT 7 /* font */ RT_ACCELTABLE 8 /* accelerator tables */ RT_RCDATA 9 /* binary data */ RT_MESSAGE 10 /* error msg tables */ RT_DLGINCLUDE 11 /* dialog include file name */ RT_VKEYTBL 12 /* key to vkey tables */ RT_KEYTBL 13 /* key to UGL tables */ RT_CHARTBL 14 /* glyph to character tables */ RT_DISPLAYINFO 15 /* screen display information */ RT_FKASHORT 16 /* function key area short form */ RT_FKALONG 17 /* function key area long form */ RT_HELPTABLE 18 /* Help table for Cary Help manager */ RT_HELPSUBTABLE 19 /* Help subtable for Cary Help manager */ RT_FDDIR 20 /* DBCS uniq/font driver directory */ RT_FD 21 /* DBCS uniq/font driver */Other values for type denote user-defined resources. Resource data format We first have to read the resource header: res2rc: /* convert .RES format to .RC */ call skip 1 /* skipping the 'FF'x */ rt = readw() /* the resource type */ call skip 1 /* skipping the 'FF'x */ id = readw() /* the resource ID/name */ opt = readw() /* the resource flag */ cb = readl() /* the resource data size */Then, according to the resource type, we'll have to do specific operations: select when rt = 1 then call emit 'POINTER 'id' 'option()' 'file('ptr')nl when rt = 2 then call emit 'BITMAP 'id' 'option()' 'file('bmp')nl when rt = 7 then call emit 'FONT 'id' 'option()' 'file('fon')nlIf the resource is a pointer, a bitmap, a font or an icon, the resource data is the corresponding pointer, bitmap, font or icon. We then just have to recreate a file containing this data. 
when rt = 3 then do; call emit 'MENU 'id' 'option()nl'BEGIN'nl; call emit menuout(' ')nl'END'nl; endIf the resource is a menu, it's not that simple :-) The resource data is the corresponding menu structure:
cb is the size of the menu data. type is the menu type (only 0 and 1 are valid). cp is the menu code page (850 by default). offs is the starting offset of the menu data, from the start of the structure. count is the number of items composing the menu. If the menu type is 1, count is followed by another 16-bit field, ppoffs (presentation parameter offset, from the start of the structure). But we won't handle type 1 menus, so... Every item has the following format:
style is the item style (MIS_*). attrib is the item attributes (MIA_*). Id is the item identifier. If the item contains data (that is, if item is a submenu, or has the MIS_BITMAP, MIS_STRING or ... style), then the previous structure is followed by the corresponding data: MIS_SUBMENU Data is a menu structure, as previously defined. MIS_STRING Data is a null-terminated string. MIS_BITMAP Data can be any of the following: 'FF'x, followed by a 16-bit word, representing the resource identifier. '00'x. No resource identifier provided. "#", and subsequent characters make up the decimal representation of the resource identifier.So, if the resource is a menu, we will have to emit each item, recursively (as a menu can contain a submenu, ...). when rt = 5 then call emit 'STRINGTABLE 'option()nl'BEGIN'strout()'END'nl when rt = 10 then call emit 'MESSAGETABLE 'option()nl'BEGIN'strout()'END'nlIf the resource is a stringtable or a messagetable, then we have to emit the corresponding table. Each string/messagetable contains up to 16 strings. (In a RC file, you can have more than one stringtable, with more than 16 strings, but rc does not preserve your ordering -- string IDs are maintained, though.) In the RES file, STRINGTABLE data looks like the following:
Each string is zero-terminated. If len is zero, the string does not exist. when rt = 8 then do; call emit 'ACCELTABLE 'id' 'option()nl'BEGIN'nl||keyout()'END'nl; end If the resource is an acceltable, then we have to emit the corresponding table. ACCELTABLE resource data looks like the following:
count is the number of keys in the acceltable. cp is the acceltable codepage. And the type/key/cmd triplets describe the accel-keys : type is the key's type (VIRTUALKEY, shifted, ...). key is the key's value (VK_F1, "a", ...). cmd is the accel command. when rt = 11 then do; call emit 'DLGINCLUDE 'id' 'charin(infile,,cb)nl; cb = 0; endIf the resource is a dlginclude statement, then the resource data will contain the included file name. Note: This information is of little value if you don't have the included file... when rt = 18 then call emit 'HELPTABLE 'id||nl'BEGIN'htout()'END'nlIf the resource is a helptable, then the resource data will contain the following:
wnd n is the application window ID. sub n is the help subtable ID. ext n is the extended help panel ID. when rt = 19 then call emit 'HELPSUBTABLE 'id||hstout()nlIf the resource is a HELPSUBTABLE, then the resource data will contain the following:
Each subitem contains size items (the default size value is 2): wnd n is the child window ID. help n is the help panel ID. And, if size is more than 2, the remaining integers have an application-defined meaning. otherwise call emit 'RESOURCE 'rt' 'id' 'option()' 'file('dat') If the resource is of any other type, then we emit the 'RESOURCE' generic statement, and we put the resource data in a .DAT file. The rc compiler will handle that gracefully. :-) end /* select */ The Resource decompiler The interesting part, at last! A resource decompiler (named rdc.cmd) is provided in rdc.zip. Its usage is as follows: Usage: rdc [Figure 6. The resource decompiler usage. Note: Please note the following:
Summary In the previous parts, we have seen how to extract resources from an executable (a .EXE or a .DLL), and how to extract some resources from a .RES file, as we have focused our interest on the 'human-readable' resources. While I realize there are still many obscure points, I hope you will find the included information useful. And I'll try my best to answer all questions on it.
BibliographyOn EXE format
Functions outname This function, which requires two parameters, returns an output filename (if one does not already exist). outname: /* return name made from infile and extension */ if outfile = '' then if lastpos('.',arg(1)) > lastpos('\',arg(1)) then outfile = left(arg(1),lastpos('.',arg(1)))arg(2) else outfile = arg(1)'.'arg(2) return outfile readw This function reads one word (two bytes) from the current file position. The file's position is updated. readw: /* read one word from infile */ return w2d(charin(infile,,2)) readl This function reads one long word (four bytes) from the current file position. The file's position is updated. readl: /* read one long from infile */ return l2d(charin(infile,,4)) skip This function skips arg(1) chars in the current file. The file's position is updated. skip: /* skip arg(1) chars */ return charin(infile,,arg(1)) bit This function returns bit arg(2) of arg(1). arg(1) can contain up to 16 bits (it is converted to four hexadecimal digits). Note: bits are numbered from left to right, starting at 1 (bit 1 is the most significant bit). bit: /* return bit arg(2) of arg(1) */ return substr(x2b(d2x(arg(1),4)), arg(2),1) w2d This function translates a little-endian word to a REXX integer. w2d: /* little-endian word to decimal */ w = c2x(arg(1)) return x2d(substr(w,3,2)substr(w,1,2)) d2w This function translates a REXX integer to a little-endian word. d2w: /* decimal to little-endian word */ w = d2x(arg(1),4) return x2c(substr(w,3,2)substr(w,1,2)) l2d This function translates a little-endian long word to a REXX integer. l2d: /* little-endian long to decimal */ l = c2x(arg(1)) return x2d(substr(l,7,2)substr(l,5,2)substr(l,3,2)substr(l,1,2)) d2l This function translates a REXX integer to a little-endian long word. d2l: /* decimal to little-endian long */ l = d2x(arg(1),8) return x2c(substr(l,7,2)substr(l,5,2)substr(l,3,2)substr(l,1,2)) emit This function writes arg(1) to output file. emit: /* write data to output file */ return charout(outfile,arg(1)) option This function translates the option's attributes into an RC string. 
option: /* convert flags to option string */ if bit(opt,10) then r = 'PRELOAD'; else r = 'LOADONCALL' if bit(opt,12) then r = r' MOVEABLE' if bit(opt, 4) then r = r' DISCARDABLE' if \ (bit(opt,4) | bit(opt,12)) then r = r' FIXED' if r = 'LOADONCALL MOVEABLE DISCARDABLE' then r = '' return r file This function creates a new file, with extension arg(1), and fills it with cb bytes of infile. file: /* write cb bytes to res_xxx.arg(1) */ r = 'res_'right(fnum,4,'0')'.'arg(1) call charout r,charin(infile,,cb) fnum = fnum+1; cb = 0 call stream r,'c','close' return r strout This function extracts string/messagetable definitions, and returns a string containing the table. strout: /* extract strings definitions */ call skip 2 id = (id-1)*16; cb = cb-2; r = nl do while cb > 0 len = x2d(c2x(charin(infile,,1))) if len > 1 then r = r' 'left(id,8)'"'charin(infile,,len-1)'"'nl call skip 1 id = id+1; cb = cb-len-1 end /* do */ return r keyout This function extracts an acceltable definition and emits it to the output file; the string it returns is empty. keyout: /* extract acceltable definitions */ procedure expose nl cb infile outfile cnt = readw() cp = readw() cb = cb-4 if cp \= 850 then call emit arg(1)'CODEPAGE 'cp||nl do cnt typ = readw() key = readw() if \ bit(typ,15) & key >= 32 & key <= 255 then key = '"'d2c(key)'"'; else key = '0x'd2x(key) cmd = readw() cb = cb-6; t = '' if bit(typ,16) then t = t', CHAR' if bit(typ,15) then t = t', VIRTUALKEY' if bit(typ,14) then t = t', SCANCODE' if bit(typ,13) then t = t', SHIFT' if bit(typ,12) then t = t', CONTROL' if bit(typ,11) then t = t', ALT' if bit(typ,10) then t = t', LONEKEY' if bit(typ, 8) then t = t', SYSCOMMAND' if bit(typ, 7) then t = t', HELP' call emit ' 'left(key',',8)left(cmd',',8)substr(t,3)nl end /* do */ return '' htout This function returns a string containing the HELPTABLE definition. 
htout: /* extract helptable definitions */ r = nl i = readw() do while i \= 0 r = r' HELPITEM 'i', 'readw() call skip 2 r = r', 'readw()nl; cb = cb-8 i = readw() end /* do */ cb = cb-2 return r hstout This function returns a string containing the HELPSUBTABLE definition. hstout: /* extract helpsubtable definitions */ sis = readw() if sis \= 2 then r = nl'SUBITEMSIZE 'sis; else r = '' r = r||nl'BEGIN'nl; cb = cb-2 i = readw() do while i \= 0 r = r||' HELPSUBITEM 'i do sis-1; r = r', 'readw(); end cb = cb-2*sis; r = r||nl i = readw(); end /* do */ cb = cb-2 return r'END' itemout This function emits the current menu item. itemout: /* extract menu item definition */ procedure expose nl cb infile outfile cb = cb-6; s = ''; a = ''; r = arg(1)'MENUITEM "'; x = '| MIS_'; y = '| MIA_' sty = readw() att = readw() iid = readw() if \ (bit(sty,13) | bit(sty,14)) then do c = charin(infile); cb = cb-1 if c = 'FF'x & bit(sty,15) then do; r = r'#'readw(); cb = cb-2; end else do while c \= '00'x; r = r||c; c = charin(infile); cb = cb-1; end end if bit(sty,15) then s = s x'BITMAP' if bit(sty,14) then s = s x'SEPARATOR' if bit(sty,13) then s = s x'OWNERDRAW' if bit(sty,12) then s = s x'SUBMENU' if bit(sty,11) then s = s x'MULTMENU' if bit(sty,10) then s = s x'SYSCOMMAND' if bit(sty, 9) then s = s x'HELP' if bit(sty, 8) then s = s x'STATIC' if bit(sty, 7) then s = s x'BUTTONSEPARATOR' if bit(sty, 6) then s = s x'BREAK' if bit(sty, 5) then s = s x'BREAKSEPARATOR' if bit(sty, 4) then s = s x'GROUP' if bit(sty, 3) then s = s x'SINGLE' if bit(att,11) then a = a y'NODISMISS' if bit(att, 4) then a = a y'FRAMED' if bit(att, 3) then a = a y'CHECKED' if bit(att, 2) then a = a y'DISABLED' if bit(att, 1) then a = a y'HILITED' if a \= '' then a = ','substr(a,3) if s \= '' then s = ','substr(s,3); else if a \= '' then s = ',' call emit r'", 'iid||s||a||nl if bit(sty,12) then do; call emit arg(1)'BEGIN'nl; call emit menuout(arg(1)' ','')arg(1)'END'nl; end return menuout This function emits the current 
menu or submenu. menuout: /* extract menus definitions */ procedure expose nl cb infile outfile cb = cb-10; cbs = readw() typ = readw() cp = readw() off = readw() cnt = readw() if arg(2) \= '' then do if cp \= 850 then call emit 'CODEPAGE 'cp||nl call emit arg(2) end /* do */ do cnt; call itemout arg(1); end return '' |