Sweden-Number/tools/specmaker/msmangle.c

597 lines
18 KiB
C

/*
* Demangle VC++ symbols into C function prototypes
*
* Copyright 2000 Jon Griffiths
*/
#include "specmaker.h"
/* Type for parsing mangled types.
 *
 * One of these is filled in by demangle_datatype() for each data type
 * it decodes. Initialise with INIT_CT() and release with FREE_CT().
 */
typedef struct _compound_type
{
char dest_type; /* Type code of this type (normally the mangled type character) */
int flags; /* Bitmask of CT_* constraint flags (by-reference/const/volatile) */
int have_qualifiers; /* Non-zero once constraints have been read for this type */
char *expression; /* Allocated C text for the type; released by FREE_CT() */
} compound_type;
/* Initialise a compound type structure by zero-filling every field.
 * The argument is parenthesised so any lvalue expression (e.g. *ptr)
 * can be passed.
 */
#define INIT_CT(ct) do { memset (&(ct), 0, sizeof (ct)); } while (0)
/* Free the memory used by a compound structure.
 * free(NULL) is a no-op, so no guard is needed; the pointer is cleared
 * afterwards so that a repeated FREE_CT() on the same object is harmless.
 */
#define FREE_CT(ct) do { free ((ct).expression); (ct).expression = NULL; } while (0)
/* Internal functions */
/* Decode one (possibly compound) mangled data type at *str into ct */
static char *demangle_datatype (char **str, compound_type *ct,
parsed_symbol* sym);
/* Read constraints that precede the base type ('?' or 'A' prefix) */
static char *get_constraints_convention_1 (char **str, compound_type *ct);
/* Read constraints that follow the base type character */
static char *get_constraints_convention_2 (char **str, compound_type *ct);
/* Build the C text for a simple type code plus its constraints */
static char *get_type_string (const char c, const int constraints);
/* Map a type code plus its constraints to an ARG_* constant */
static int get_type_constant (const char c, const int constraints);
/* Build the C text for 'pointer to expression' */
static char *get_pointer_type_string (compound_type *ct,
const char *expression);
/*******************************************************************
 *         demangle_operator_name
 *
 * Decode a C++ operator code (one character, or two if the first
 * is '_').
 *
 * PARAMS
 *  str [I/O] Points at the first operator code character on entry;
 *            advanced past the code on success, untouched on failure.
 *
 * RETURNS
 *  A string literal naming the operator, or NULL if the code is not
 *  recognised. The caller must copy the result if it needs to own it.
 */
static const char *demangle_operator_name (char **str)
{
  char *iter = *str;
  const char *op;

  switch (*iter++)
  {
  case '0': op = "ctor"; break;
  case '1': op = "dtor"; break;
  case '2': op = "operator_new"; break;
  case '3': op = "operator_delete"; break;
  case '4': op = "operator_equals"; break;
  case '5': op = "operator_shiftright"; break;
  case '6': op = "operator_shiftleft"; break;
  case '7': op = "operator_not"; break;
  case '8': op = "operator_equalsequals"; break;
  case '9': op = "operator_notequals"; break;
  case 'A': op = "operator_array"; break;
  case 'C': op = "operator_dereference"; break;
  case 'D': op = "operator_multiply"; break;
  case 'E': op = "operator_plusplus"; break;
  case 'F': op = "operator_minusminus"; break;
  case 'G': op = "operator_minus"; break;
  case 'H': op = "operator_plus"; break;
  case 'I': op = "operator_address"; break;
  case 'J': op = "operator_dereferencememberptr"; break;
  case 'K': op = "operator_divide"; break;
  case 'L': op = "operator_modulo"; break;
  case 'M': op = "operator_lessthan"; break;
  case 'N': op = "operator_lessthanequal"; break;
  case 'O': op = "operator_greaterthan"; break;
  case 'P': op = "operator_greaterthanequal"; break;
  case 'R': op = "operator_functioncall"; break;
  case 'S': op = "operator_compliment"; break;
  case 'T': op = "operator_xor"; break;
  case 'U': op = "operator_logicalor"; break;
  case 'V': op = "operator_logicaland"; break;
  case 'W': op = "operator_or"; break;
  case 'X': op = "operator_multiplyequals"; break;
  case 'Y': op = "operator_plusequals"; break;
  case 'Z': op = "operator_minusequals"; break;
  case '_':
    switch (*iter++)
    {
    case '0': op = "operator_divideequals"; break;
    case '1': op = "operator_moduloequals"; break;
    case '2': op = "operator_shiftrightequals"; break;
    case '3': op = "operator_shiftleftequals"; break;
    case '4': op = "operator_andequals"; break;
    case '5': op = "operator_orequals"; break;
    case '6': op = "operator_xorequals"; break;
    /* FIXME: These look like static vtable/rtti information ? */
    case 'E': op = "_unknown_E"; break;
    case 'G': op = "_unknown_G"; break;
    default:
      return NULL;
    }
    break;
  default:
    /* FIXME: Other operators */
    return NULL;
  }

  *str = iter;
  return op;
}

/*******************************************************************
 *         symbol_demangle
 *
 * Demangle a C++ linker symbol into a C prototype.
 *
 * PARAMS
 *  sym [I/O] Symbol to demangle. sym->symbol is read; on success the
 *            function name, calling convention, return type and the
 *            argument arrays/count of sym are filled in.
 *
 * RETURNS
 *  0 on success, -1 if the symbol could not be demangled. On failure
 *  sym may have been partially filled in; only the temporary class and
 *  function name strings owned by this function are released here.
 */
int symbol_demangle (parsed_symbol *sym)
{
  compound_type ct;
  int is_static = 0, is_const = 0;
  int ret = -1;
  char *function_name = NULL; /* Owned; freed at 'done' */
  char *class_name = NULL;    /* Owned; freed at 'done' */
  char *name;
  static unsigned int hash = 0; /* In case of overloaded functions */

  assert (globals.do_code);
  assert (sym && sym->symbol);

  hash++;

  /* MS mangled names always begin with '?' */
  name = sym->symbol;
  if (*name++ != '?')
    return -1;

  if (VERBOSE)
    puts ("Attempting to demangle symbol");

  /* Then function name or operator code */
  if (*name == '?')
  {
    const char *op;

    name++; /* Skip the second '?' */
    op = demangle_operator_name (&name);
    if (!op)
      goto done;
    function_name = strdup (op);
  }
  else
  {
    /* Type or function name terminated by '@' */
    char *start = name;

    while (*name && *name++ != '@') ;
    if (!*name)
      goto done;
    function_name = str_substring (start, name - 1);
  }

  /* Either a class name, or '@' if the symbol is not a class member */
  if (*name == '@')
  {
    class_name = strdup ("global"); /* Non member function (or a datatype) */
    name++;
  }
  else
  {
    /* Class the function is associated with, terminated by '@@' */
    char *start = name;

    while (*name && *name++ != '@') ;
    if (*name++ != '@')
      goto done;
    class_name = str_substring (start, name - 2);
  }

  /* Note: This is guesswork on my part, but it seems to work:
   * 'Q' Means the function is passed an implicit 'this' pointer.
   * 'S' Means static member function, i.e. no implicit 'this' pointer.
   * 'Y' Is used for datatypes and functions, so there is no 'this' pointer.
   * This character also implies some other things:
   * 'Y','S' = The character after the calling convention is always the
   *           start of the return type code.
   * 'Q'       Character after the calling convention is 'const'ness code
   *           (only non static member functions can be const).
   * 'U' also occurs, it seems to behave like Q, but probably implies
   *     something else.
   */
  switch (*name++)
  {
  case 'U' :
  case 'Q' :
    /* Implicit 'this' pointer */
    sym->arg_text [sym->argc] = str_create (3, "struct ", class_name, " *");
    sym->arg_type [sym->argc] = ARG_POINTER;
    sym->arg_flag [sym->argc] = 0;
    sym->arg_name [sym->argc++] = strdup ("_this");
    /* New struct definitions can be 'grep'ed out for making a fixup header */
    if (VERBOSE)
      printf ("struct %s { int _FIXME; };\n", class_name);
    break;
  case 'S' :
    is_static = 1;
    break;
  case 'Y' :
    break;
  default:
    goto done;
  }

  /* Next is the calling convention */
  switch (*name++)
  {
  case 'A':
    sym->calling_convention = strdup ("__cdecl");
    break;
  case 'B': /* FIXME: Something to do with __declspec(dllexport)? */
  case 'I': /* __fastcall */
  case 'G':
    sym->calling_convention = strdup ("__stdcall");
    break;
  default:
    goto done;
  }

  /* If the symbol is associated with a class, its 'const' status follows */
  if (sym->argc)
  {
    if (*name == 'B')
      is_const = 1;
    else if (*name != 'E')
      goto done;
    name++;
  }

  /* Return type, or @ if 'void' */
  if (*name == '@')
  {
    sym->return_text = strdup ("void");
    sym->return_type = ARG_VOID;
    name++;
  }
  else
  {
    INIT_CT (ct);
    if (!demangle_datatype (&name, &ct, sym))
    {
      FREE_CT (ct);
      goto done;
    }
    /* Ownership of the expression moves to sym */
    sym->return_text = ct.expression;
    sym->return_type = get_type_constant (ct.dest_type, ct.flags);
  }

  /* Now come the function arguments.
   * NOTE(review): sym->argc is not checked against the capacity of the
   * sym->arg_* arrays, whose size is not visible in this file - confirm
   * that an overlong argument list cannot overflow them.
   */
  while (*name && *name != 'Z')
  {
    if (*name == '@')
    {
      name++;
      continue;
    }

    /* Decode each data type and append it to the argument list */
    INIT_CT (ct);
    if (!demangle_datatype (&name, &ct, sym))
    {
      FREE_CT (ct);
      goto done;
    }

    if (!strcmp (ct.expression, "void"))
    {
      /* 'void' terminates an argument list; its text is not kept */
      FREE_CT (ct);
      break;
    }

    /* Ownership of the expression moves to sym */
    sym->arg_text [sym->argc] = ct.expression;
    sym->arg_type [sym->argc] = get_type_constant (ct.dest_type, ct.flags);
    sym->arg_flag [sym->argc] = ct.flags;
    sym->arg_name [sym->argc] = str_create_num (1, sym->argc, "arg");
    sym->argc++;
  }

  while (*name == '@')
    name++;

  /* Functions are always terminated by 'Z'. If we made it this far and
   * Don't find it, we have incorrectly identified a data type.
   */
  if (*name != 'Z')
    goto done;

  /* Note: '()' after 'Z' means 'throws', but we don't care here */

  /* Create the function name. Include a unique number because otherwise
   * overloaded functions could have the same c signature.
   */
  sym->function_name = str_create_num (4, hash, class_name, "_",
       function_name, is_static ? "_static" : is_const ? "_const" : "_");

  assert (sym->return_text);
  assert (sym->calling_convention);
  assert (sym->function_name);

  if (VERBOSE)
    puts ("Demangled symbol OK");

  ret = 0;

done:
  /* Single exit point so the temporary names are released on every path */
  free (class_name);
  free (function_name);
  return ret;
}
/*******************************************************************
 *         demangle_datatype
 *
 * Attempt to demangle a C++ data type, which may be compound.
 * a compound type is made up of a number of simple types. e.g:
 * char** = (pointer to (pointer to (char)))
 *
 * Uses a simple recursive descent algorithm that is broken
 * and/or incomplete, without a doubt ;-)
 *
 * PARAMS
 *  str [I/O] Points at the mangled type on entry; advanced past it on
 *            success, left unchanged on failure.
 *  ct  [I/O] Receives the type code, constraint flags and an allocated
 *            C expression for the type (release with FREE_CT()).
 *  sym [I]   Symbol being demangled; its already-parsed arguments are
 *            consulted for class and back references.
 *
 * RETURNS
 *  The updated *str on success, NULL if the type could not be decoded
 *  (in which case ct->expression is left NULL).
 */
static char *demangle_datatype (char **str, compound_type *ct,
                                parsed_symbol* sym)
{
  char *iter;

  assert (str && *str);
  assert (ct);

  iter = *str;

  if (!get_constraints_convention_1 (&iter, ct))
    return NULL;

  switch (*iter)
  {
    case '_':
      if (*++iter != 'N') /* _N = bool */
        return NULL;
      iter++;
      ct->dest_type = 'I'; /* treat as int */
      if (!get_constraints_convention_2 (&iter, ct))
        return NULL;
      ct->expression = get_type_string (ct->dest_type, ct->flags);
      break;

    case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'M':
    case 'N': case 'O': case 'X': case 'Z':
      /* Simple data types */
      ct->dest_type = *iter++;
      if (!get_constraints_convention_2 (&iter, ct))
        return NULL;
      ct->expression = get_type_string (ct->dest_type, ct->flags);
      break;

    case 'U':
    case 'V':
      /* Class/struct/union */
      ct->dest_type = *iter++;
      if (*iter == '0' || *iter == '1')
      {
        /* Referring to class type (implicit 'this') */
        char *stripped;
        size_t len;

        if (!sym->argc)
          return NULL;

        iter++;
        /* Apply our constraints to the base type (struct xxx *) */
        stripped = strdup (sym->arg_text [0]);
        if (!stripped)
          fatal ("Out of Memory");

        /* If we're a reference, re-use the pointer already in the type;
         * otherwise strip the trailing " *". The flag test must be
         * parenthesised: '!' binds tighter than '&'.
         */
        len = strlen (stripped);
        if (!(ct->flags & CT_BY_REFERENCE) && len >= 2)
          stripped [len - 2] = '\0';

        ct->expression = str_create (2, ct->flags & CT_CONST ? "const " :
                         ct->flags & CT_VOLATILE ? "volatile " : "", stripped);
        free (stripped);
      }
      else if (*iter == '_')
      {
        /* The name of the class/struct, followed by '@@' */
        char *struct_name = ++iter;

        while (*iter && *iter++ != '@') ;
        if (*iter++ != '@')
          return NULL;
        struct_name = str_substring (struct_name, iter - 2);
        ct->expression = str_create (4, ct->flags & CT_CONST ? "const " :
                         ct->flags & CT_VOLATILE ? "volatile " : "", "struct ",
                         struct_name, ct->flags & CT_BY_REFERENCE ? " *" : "");
        free (struct_name);
      }
      /* Any other character leaves expression NULL and fails below */
      break;

    case 'Q': /* FIXME: Array Just treated as pointer currently */
    case 'P': /* Pointer */
      {
        compound_type sub_ct;

        INIT_CT (sub_ct);

        ct->dest_type = *iter++;
        if (!get_constraints_convention_2 (&iter, ct))
          return NULL;

        /* FIXME: P6 = Function pointer, others who knows..
         * Cast: passing a negative char to isdigit() is undefined.
         */
        if (isdigit ((unsigned char) *iter))
          return NULL;

        /* Recurse to get the pointed-to type */
        if (!demangle_datatype (&iter, &sub_ct, sym))
        {
          FREE_CT (sub_ct);
          return NULL;
        }

        ct->expression = get_pointer_type_string (ct, sub_ct.expression);

        FREE_CT (sub_ct);
      }
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        /* Referring back to previously parsed type. The digit is the
         * index of the argument; it must name one already parsed.
         */
        size_t index = (size_t) (*iter - '0');

        if (index >= (size_t) sym->argc)
          return NULL;

        /* NOTE(review): arg_type holds the value get_type_constant()
         * produced, not a mangled type character, yet callers pass
         * dest_type back to get_type_constant() - confirm this is the
         * intended behaviour for back references.
         */
        ct->dest_type = sym->arg_type [index];
        ct->expression = strdup (sym->arg_text [index]);
        iter++;
      }
      break;

    default :
      return NULL;
  }

  if (!ct->expression)
    return NULL;

  return (char *)(*str = iter);
}
/* Constraints:
* There are two conventions for specifying data type constraints. I
* don't know how the compiler chooses between them, but I suspect it
* is based on ensuring that linker names are unique.
* Convention 1. The data type modifier is given first, followed
* by the data type it operates on. '?' means passed by value,
* 'A' means passed by reference. Note neither of these characters
* is a valid base data type. This is then followed by a character
* specifying constness or volatility.
* Convention 2. The base data type (which is never '?' or 'A') is
* given first. The character modifier is optionally given after
* the base type character. If a valid character modifier is present,
* then it only applies to the current data type if the character
* after that is not 'A' 'B' or 'C' (Because this makes a convention 1
* constraint for the next data type).
*
* The conventions are usually mixed within the same symbol.
* Since 'C' is both a qualifier and a data type, I suspect that
* convention 1 allows specifying e.g. 'volatile signed char*'. In
* convention 2 this would be 'CC' which is ambiguous (i.e. Is it two
* pointers, or a single pointer + modifier?). In convention 1 it
* is encoded as '?CC' which is not ambiguous. This probably
* holds true for some other types as well.
*/
/*******************************************************************
 *         get_constraints_convention_1
 *
 * Read "prefix" constraints: a '?' (by value) or 'A' (by reference)
 * marker followed by a qualifier character 'A' (none), 'B' (const)
 * or 'C' (volatile).
 *
 * Does nothing if constraints were already recorded for ct, or if
 * the text does not start with '?' or 'A'.
 *
 * Returns the (possibly advanced) position, which is also stored in
 * *str, or NULL if the qualifier character is invalid. In the NULL
 * case *str is not updated.
 */
static char *get_constraints_convention_1 (char **str, compound_type *ct)
{
  char *cursor = *str;
  char passing;
  char qualifier;

  /* Previously got constraints for this type */
  if (ct->have_qualifiers)
    return cursor;

  passing = cursor[0];
  if (passing != '?' && passing != 'A')
    return cursor; /* No convention 1 constraints here */

  ct->have_qualifiers = 1;
  if (passing == 'A')
    ct->flags |= CT_BY_REFERENCE;

  qualifier = cursor[1];
  if (qualifier == 'B')
    ct->flags |= CT_CONST;
  else if (qualifier == 'C')
    ct->flags |= CT_VOLATILE;
  else if (qualifier != 'A') /* 'A' = non-const, non-volatile */
    return NULL;

  cursor += 2;
  *str = cursor;
  return cursor;
}
/*******************************************************************
 *         get_constraints_convention_2
 *
 * Read "suffix" constraints: an optional qualifier character that
 * follows the base type ('A' none, 'B' const, 'C' volatile).
 *
 * Skipped when constraints were already recorded for ct, except for
 * arrays ('Q'), which are processed regardless.
 *
 * Returns the (possibly advanced) position, which is also stored in
 * *str. Never returns NULL.
 */
static char *get_constraints_convention_2 (char **str, compound_type *ct)
{
  char *cursor = *str;
  char qualifier;

  /* FIXME: Why do arrays have both convention 1 & 2 constraints? */
  if (ct->have_qualifiers && ct->dest_type != 'Q')
    return cursor; /* Previously got constraints for this type */

  /* Even if none, we've got all we're getting */
  ct->have_qualifiers = 1;

  qualifier = *cursor;
  if (qualifier == 'A')
  {
    /* Only ours if it does not begin a convention 1 constraint
     * ('AA', 'AB' or 'AC') belonging to the next data type.
     */
    char next = cursor[1];

    if (!(next == 'A' || next == 'B' || next == 'C'))
      cursor++;
  }
  else if (qualifier == 'B')
  {
    ct->flags |= CT_CONST;
    cursor++;
  }
  else if (qualifier == 'C')
  {
    /* See note above, if we find 'C' it is _not_ a signed char */
    ct->flags |= CT_VOLATILE;
    cursor++;
  }

  *str = cursor;
  return cursor;
}
/*******************************************************************
 *         get_type_string
 *
 * Return a newly created string containing the C name of a simple
 * data type, decorated with its constraints: a "const " or
 * "volatile " prefix (const wins if both are set) and a " *" suffix
 * for by-reference types.
 *
 * 'X' (void) and 'Z' (...) are returned undecorated. Returns NULL
 * for an unknown type code.
 */
static char *get_type_string (const char c, const int constraints)
{
  static const struct
  {
    char code;
    const char *text;
  } base_types[] =
  {
    { 'C', "char" },               /* Signed char */
    { 'D', "char" },
    { 'E', "unsigned char" },
    { 'F', "short int" },
    { 'G', "unsigned short int" },
    { 'H', "int" },
    { 'I', "unsigned int" },
    { 'J', "long" },
    { 'K', "unsigned long" },
    { 'M', "float" },
    { 'N', "double" },
    { 'O', "long double" },
    { 'U', "struct" },
    { 'V', "struct" }
  };
  const char *qualifier = "";
  const char *indirection = "";
  const char *base = NULL;
  size_t i;

  /* These two never carry constraints */
  if (c == 'X')
    return strdup ("void");
  if (c == 'Z')
    return strdup ("...");

  for (i = 0; i < sizeof (base_types) / sizeof (base_types[0]); i++)
  {
    if (base_types[i].code == c)
    {
      base = base_types[i].text;
      break;
    }
  }
  if (!base)
    return NULL;

  if (constraints & CT_CONST)
    qualifier = "const ";
  else if (constraints & CT_VOLATILE)
    qualifier = "volatile ";

  if (constraints & CT_BY_REFERENCE)
    indirection = " *";

  return str_create (3, qualifier, base, indirection);
}
/*******************************************************************
 *         get_type_constant
 *
 * Get the ARG_* constant for this data type.
 *
 * By-reference types are always ARG_POINTER. Returns -1 for 'M'
 * (float, FIXME), 'Z' and any unrecognised type code.
 */
static int get_type_constant (const char c, const int constraints)
{
  int kind = -1;

  /* Any reference type is really a pointer */
  if (constraints & CT_BY_REFERENCE)
    return ARG_POINTER;

  switch (c)
  {
  case 'X':
    kind = ARG_VOID;
    break;

  case 'U': case 'V':
    kind = ARG_STRUCT;
    break;

  case 'P': case 'Q':
    kind = ARG_POINTER;
    break;

  case 'N': case 'O':
    kind = ARG_DOUBLE;
    break;

  case 'C': case 'D': case 'E': case 'F': case 'G':
  case 'H': case 'I': case 'J': case 'K':
    kind = ARG_LONG;
    break;

  default:
    /* 'M' (FIXME), 'Z' and anything unknown keep -1 */
    break;
  }

  return kind;
}
/*******************************************************************
 *         get_pointer_type_string
 *
 * Return a newly created string containing 'pointer to expression'.
 *
 * The pointer's own constraints come from ct: a "const " or
 * "volatile " prefix (const wins if both are set), and an extra
 * level of indirection when the pointer is itself passed by
 * reference.
 */
static char *get_pointer_type_string (compound_type *ct,
                                      const char *expression)
{
  const int flags = ct->flags;
  const char *qualifier;
  const char *stars;

  if (flags & CT_CONST)
    qualifier = "const ";
  else if (flags & CT_VOLATILE)
    qualifier = "volatile ";
  else
    qualifier = "";

  if (flags & CT_BY_REFERENCE)
    stars = " **";
  else
    stars = " *";

  /* FIXME: set a compound flag for bracketing expression if needed */
  return str_create (3, qualifier, expression, stars);
}