# Sweden-Number/tools/winapi/winapi_parser.pm

#
# Copyright 1999, 2000, 2001 Patrik Stridvall
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
package winapi_parser;
use strict;
use warnings 'all';
use output qw($output);
use options qw($options);

# Define a couple of common regexp tidbits.
#
# $CALL_CONVENTION is a regexp alternation of the calling-convention
# keywords recognised in front of a function name.  It is interpolated
# into the declaration patterns in parse_c_file.  Note that the string
# ends with "|", so the alternation also contains an empty branch.
my $CALL_CONVENTION="__cdecl|__stdcall|" .
    "__RPC_API|__RPC_STUB|__RPC_USER|RPC_ENTRY|" .
    "RPC_VAR_ENTRY|STDMETHODCALLTYPE|NET_API_FUNCTION|" .
    "CALLBACK|CDECL|NTAPI|PASCAL|APIENTRY|" .
    "SEC_ENTRY|VFWAPI|VFWAPIV|WINGDIPAPI|WMIAPI|WINAPI|WINAPIV|";
# parse_c_file($file, \%callbacks)
#
# Reads the C source file $file line by line and reports what it finds
# through the code references stored in %callbacks.  Every key is
# optional; a missing entry is replaced by a no-op sub:
#
#   c_comment_found($begin_line, $end_line, $comment)
#   cplusplus_comment_found($line, $comment)
#   preprocessor_found($directive, $argument)
#   function_create()          must return an object accepting the setter
#                              calls made in $function_end below
#   function_found($function)  called with that object once it is filled in
#   type_create, type_found    looked up, but never invoked by this routine
#
# Dies if $file cannot be opened, or if the text following the closing
# brace of a typedef body contains a ';' but cannot be parsed as a list of
# type names.  Progress/diagnostic output goes to STDERR, STDOUT and
# $output depending on $options->verbose and $options->debug.
sub parse_c_file($$) {
    my $file = shift;
    my $callbacks = shift;

    my $empty_callback = sub { };

    my $c_comment_found_callback = $$callbacks{c_comment_found} || $empty_callback;
    my $cplusplus_comment_found_callback = $$callbacks{cplusplus_comment_found} || $empty_callback;
    my $function_create_callback = $$callbacks{function_create} || $empty_callback;
    my $function_found_callback = $$callbacks{function_found} || $empty_callback;
    # NOTE: the two type callbacks are fetched but nothing below calls them.
    my $type_create_callback = $$callbacks{type_create} || $empty_callback;
    my $type_found_callback = $$callbacks{type_found} || $empty_callback;
    my $preprocessor_found_callback = $$callbacks{preprocessor_found} || $empty_callback;

    # global
    my $debug_channels = [];

    # $function_begin records the pieces of a declaration; $function_end
    # builds the function object from them and hands it to function_found.
    # The state shared between the two lives in the enclosing bare block.
    my $in_function = 0;
    my $function_begin;
    my $function_end;
    {
        my $documentation_line;
        my $documentation;
        my $function_line;
        my $linkage;
        my $return_type;
        my $calling_convention;
        my $internal_name = "";
        my $argument_types;
        my $argument_names;
        my $argument_documentations;
        my $statements_line;
        my $statements;

        $function_begin = sub {
            $documentation_line = shift;
            $documentation = shift;
            $function_line = shift;
            $linkage = shift;
            $return_type= shift;
            $calling_convention = shift;
            $internal_name = shift;
            $argument_types = shift;
            $argument_names = shift;
            $argument_documentations = shift;

            # No names at all: give every argument type an empty name.
            if(defined($argument_names) && defined($argument_types) &&
               $#$argument_names == -1)
            {
                foreach my $n (0..$#$argument_types) {
                    push @$argument_names, "";
                }
            }

            # No per-argument comments at all: pad with empty strings, one
            # per argument type.  (The loop used to be bounded by the empty
            # documentation array itself and therefore never ran.)
            if(defined($argument_documentations) && defined($argument_types) &&
               $#$argument_documentations == -1)
            {
                foreach my $n (0..$#$argument_types) {
                    push @$argument_documentations, "";
                }
            }

            $in_function = 1;
        };

        $function_end = sub {
            $statements_line = shift;
            $statements = shift;

            my $function = $function_create_callback->();

            if(!defined($documentation_line)) {
                $documentation_line = 0;
            }

            $function->file($file);
            $function->debug_channels([@$debug_channels]);
            $function->documentation_line($documentation_line);
            $function->documentation($documentation);
            $function->function_line($function_line);
            $function->linkage($linkage);
            $function->return_type($return_type);
            $function->calling_convention($calling_convention);
            $function->internal_name($internal_name);
            # The arrays are copied so later parsing cannot alter them.
            if(defined($argument_types)) {
                $function->argument_types([@$argument_types]);
            }
            if(defined($argument_names)) {
                $function->argument_names([@$argument_names]);
            }
            if(defined($argument_documentations)) {
                $function->argument_documentations([@$argument_documentations]);
            }
            $function->statements_line($statements_line);
            $function->statements($statements);

            $function_found_callback->($function);

            $in_function = 0;
        };
    }

    # $type_begin/$type_end bracket a typedef.  $type_end currently only
    # walks the names; the output statements are commented out.
    my $in_type = 0;
    my $type_begin;
    my $type_end;
    {
        my $type;

        $type_begin = sub {
            $type = shift;
            $in_type = 1;
        };

        $type_end = sub {
            my $names = shift;

            foreach my $name (@$names) {
                if($type =~ /^(?:enum|interface|struct|union)/) {
                    # $output->write("typedef $type {\n");
                    # $output->write("} $name;\n");
                } else {
                    # $output->write("typedef $type $name;\n");
                }
            }
            $in_type = 0;
        };
    }

    # Maps the second argument of DEFINE_REGS_ENTRYPOINT_n to the first;
    # function names found later are translated through this table.
    my %regs_entrypoints;

    # Every C comment seen so far and the line on which each one ended.
    my @comment_lines = ();
    my @comments = ();

    my $statements_line;
    my $statements;
    my $level = 0;            # current brace nesting depth
    my $extern_c = 0;
    my $again = 0;            # reprocess $_ without reading a new line
    my $lookahead = 0;        # append the next line to $_ instead of replacing it
    my $lookahead_count = 0;

    print STDERR "Processing file '$file' ... " if $options->verbose;
    # Three-argument open on a lexical handle: the file name is taken
    # literally and the handle is closed even if we die below.
    open(my $in, "<", $file) || die "<internal>: $file: $!\n";

    local $_ = "";
    readmore: while($again || defined(my $line = <$in>)) {
        $_ = "" if !defined($_);
        if(!$again) {
            chomp $line;

            if($lookahead) {
                $lookahead = 0;
                $_ .= "\n" . $line;
                $lookahead_count++;
            } else {
                $_ = $line;
                $lookahead_count = 0;
            }
            $output->write(" $level($lookahead_count): $line\n") if $options->debug >= 2;
            $output->write("*** $_\n") if $options->debug >= 3;
        } else {
            $lookahead_count = 0;
            $again = 0;
        }

        # CVS merge conflicts in file?
        if(/^(<<<<<<<|=======|>>>>>>>)/) {
            $output->write("$file: merge conflicts in file\n");
            last;
        }

        # Strip comments.  Text that is not a comment (including complete
        # string literals) is moved from $_ to $prefix piece by piece.
        my $prefix="";
        while ($_ ne "")
        {
            if (s/^([^\"\/]+|\"(?:[^\\\"]*|\\.)*\")//)
            {
                $prefix.=$1;
            }
            elsif (/^\/\*/)
            {
                # remove C comments
                if(s/^(\/\*.*?\*\/)//s) {
                    my @lines = split(/\n/, $1);
                    push @comment_lines, $.;
                    push @comments, $1;
                    $c_comment_found_callback->($. - $#lines, $., $1);
                    # Replace the comment by a space, or by as many
                    # newlines as it spanned, so line structure survives.
                    if($#lines <= 0) {
                        $_ = "$prefix $_";
                    } else {
                        $_ = $prefix . ("\n" x $#lines) . $_;
                    }
                    $again = 1;
                } else {
                    # Comment is not closed yet: fetch another line.
                    $_ = "$prefix$_";
                    $lookahead = 1;
                }
                next readmore;
            }
            elsif (s/^(\/\/.*)$//)
            {
                # remove C++ comments
                $cplusplus_comment_found_callback->($., $1);
                $again = 1;
            }
            elsif (s/^(.)//)
            {
                $prefix.=$1;
            }
        }
        $_=$prefix;

        # remove preprocessor directives
        if(s/^\s*\#/\#/s) {
            if (/^#\s*if\s+0\s*$/ms) {
                # Skip #if 0 ... #endif sections entirely.
                # They are typically used as 'super comments' and may not
                # contain C code. This totally ignores nesting.
                if(s/^(\s*#\s*if\s+0\s*\n.*?\n\s*#\s*endif\s*)\n//s) {
                    my @lines = split(/\n/, $1);
                    $_ = "\n" x $#lines;
                    $preprocessor_found_callback->("if", "0");
                    $again = 1;
                } else {
                    $lookahead = 1;
                }
                next readmore;
            }
            elsif(/^(\#.*?)\\$/s) {
                # Line continuation: drop the backslash and read on.
                $_ = "$1\n";
                $lookahead = 1;
                next;
            } elsif(s/^\#\s*(\w+)((?:\s+(.*?))?\s*)$//s) {
                my @lines = split(/\n/, $2);
                if($#lines > 0) {
                    $_ = "\n" x $#lines;
                }
                if(defined($3)) {
                    $preprocessor_found_callback->($1, $3);
                } else {
                    $preprocessor_found_callback->($1, "");
                }
                $again = 1;
                next;
            }
        }

        # Remove extern "C"
        if(s/^\s*extern\s+"C"\s+\{//m) {
            $extern_c = 1;
            $again = 1;
            next;
        }

        # Pick the documentation comment: the most recent comment that
        # starts with "/**" and is not merely a row of asterisks.  Comments
        # collected after it are treated as per-argument documentation.
        my $documentation_line;
        my $documentation;
        my @argument_documentations = ();
        {
            my $n = $#comments;
            while($n >= 0 && ($comments[$n] !~ /^\/\*\*/ ||
                              $comments[$n] =~ /^\/\*\*+\/$/))
            {
                $n--;
            }

            if(defined($comments[$n]) && $n >= 0) {
                my @lines = split(/\n/, $comments[$n]);

                $documentation_line = $comment_lines[$n] - scalar(@lines) + 1;
                $documentation = $comments[$n];

                for(my $m=$n+1; $m <= $#comments; $m++) {
                    if($comments[$m] =~ /^\/\*\*+\/$/ ||
                       $comments[$m] =~ /^\/\*\s*(?:\!)?defined/) # FIXME: Kludge
                    {
                        @argument_documentations = ();
                        next;
                    }
                    push @argument_documentations, $comments[$m];
                }
            } else {
                $documentation = "";
            }
        }

        if($level > 0)
        {
            # Inside braces: accumulate text up to the next brace into
            # $line, stepping over character and string literals so that
            # braces inside them are not counted.
            my $line = "";
            while(/^[^\{\}]/) {
                s/^([^\{\}\'\"]*)//s;
                $line .= $1;
                if(s/^\'//) {
                    $line .= "\'";
                    while(/^./ && !s/^\'//) {
                        s/^([^\'\\]*)//s;
                        $line .= $1;
                        if(s/^\\//) {
                            $line .= "\\";
                            if(s/^(.)//s) {
                                $line .= $1;
                                if($1 eq "0") {
                                    s/^(\d{0,3})//s;
                                    $line .= $1;
                                }
                            }
                        }
                    }
                    $line .= "\'";
                } elsif(s/^\"//) {
                    $line .= "\"";
                    while(/^./ && !s/^\"//) {
                        s/^([^\"\\]*)//s;
                        $line .= $1;
                        if(s/^\\//) {
                            $line .= "\\";
                            if(s/^(.)//s) {
                                $line .= $1;
                                if($1 eq "0") {
                                    s/^(\d{0,3})//s;
                                    $line .= $1;
                                }
                            }
                        }
                    }
                    $line .= "\"";
                }
            }

            if(s/^\{//) {
                $_ = $'; $again = 1;
                $line .= "{";
                print "+1: \{$_\n" if $options->debug >= 2;
                $level++;
                $statements .= $line;
            } elsif(s/^\}//) {
                $_ = $'; $again = 1;
                # The brace closing the outermost body is not recorded.
                $line .= "}" if $level > 1;
                print "-1: \}$_\n" if $options->debug >= 2;
                $level--;
                if($level == -1 && $extern_c) {
                    $extern_c = 0;
                    $level = 0;
                }
                $statements .= $line;
            } else {
                $statements .= "$line\n";
            }

            if($level == 0) {
                if($in_function) {
                    $function_end->($statements_line, $statements);
                    $statements = undef;
                } elsif($in_type) {
                    # After "typedef ... { ... }" expect the list of type
                    # names terminated by ';'.
                    if(/^\s*((?:(?:FAR\s*)?\*\s*(?:RESTRICTED_POINTER\s+)?)?
                        (?:volatile\s+)?
                        (?:\w+|WS\(\w+\))\s*
                        (?:\s*,\s*(?:(?:FAR\s*)?\*+\s*(?:RESTRICTED_POINTER\s+)?)?(?:volatile\s+)?(?:\w+|WS\(\w+\)))*\s*);/sx) {
                        my @parts = split(/\s*,\s*/, $1);
                        $type_end->([@parts]);
                    } elsif(/;/s) {
                        die "$file: $.: syntax error: '$_'\n";
                    } else {
                        $lookahead = 1;
                    }
                }
            }
            next;
        } elsif(/(extern\s+|static\s+)?((interface\s+|struct\s+|union\s+|enum\s+|signed\s+|unsigned\s+)?\w+((\s*\*)+\s*|\s+))
            (($CALL_CONVENTION)\s+)?
            (?:DECLSPEC_HOTPATCH\s+)?
            (\w+(\(\w+\))?)\s*\((.*?)\)\s*(\{|\;)/sx)
        {
            # Function declaration or definition.  Captures used below:
            # $1 linkage, $2 return type, $7 calling convention, $8 name,
            # $10 argument list, $11 terminator ("{" or ";").
            my @lines = split(/\n/, $&);
            my $function_line = $. - scalar(@lines) + 1;

            $_ = $'; $again = 1;

            if($11 eq "{") {
                $level++;
            }

            my $linkage = $1;
            my $return_type = $2;
            my $calling_convention = $7;
            my $name = $8;
            my $arguments = $10;

            if(!defined($linkage)) {
                $linkage = "";
            }

            if(!defined($calling_convention)) {
                $calling_convention = "";
            }

            $linkage =~ s/\s*$//;

            $return_type =~ s/\s*$//;
            $return_type =~ s/\s*\*\s*/*/g;
            $return_type =~ s/(\*+)/ $1/g;

            if($regs_entrypoints{$name}) {
                $name = $regs_entrypoints{$name};
            }

            $arguments =~ y/\t\n/ /;
            $arguments =~ s/^\s*(.*?)\s*$/$1/;
            if($arguments eq "") { $arguments = "..." }

            my @argument_types;
            my @argument_names;
            my @arguments;
            my $n = 0;
            # Peel one comma-separated argument at a time off the front;
            # a parenthesised group counts as part of the argument.
            while ($arguments =~ s/^((?:[^,\(\)]*|(?:\([^\)]*\))?)+)(?:,|$)// && $1) {
                my $argument = $1;
                push @arguments, $argument;

                my $argument_type = "";
                my $argument_name = "";

                $argument =~ s/^\s*(.*?)\s*$/$1/;
                # print "  " . ($n + 1) . ": '$argument'\n";
                $argument =~ s/^(?:IN OUT|IN|OUT)?\s+//;
                $argument =~ s/^(?:const|CONST|GDIPCONST|volatile)?\s+//;
                if($argument =~ /^\.\.\.$/) {
                    $argument_type = "...";
                    $argument_name = "...";
                } elsif($argument =~ /^
                    ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)?
                      (?:short\s+(?=int)|long\s+(?=int))?)?(?:\w+|ElfW\(\w+\)|WS\(\w+\)))\s*
                    ((?:__RPC_FAR|const|CONST|GDIPCONST|volatile)?\s*(?:\*\s*(?:__RPC_FAR|const|CONST|volatile)?\s*?)*)\s*
                    (\w*)\s*(\[\])?(?:\s+OPTIONAL)?$/x)
                {
                    # Plain argument: type, optional pointer part, optional
                    # name, optional "[]".
                    $argument_type = $1;
                    if ($2) {
                        $argument_type .= " $2";
                    }
                    if ($4) {
                        $argument_type .= "$4";
                    }
                    $argument_name = $3;
                } elsif ($argument =~ /^
                    ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)?
                      (?:short\s+(?=int)|long\s+(?=int))?)?\w+)\s*
                    ((?:const|volatile)?\s*(?:\*\s*(?:const|volatile)?\s*?)*)\s*
                    (?:(?:$CALL_CONVENTION)\s+)?
                    \(\s*(?:$CALL_CONVENTION)?\s*\*\s*((?:\w+)?)\s*\)\s*
                    \(\s*(.*?)\s*\)$/x)
                {
                    # Function pointer argument.
                    my $return_type = $1;
                    if($2) {
                        $return_type .= " $2";
                    }
                    $argument_name = $3;
                    my $arguments = $4;

                    $return_type =~ s/\s+/ /g;
                    $arguments =~ s/\s*,\s*/,/g;

                    $argument_type = "$return_type (*)($arguments)";
                } elsif ($argument =~ /^
                    ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)
                      (?:short\s+(?=int)|long\s+(?=int))?)?\w+)\s*
                    ((?:const|volatile)?\s*(?:\*\s*(?:const|volatile)?\s*?)*)\s*
                    (\w+)\s*\[\s*(.*?)\s*\](?:\[\s*(.*?)\s*\])?$/x)
                {
                    # Array argument with one or two dimensions.
                    my $return_type = $1;
                    if($2) {
                        $return_type .= " $2";
                    }
                    $argument_name = $3;

                    $argument_type = "$return_type\[$4\]";
                    if (defined($5)) {
                        $argument_type .= "\[$5\]";
                    }

                    # die "$file: $.: syntax error: '$argument_type':'$argument_name'\n";
                } else {
                    # This is either a complex argument type, typically
                    # involving parentheses, or a macro argument. This is rare
                    # so just ignore the 'function' declaration.
                    print STDERR "$file: $.: cannot parse declaration argument (ignoring): '$argument'\n";
                    next readmore;
                }

                $argument_type =~ s/\s*(?:const|volatile)\s*/ /g; # Remove const/volatile
                $argument_type =~ s/([^\*\(\s])\*/$1 \*/g; # Assure whitespace between non-* and *
                $argument_type =~ s/,([^\s])/, $1/g; # Assure whitespace after ,
                $argument_type =~ s/\*\s+\*/\*\*/g; # Remove whitespace between * and *
                $argument_type =~ s/([\(\[])\s+/$1/g; # Remove whitespace after ( and [
                $argument_type =~ s/\s+([\)\]])/$1/g; # Remove whitespace before ] and )
                $argument_type =~ s/\s+/ /g; # Remove multiple whitespace (every run, hence /g)
                $argument_type =~ s/^\s*(.*?)\s*$/$1/; # Remove leading and trailing whitespace

                $argument_name =~ s/^\s*(.*?)\s*$/$1/; # Remove leading and trailing whitespace

                $argument_types[$n] = $argument_type;
                $argument_names[$n] = $argument_name;
                # print "  " . ($n + 1) . ": '" . $argument_types[$n] . "', '" . $argument_names[$n] . "'\n";

                $n++;
            }
            # A lone "void" means the function takes no arguments.
            if($#argument_types == 0 && $argument_types[0] =~ /^void$/i) {
                $#argument_types = -1;
                $#argument_names = -1;
            }

            if($options->debug) {
                print "$file: $return_type $calling_convention $name(" . join(",", @arguments) . ")\n";
            }

            $function_begin->($documentation_line, $documentation,
                              $function_line, $linkage, $return_type, $calling_convention, $name,
                              \@argument_types,\@argument_names,\@argument_documentations);
            # A declaration terminated by ';' has no body to wait for.
            if($level == 0) {
                $function_end->(undef, undef);
            }
            $statements_line = $.;
            $statements = "";
        } elsif(/__ASM_GLOBAL_FUNC\(\s*(.*?)\s*,/s) {
            my @lines = split(/\n/, $&);
            my $function_line = $. - scalar(@lines) + 1;

            $_ = $'; $again = 1;

            $function_begin->($documentation_line, $documentation,
                              $function_line, "", "void", "__asm", $1);
            $function_end->($., "");
        } elsif(/DEFINE_THISCALL_WRAPPER\((\S*)\)/s) {
            my @lines = split(/\n/, $&);
            my $function_line = $. - scalar(@lines) + 1;

            $_ = $'; $again = 1;

            # No argument arrays are passed (the former trailing "\()"
            # evaluated to an empty list).
            $function_begin->($documentation_line, $documentation,
                              $function_line, "", "void", "", "__thiscall_" . $1);
            $function_end->($function_line, "");
        } elsif(/DEFINE_REGS_ENTRYPOINT_\d+\(\s*(\S*)\s*,\s*([^\s,\)]*).*?\)/s) {
            $_ = $'; $again = 1;
            $regs_entrypoints{$2} = $1;
        } elsif(/DEFAULT_DEBUG_CHANNEL\s*\((\S+)\)/s) {
            $_ = $'; $again = 1;
            # The default channel goes to the front of the list.
            unshift @$debug_channels, $1;
        } elsif(/(DEFAULT|DECLARE)_DEBUG_CHANNEL\s*\((\S+)\)/s) {
            $_ = $'; $again = 1;
            # $1 is the DEFAULT/DECLARE keyword; the channel name is $2.
            push @$debug_channels, $2;
        } elsif(/typedef\s+(enum|interface|struct|union)(?:\s+DECLSPEC_ALIGN\(\d+\))?(?:\s+(\w+))?\s*\{/s) {
            $_ = $'; $again = 1;

            $level++;
            my $type = $1;
            if(defined($2)) {
                $type .= " $2";
            }

            $type_begin->($type);
        } elsif(/typedef\s+
            ((?:const\s+|CONST\s+|enum\s+|interface\s+|long\s+|signed\s+|short\s+|struct\s+|union\s+|unsigned\s+|volatile\s+)*?)
            (\w+)
            (?:\s+const|\s+volatile)?
            ((?:\s*(?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+\s*|\s+)(?:volatile\s+|DECLSPEC_ALIGN\(\d+\)\s+)?\w+\s*(?:\[[^\]]*\])*
            (?:\s*,\s*(?:\s*(?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+\s*|\s+)\w+\s*(?:\[[^\]]*\])?)*)
            \s*;/sx)
        {
            # Simple typedef: $1 qualifiers, $2 base type, $3 the
            # comma-separated declarators.
            $_ = $'; $again = 1;

            my $type = "$1 $2";

            my @names;
            # The declarators are in $3 ($2 is the base type).  split strips
            # the whitespace around commas, so a declarator may start
            # directly with its name.
            my @parts = split(/\s*,\s*/, $3);
            foreach my $part (@parts) {
                if($part =~ /(?:\s*((?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+)\s*|\s*)(\w+)\s*(\[[^\]]*\])?/) {
                    my $name = $2;
                    if(defined($1)) {
                        $name = "$1$2";
                    }
                    if(defined($3)) {
                        $name .= $3;
                    }
                    push @names, $name;
                }
            }
            $type_begin->($type);
            $type_end->([@names]);
        } elsif(/typedef\s+
            (?:(?:const\s+|enum\s+|interface\s+|long\s+|signed\s+|short\s+|struct\s+|union\s+|unsigned\s+|volatile\s+)*?)
            (\w+(?:\s*\*+\s*)?)\s*
            (?:(\w+)\s*)?
            \((?:(\w+)\s*)?\s*(?:\*\s*(\w+)|_ATL_CATMAPFUNC)\s*\)\s*
            (?:\(([^\)]*)\)|\[([^\]]*)\])\s*;/sx)
        {
            # Pointer-to-function or pointer-to-array typedef: $1 return or
            # element type, $2/$3 word before/inside the parenthesis (used
            # as calling convention), $4 name, $5 parameters, $6 array size.
            $_ = $'; $again = 1;
            my $type;
            if(defined($2) || defined($3)) {
                my $cc = $2 || $3;
                if(defined($5)) {
                    $type = "$1 ($cc *)($5)";
                } else {
                    $type = "$1 ($cc *)[$6]";
                }
            } else {
                if(defined($5)) {
                    $type = "$1 (*)($5)";
                } else {
                    $type = "$1 (*)[$6]";
                }
            }
            my $name = $4;
            $type_begin->($type);
            $type_end->([$name]);
        } elsif(/typedef[^\{;]*;/s) {
            $_ = $'; $again = 1;
            $output->write("$file: $.: could not parse typedef: '$&'\n");
        } elsif(/typedef[^\{]*\{[^\}]*\}[^;]*;/s) {
            # Any amount of text may sit between '}' and ';'.
            $_ = $'; $again = 1;
            $output->write("$file: $.: could not parse multi-line typedef: '$&'\n");
        } elsif(/\'[^\']*\'/s) {
            $_ = $'; $again = 1;
        } elsif(/\"(?:[^\\\"]*|\\.)*\"/s) {
            $_ = $'; $again = 1;
        } elsif(/;/s) {
            $_ = $'; $again = 1;
        } elsif(/extern\s+"C"\s+{/s) {
            $_ = $'; $again = 1;
        } elsif(/\{/s) {
            $_ = $'; $again = 1;
            print "+1: $_\n" if $options->debug >= 2;
            $level++;
        } else {
            # Nothing recognisable yet: append the next line and retry.
            $lookahead = 1;
        }
    }
    close($in);
    print STDERR "done\n" if $options->verbose;
    $output->write("$file: not at toplevel at end of file\n") unless $level == 0;
}
1;